1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * ibcm_impl.c
28  *
29  * contains internal functions of IB CM module.
30  *
31  * TBD:
32  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
33  */
34 
35 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
36 #include <sys/disp.h>
37 
38 
/* function prototypes */
static ibcm_status_t	ibcm_init(void);
static ibcm_status_t	ibcm_fini(void);

/* Routines to initialize and destroy CM global locks and CVs */
static void		ibcm_init_locks(void);
static void		ibcm_fini_locks(void);

/* Routines that initialize/teardown CM's global hca structures */
static void		ibcm_init_hcas();
static ibcm_status_t	ibcm_fini_hcas();

/* Sets up CM's classportinfo (kept in wire format, see ibcm_clpinfo) */
static void		ibcm_init_classportinfo();

/* Asks the timeout-list processing thread to exit and waits for it */
static void		ibcm_stop_timeout_thread();

/* Routines that handle HCA attach/detach asyncs */
static void		ibcm_hca_attach(ib_guid_t);
static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);

/* Routines that initialize the HCA's port related fields */
static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
			    uint8_t port_index);
static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
			    uint8_t port_index);

/* Set up / tear down open_rc_channel flow-control state (ibcm_open) */
static void ibcm_rc_flow_control_init(void);
static void ibcm_rc_flow_control_fini(void);

/*
 * Routines that check if hca's avl trees and sidr lists are free of any
 * active client resources ie., RC or UD state structures in certain states
 */
static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);

/* Add a new hca structure to CM's global hca list */
static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);

/* Async handling: COM_EST events and the top-level IBTF async callback */
static void		ibcm_comm_est_handler(ibt_async_event_t *);
void			ibcm_async_handler(void *, ibt_hca_hdl_t,
			    ibt_async_code_t, ibt_async_event_t *);
80 
81 /* Global variables */
82 char			cmlog[] = "ibcm";	/* for debug log messages */
83 ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
84 kmutex_t		ibcm_svc_info_lock;	/* list lock */
85 kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
86 kmutex_t		ibcm_recv_mutex;
87 avl_tree_t		ibcm_svc_avl_tree;
88 taskq_t			*ibcm_taskq = NULL;
89 int			taskq_dispatch_fail_cnt;
90 
91 kmutex_t		ibcm_mcglist_lock;	/* MCG list lock */
92 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
93 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
94 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
95 
96 int			ibcm_enable_trace = 2;	/* Trace level 4 by default */
97 int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
98 
99 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
100     svc_ref_cnt svc_to_delete}))
101 
102 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
103 
104 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
105 
106 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
107 
108 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
109 
110 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
111 
112 /*
113  * Initial state is INIT. All hca dr's return success immediately in this
114  * state, without adding or deleting any hca's to CM.
115  */
116 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
117 
118 /* mutex and cv to manage hca's reference and resource count(s) */
119 kmutex_t		ibcm_global_hca_lock;
120 kcondvar_t		ibcm_global_hca_cv;
121 
122 /* mutex and cv to sa session open */
123 kmutex_t		ibcm_sa_open_lock;
124 kcondvar_t		ibcm_sa_open_cv;
125 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
126 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
127     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
128 
129 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
130 
131 /* serialize sm notice callbacks */
132 kmutex_t		ibcm_sm_notice_serialize_lock;
133 
134 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
135 
136 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
137     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
138 
139 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
140     ibcm_port_info_s::{port_ibmf_hdl}))
141 
142 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
143     ibcm_port_info_s::{port_event_status}))
144 
145 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
146 _NOTE(DATA_READABLE_WITHOUT_LOCK(
147     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
148 
149 /* mutex for CM's qp list management */
150 kmutex_t		ibcm_qp_list_lock;
151 
152 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
155 
156 kcondvar_t		ibcm_timeout_list_cv;
157 kcondvar_t		ibcm_timeout_thread_done_cv;
158 kt_did_t		ibcm_timeout_thread_did;
159 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
160 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
161 kmutex_t		ibcm_timeout_list_lock;
162 uint8_t			ibcm_timeout_list_flags = 0;
163 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
164 
_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
    ibcm_state_data_s::timeout_next))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
    ibcm_ud_state_data_s::ud_timeout_next))

/*
 * Flow control logic for open_rc_channel uses the following.
 */

/* NOTE(review): fields presumably protected by 'mutex'; confirm in users */
struct ibcm_open_s {
	kmutex_t		mutex;
	kcondvar_t		cv;
	uint8_t			task_running;
	uint_t			queued;
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;
	uint_t			sends_hiwat;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;	/* list anchor, embedded not pointed */
} ibcm_open;

/*
 * Flow control logic for SA access and close_rc_channel calls follows.
 */

/* Tunables: maximum simultaneous close / LAP-APR / SA-access requests */
int ibcm_close_simul_max	= 12;
int ibcm_lapr_simul_max		= 12;
int ibcm_saa_simul_max		= 8;

/* One chunk of waiters on a flow-controlled resource, linked in a list */
typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;
	kcondvar_t		cv;
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;

typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;
	uint_t			waiters_per_chunk;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;
} ibcm_flow_t;

/* Per-purpose flow-control instances: SA access, close, LAP/APR */
ibcm_flow_t ibcm_saa_flow;
ibcm_flow_t ibcm_close_flow;
ibcm_flow_t ibcm_lapr_flow;

/* NONBLOCKING close requests are queued */
struct ibcm_close_s {
	kmutex_t		mutex;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;	/* list anchor, embedded not pointed */
} ibcm_close;

/* CM's registration info passed to ibt_attach() */
static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
	IBTI_V_CURR,
	IBT_CM,
	ibcm_async_handler,
	NULL,
	"IBCM"
};
234 
/* IBCM's list of HCAs registered with it */
static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */

/*
 * Array of CM state call table functions.
 * NOTE(review): entry order presumably matches the CM MAD message-type
 * dispatch codes used by the receive path; confirm before reordering.
 */
ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
	ibcm_process_req_msg,
	ibcm_process_mra_msg,
	ibcm_process_rej_msg,
	ibcm_process_rep_msg,
	ibcm_process_rtu_msg,
	ibcm_process_dreq_msg,
	ibcm_process_drep_msg,
	ibcm_process_sidr_req_msg,
	ibcm_process_sidr_rep_msg,
	ibcm_process_lap_msg,
	ibcm_process_apr_msg
};

/* the following globals are CM tunables */
ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;

uint8_t		ibcm_max_retries = IBCM_MAX_RETRIES;
clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;

ib_time_t	ibcm_max_sidr_rep_store_time = 18;
uint32_t	ibcm_wait_for_acc_cnt_timeout = 2000000;	/* 2 sec */

ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;

/*
 * This delay accounts for time involved in various activities as follows :
 *
 * IBMF delays for posting the MADs in non-blocking mode
 * IBMF delays for receiving the MADs and delivering to CM
 * CM delays in processing the MADs before invoking client handlers,
 * Any other delays associated with HCA driver in processing the MADs and
 * 	other subsystems that CM may invoke (ex : SA, HCA driver)
 */
uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;

/*	approx boot time */
uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */

/*
 * The information in ibcm_clpinfo is kept in wireformat and is setup at
 * init time, and used read-only after that
 */
ibcm_classportinfo_msg_t	ibcm_clpinfo;
288 
/*
 * Human-readable labels for connection-trace events.
 * NOTE(review): entry order presumably mirrors the conn-trace event codes
 * (first and last entries are guard slots); confirm before changing.
 */
char	*event_str[] = {
	"NEVER SEE THIS             ",
	"SESSION_ID                 ",
	"CHAN_HDL                   ",
	"LOCAL_COMID/HCA/PORT       ",
	"LOCAL_QPN                  ",
	"REMOTE_COMID/HCA           ",
	"REMOTE_QPN                 ",
	"BASE_TIME                  ",
	"INCOMING_REQ               ",
	"INCOMING_REP               ",
	"INCOMING_RTU               ",
	"INCOMING_COMEST            ",
	"INCOMING_MRA               ",
	"INCOMING_REJ               ",
	"INCOMING_LAP               ",
	"INCOMING_APR               ",
	"INCOMING_DREQ              ",
	"INCOMING_DREP              ",
	"OUTGOING_REQ               ",
	"OUTGOING_REP               ",
	"OUTGOING_RTU               ",
	"OUTGOING_LAP               ",
	"OUTGOING_APR               ",
	"OUTGOING_MRA               ",
	"OUTGOING_REJ               ",
	"OUTGOING_DREQ              ",
	"OUTGOING_DREP              ",
	"REQ_POST_COMPLETE          ",
	"REP_POST_COMPLETE          ",
	"RTU_POST_COMPLETE          ",
	"MRA_POST_COMPLETE          ",
	"REJ_POST_COMPLETE          ",
	"LAP_POST_COMPLETE          ",
	"APR_POST_COMPLETE          ",
	"DREQ_POST_COMPLETE         ",
	"DREP_POST_COMPLETE         ",
	"TIMEOUT_REP                ",
	"CALLED_REQ_RCVD_EVENT      ",
	"RET_REQ_RCVD_EVENT         ",
	"CALLED_REP_RCVD_EVENT      ",
	"RET_REP_RCVD_EVENT         ",
	"CALLED_CONN_EST_EVENT      ",
	"RET_CONN_EST_EVENT         ",
	"CALLED_CONN_FAIL_EVENT     ",
	"RET_CONN_FAIL_EVENT        ",
	"CALLED_CONN_CLOSE_EVENT    ",
	"RET_CONN_CLOSE_EVENT       ",
	"INIT_INIT                  ",
	"INIT_INIT_FAIL             ",
	"INIT_RTR                   ",
	"INIT_RTR_FAIL              ",
	"RTR_RTS                    ",
	"RTR_RTS_FAIL               ",
	"RTS_RTS                    ",
	"RTS_RTS_FAIL               ",
	"TO_ERROR                   ",
	"ERROR_FAIL                 ",
	"SET_ALT                    ",
	"SET_ALT_FAIL               ",
	"STALE_DETECT               ",
	"OUTGOING_REQ_RETRY         ",
	"OUTGOING_REP_RETRY         ",
	"OUTGOING_LAP_RETRY         ",
	"OUTGOING_MRA_RETRY         ",
	"OUTGOING_DREQ_RETRY        ",
	"NEVER SEE THIS             "
};

/* Scratch buffer for formatting trace output; see ibcm_trace_print_mutex */
char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];

_NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
    ibcm_debug_buf))
_NOTE(READ_ONLY_DATA(ibcm_taskq))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))

#ifdef DEBUG
int		ibcm_test_mode = 0;	/* set to 1, if running tests */
#endif


/* Module Driver Info */
static struct modlmisc ibcm_modlmisc = {
	&mod_miscops,
	"IB Communication Manager"
};

/* Module Linkage */
static struct modlinkage ibcm_modlinkage = {
	MODREV_1,
	&ibcm_modlmisc,
	NULL
};
386 
387 int
_init(void)388 _init(void)
389 {
390 	int		rval;
391 	ibcm_status_t	status;
392 
393 	status = ibcm_init();
394 	if (status != IBCM_SUCCESS) {
395 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
396 		return (EINVAL);
397 	}
398 
399 	rval = mod_install(&ibcm_modlinkage);
400 	if (rval != 0) {
401 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
402 		    rval);
403 		(void) ibcm_fini();
404 	}
405 
406 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
407 	return (rval);
408 
409 }
410 
411 
412 int
_info(struct modinfo * modinfop)413 _info(struct modinfo *modinfop)
414 {
415 	return (mod_info(&ibcm_modlinkage, modinfop));
416 }
417 
418 
419 int
_fini(void)420 _fini(void)
421 {
422 	int status;
423 
424 	if (ibcm_fini() != IBCM_SUCCESS)
425 		return (EBUSY);
426 
427 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
428 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
429 		    status);
430 		return (status);
431 	}
432 
433 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
434 
435 	return (status);
436 }
437 
/*
 * Initializes all global mutex and CV in cm module.
 * Must be kept textually parallel with ibcm_fini_locks(), which destroys
 * exactly this set of locks, CVs and the service AVL tree.
 */
static void
ibcm_init_locks()
{

	/* Verify CM MAD sizes */
#ifdef DEBUG

	/* Only when test mode is cranked up: dump each CM MAD struct size */
	if (ibcm_test_mode > 1) {

		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
		    sizeof (ibcm_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
		    sizeof (ibcm_rep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
		    sizeof (ibcm_rtu_msg_t));
		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
		    sizeof (ibcm_mra_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
		    sizeof (ibcm_rej_msg_t));
		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
		    sizeof (ibcm_lap_msg_t));
		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
		    sizeof (ibcm_apr_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
		    sizeof (ibcm_dreq_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
		    sizeof (ibcm_drep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
		    sizeof (ibcm_sidr_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
		    sizeof (ibcm_sidr_rep_msg_t));
	}

#endif

	/* Create all global locks within cm module */
	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_mcglist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
	/* AVL tree of registered services, ordered by ibcm_svc_compare */
	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
	    sizeof (ibcm_svc_info_t),
	    offsetof(struct ibcm_svc_info_s, svc_link));

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
}
496 
/*
 * Destroys all global mutex and CV in cm module.
 * Mirror of ibcm_init_locks(): destroys exactly the set of locks, CVs
 * and the service AVL tree created there. Callers must ensure none of
 * these locks are held or waited on at this point.
 */
static void
ibcm_fini_locks()
{
	/* Destroy all global locks within cm module */
	mutex_destroy(&ibcm_svc_info_lock);
	mutex_destroy(&ibcm_mcglist_lock);
	mutex_destroy(&ibcm_timeout_list_lock);
	mutex_destroy(&ibcm_global_hca_lock);
	mutex_destroy(&ibcm_sa_open_lock);
	mutex_destroy(&ibcm_recv_mutex);
	mutex_destroy(&ibcm_sm_notice_serialize_lock);
	mutex_destroy(&ibcm_qp_list_lock);
	mutex_destroy(&ibcm_trace_mutex);
	mutex_destroy(&ibcm_trace_print_mutex);
	cv_destroy(&ibcm_svc_info_cv);
	cv_destroy(&ibcm_timeout_list_cv);
	cv_destroy(&ibcm_timeout_thread_done_cv);
	cv_destroy(&ibcm_global_hca_cv);
	cv_destroy(&ibcm_sa_open_cv);
	avl_destroy(&ibcm_svc_avl_tree);

	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
}
521 
522 
/*
 * Initialize CM's classport info.
 * Fields are stored in wire format (h2b16/h2b32 conversions); the result
 * is written once here and used read-only afterwards — see the
 * ibcm_clpinfo declaration.
 */
static void
ibcm_init_classportinfo()
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));

	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;

	/* For now, CM supports same capabilities at all ports */
	ibcm_clpinfo.CapabilityMask =
	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);

	/* Bits 0-7 are all 0 for Communication Mgmt Class */

	/* For now, CM has the same respvalue at all ports */
	ibcm_clpinfo.RespTimeValue_plus =
	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);

	/* For now, redirect fields are set to 0 */
	/* Trap fields are not applicable to CM, hence set to 0 */

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
}
548 
/*
 * ibcm_init():
 * 	- call ibt_attach()
 * 	- create AVL trees
 *	- Attach HCA handlers that are already present before
 *	CM got loaded.
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - any sub-init failed; everything initialized up to
 *	that point is unwound before returning
 */
static ibcm_status_t
ibcm_init(void)
{
	ibt_status_t	status;
	kthread_t	*t;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");

	ibcm_init_classportinfo();

	if (ibcm_init_ids() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: vmem_create() failed");
		return (IBCM_FAILURE);
	}
	ibcm_init_locks();

	if (ibcm_ar_init() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: ibcm_ar_init() failed");
		/* Unwind in reverse order of initialization */
		ibcm_fini_ids();
		ibcm_fini_locks();
		return (IBCM_FAILURE);
	}
	ibcm_rc_flow_control_init();

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
	ibcm_taskq = system_taskq;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/* Start the timeout list processing thread */
	ibcm_timeout_list_flags = 0;
	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
	    ibcm_timeout_thread_pri);
	/* remember the thread id so ibcm_stop_timeout_thread() can join it */
	ibcm_timeout_thread_did = t->t_did;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))

	/*
	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
	 * HCA DR events may be lost. CM could call re-init hca list
	 * again, but it is more complicated. Some HCA's DR's lost may
	 * be HCA detach, which makes hca list re-syncing and locking more
	 * complex
	 */
	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
		    status);
		/* Tear down everything initialized above */
		(void) ibcm_ar_fini();
		ibcm_stop_timeout_thread();
		ibcm_fini_ids();
		ibcm_fini_locks();
		ibcm_rc_flow_control_fini();
		return (IBCM_FAILURE);
	}

	/* Block all HCA attach/detach asyncs */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_init_hcas();
	ibcm_finit_state = IBCM_FINIT_IDLE;

	ibcm_path_cache_init();
	/*
	 * This callback will be used by IBTL to get the Node record for a
	 * given LID via the specified HCA and port.
	 */
	ibtl_cm_set_node_info_cb(ibcm_ibtl_node_info);

	/* Unblock any waiting HCA DR asyncs in CM */
	mutex_exit(&ibcm_global_hca_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
	return (IBCM_SUCCESS);
}
641 
642 /* Allocates and initializes the "per hca" global data in CM */
643 static void
ibcm_init_hcas()644 ibcm_init_hcas()
645 {
646 	uint_t	num_hcas = 0;
647 	ib_guid_t *guid_array;
648 	int i;
649 
650 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
651 
652 	/* Get the number of HCAs */
653 	num_hcas = ibt_get_hca_list(&guid_array);
654 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
655 	    "returned %d hcas", num_hcas);
656 
657 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
658 
659 	for (i = 0; i < num_hcas; i++)
660 		ibcm_hca_attach(guid_array[i]);
661 
662 	if (num_hcas)
663 		ibt_free_hca_list(guid_array, num_hcas);
664 
665 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
666 }
667 
668 
/*
 * ibcm_fini():
 * 	- Deregister w/ ibt
 * 	- Cleanup IBCM HCA listp
 * 	- Destroy mutexes
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - clients still hold services/connections; CM state
 *	is restored to its pre-fini condition before returning
 */
static ibcm_status_t
ibcm_fini(void)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");

	/*
	 * CM assumes that the all general clients got rid of all the
	 * established connections and service registrations, completed all
	 * pending SIDR operations before a call to ibcm_fini()
	 */

	if (ibcm_ar_fini() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
		return (IBCM_FAILURE);
	}

	/* cleanup the svcinfo list */
	mutex_enter(&ibcm_svc_info_lock);
	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "ibcm_svc_avl_tree is not empty");
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}
	mutex_exit(&ibcm_svc_info_lock);

	/* disables any new hca attach/detaches */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_finit_state = IBCM_FINIT_BUSY;

	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "some hca's still have client resources");

		/* First, re-initialize the hcas */
		ibcm_init_hcas();
		/* and then enable the HCA asyncs */
		ibcm_finit_state = IBCM_FINIT_IDLE;
		mutex_exit(&ibcm_global_hca_lock);
		/* re-arm ARs that ibcm_ar_fini() above tore down */
		if (ibcm_ar_init() != IBCM_SUCCESS) {
			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
		}
		return (IBCM_FAILURE);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* With all HCAs detached, both timeout lists must be drained */
	ASSERT(ibcm_timeout_list_hdr == NULL);
	ASSERT(ibcm_ud_timeout_list_hdr == NULL);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* Release any pending asyncs on ibcm_global_hca_lock */
	ibcm_finit_state = IBCM_FINIT_SUCCESS;
	mutex_exit(&ibcm_global_hca_lock);

	ibcm_stop_timeout_thread();

	ibtl_cm_set_node_info_cb(NULL);
	/*
	 * Detach from IBTL. Waits until all pending asyncs are complete.
	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
	 */
	status = ibt_detach(ibcm_ibt_handle);

	/* if detach fails, CM didn't free up some resources, so assert */
	if (status != IBT_SUCCESS)
		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
		    status);

	ibcm_rc_flow_control_fini();

	ibcm_path_cache_fini();

	ibcm_fini_ids();
	ibcm_fini_locks();
	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
	return (IBCM_SUCCESS);
}
764 
765 /* This routine exit's the ibcm timeout thread  */
766 static void
ibcm_stop_timeout_thread()767 ibcm_stop_timeout_thread()
768 {
769 	mutex_enter(&ibcm_timeout_list_lock);
770 
771 	/* Stop the timeout list processing thread */
772 	ibcm_timeout_list_flags =
773 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
774 
775 	/* Wake up, if the timeout thread is on a cv_wait */
776 	cv_signal(&ibcm_timeout_list_cv);
777 
778 	mutex_exit(&ibcm_timeout_list_lock);
779 	thread_join(ibcm_timeout_thread_did);
780 
781 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
782 }
783 
784 
785 /* Attempts to release all the hca's associated with CM */
786 static ibcm_status_t
ibcm_fini_hcas()787 ibcm_fini_hcas()
788 {
789 	ibcm_hca_info_t *hcap, *next;
790 
791 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
792 
793 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
794 
795 	hcap = ibcm_hca_listp;
796 	while (hcap != NULL) {
797 		next = hcap->hca_next;
798 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
799 			ibcm_hca_listp = hcap;
800 			return (IBCM_FAILURE);
801 		}
802 		hcap = next;
803 	}
804 
805 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
806 	return (IBCM_SUCCESS);
807 }
808 
809 
810 /*
811  * ibcm_hca_attach():
812  *	Called as an asynchronous event to notify CM of an attach of HCA.
813  *	Here ibcm_hca_info_t is initialized and all fields are
814  *	filled in along with SA Access handles and IBMA handles.
815  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
816  *	hca's
817  *
818  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
819  *	hca_guid	- HCA's guid
820  *
821  * Return values: NONE
822  */
823 static void
ibcm_hca_attach(ib_guid_t hcaguid)824 ibcm_hca_attach(ib_guid_t hcaguid)
825 {
826 	int			i;
827 	ibt_status_t		status;
828 	uint8_t			nports = 0;
829 	ibcm_hca_info_t		*hcap;
830 	ibt_hca_attr_t		hca_attrs;
831 
832 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
833 
834 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
835 
836 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
837 
838 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
839 	if (status != IBT_SUCCESS) {
840 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
841 		    "ibt_query_hca_byguid failed = %d", status);
842 		return;
843 	}
844 	nports = hca_attrs.hca_nports;
845 
846 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
847 
848 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
849 		return;
850 
851 	hcap->hca_guid = hcaguid;	/* Set GUID */
852 	hcap->hca_num_ports = nports;	/* Set number of ports */
853 
854 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
855 		ibcm_delete_hca_entry(hcap);
856 		return;
857 	}
858 
859 	/* Store the static hca attribute data */
860 	hcap->hca_caps = hca_attrs.hca_flags;
861 	hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
862 	hcap->hca_device_id = hca_attrs.hca_device_id;
863 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
864 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
865 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
866 
867 	/* loop thru nports and initialize IBMF handles */
868 	for (i = 0; i < hcap->hca_num_ports; i++) {
869 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
870 		if (status != IBT_SUCCESS) {
871 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
872 			    "port_num %d state DOWN", i + 1);
873 		}
874 
875 		hcap->hca_port_info[i].port_hcap = hcap;
876 		hcap->hca_port_info[i].port_num = i+1;
877 
878 		if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
879 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
880 			    "ibcm_hca_init_port failed %d port_num %d",
881 			    status, i+1);
882 	}
883 
884 	/* create the "active" CM AVL tree */
885 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
886 	    sizeof (ibcm_state_data_t),
887 	    offsetof(struct ibcm_state_data_s, avl_active_link));
888 
889 	/* create the "passive" CM AVL tree */
890 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
891 	    sizeof (ibcm_state_data_t),
892 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
893 
894 	/* create the "passive comid" CM AVL tree */
895 	avl_create(&hcap->hca_passive_comid_tree,
896 	    ibcm_passive_comid_node_compare,
897 	    sizeof (ibcm_state_data_t),
898 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
899 
900 	/*
901 	 * Mark the state of the HCA to "attach" only at the end
902 	 * Now CM starts accepting incoming MADs and client API calls
903 	 */
904 	hcap->hca_state = IBCM_HCA_ACTIVE;
905 
906 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
907 
908 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
909 }
910 
911 /*
912  * ibcm_hca_detach():
913  *	Called as an asynchronous event to notify CM of a detach of HCA.
914  *	Here ibcm_hca_info_t is freed up and all fields that
915  *	were initialized earlier are cleaned up
916  *
917  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
918  *	hca_guid    - HCA's guid
919  *
920  * Return values:
921  *	IBCM_SUCCESS	- able to detach HCA
922  *	IBCM_FAILURE	- failed to detach HCA
923  */
924 static ibcm_status_t
ibcm_hca_detach(ibcm_hca_info_t * hcap)925 ibcm_hca_detach(ibcm_hca_info_t *hcap)
926 {
927 	int		port_index, i;
928 	ibcm_status_t	status = IBCM_SUCCESS;
929 	clock_t		absolute_time;
930 
931 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
932 	    hcap, hcap->hca_guid);
933 
934 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
935 
936 	/*
937 	 * Declare hca is going away to all CM clients. Wait until the
938 	 * access count becomes zero.
939 	 */
940 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
941 
942 	/* wait on response CV */
943 	absolute_time = ddi_get_lbolt() +
944 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
945 
946 	while (hcap->hca_acc_cnt > 0)
947 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
948 		    absolute_time) == -1)
949 			break;
950 
951 	if (hcap->hca_acc_cnt != 0) {
952 		/* We got a timeout */
953 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
954 		    " to timeout on hca_acc_cnt %u, \n Some CM Clients are "
955 		    "still active, looks like we need to wait some more time "
956 		    "(ibcm_wait_for_acc_cnt_timeout).", hcap->hca_acc_cnt);
957 		hcap->hca_state = IBCM_HCA_ACTIVE;
958 		return (IBCM_FAILURE);
959 	}
960 
961 	/*
962 	 * First make sure, there are no active users of ibma handles,
963 	 * and then de-register handles.
964 	 */
965 
966 	/* make sure that there are no "Service"s registered w/ this HCA. */
967 	if (hcap->hca_svc_cnt != 0) {
968 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
969 		    "Active services still there %d", hcap->hca_svc_cnt);
970 		hcap->hca_state = IBCM_HCA_ACTIVE;
971 		return (IBCM_FAILURE);
972 	}
973 
974 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
975 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
976 		    "There are active SIDR operations");
977 		hcap->hca_state = IBCM_HCA_ACTIVE;
978 		return (IBCM_FAILURE);
979 	}
980 
981 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
982 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
983 		    "There are active RC connections");
984 		hcap->hca_state = IBCM_HCA_ACTIVE;
985 		return (IBCM_FAILURE);
986 	}
987 
988 	/*
989 	 * Now, wait until all rc and sidr stateps go away
990 	 * All these stateps must be short lived ones, waiting to be cleaned
991 	 * up after some timeout value, based on the current state.
992 	 */
993 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
994 	    hcap->hca_guid, hcap->hca_res_cnt);
995 
996 	while (hcap->hca_res_cnt > 0)
997 		cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock);
998 
999 	/* Re-assert the while loop step above */
1000 	ASSERT(hcap->hca_sidr_list == NULL);
1001 	avl_destroy(&hcap->hca_active_tree);
1002 	avl_destroy(&hcap->hca_passive_tree);
1003 	avl_destroy(&hcap->hca_passive_comid_tree);
1004 
1005 	/*
1006 	 * Unregister all ports from IBMA
1007 	 * If there is a failure, re-initialize any free'd ibma handles. This
1008 	 * is required to receive the incoming mads
1009 	 */
1010 	status = IBCM_SUCCESS;
1011 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1012 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1013 		    IBCM_SUCCESS) {
1014 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1015 			    "Failed to free IBMA Handle for port_num %d",
1016 			    port_index + 1);
1017 			break;
1018 		}
1019 	}
1020 
1021 	/* If detach fails, re-initialize ibma handles for incoming mads */
1022 	if (status != IBCM_SUCCESS)  {
1023 		for (i = 0; i < port_index; i++) {
1024 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1025 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1026 				    "Failed to re-allocate IBMA Handles for"
1027 				    " port_num %d", port_index + 1);
1028 		}
1029 		hcap->hca_state = IBCM_HCA_ACTIVE;
1030 		return (IBCM_FAILURE);
1031 	}
1032 
1033 	ibcm_fini_hca_ids(hcap);
1034 	ibcm_delete_hca_entry(hcap);
1035 
1036 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1037 	return (IBCM_SUCCESS);
1038 }
1039 
1040 /* Checks, if there are any active sidr state entries in the specified hca */
1041 static ibcm_status_t
ibcm_check_sidr_clean(ibcm_hca_info_t * hcap)1042 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1043 {
1044 	ibcm_ud_state_data_t	*usp;
1045 	uint32_t		transient_cnt = 0;
1046 
1047 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1048 
1049 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1050 	usp = hcap->hca_sidr_list;	/* Point to the list */
1051 	while (usp != NULL) {
1052 		mutex_enter(&usp->ud_state_mutex);
1053 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1054 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1055 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1056 
1057 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1058 			    "usp = %p not in transient state = %d", usp,
1059 			    usp->ud_state);
1060 
1061 			mutex_exit(&usp->ud_state_mutex);
1062 			rw_exit(&hcap->hca_sidr_list_lock);
1063 			return (IBCM_FAILURE);
1064 		} else {
1065 			mutex_exit(&usp->ud_state_mutex);
1066 			++transient_cnt;
1067 		}
1068 
1069 		usp = usp->ud_nextp;
1070 	}
1071 	rw_exit(&hcap->hca_sidr_list_lock);
1072 
1073 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1074 	    transient_cnt);
1075 
1076 	return (IBCM_SUCCESS);
1077 }
1078 
1079 /* Checks, if there are any active rc state entries, in the specified hca */
1080 static ibcm_status_t
ibcm_check_avl_clean(ibcm_hca_info_t * hcap)1081 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1082 
1083 {
1084 	ibcm_state_data_t	*sp;
1085 	avl_tree_t		*avl_tree;
1086 	uint32_t		transient_cnt = 0;
1087 
1088 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1089 	/*
1090 	 * Both the trees ie., active and passive must reference to all
1091 	 * statep's, so let's use one
1092 	 */
1093 	avl_tree = &hcap->hca_active_tree;
1094 
1095 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1096 
1097 	for (sp = avl_first(avl_tree); sp != NULL;
1098 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1099 		mutex_enter(&sp->state_mutex);
1100 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1101 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1102 		    (sp->state != IBCM_STATE_DELETE)) {
1103 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1104 			    "sp = %p not in transient state = %d", sp,
1105 			    sp->state);
1106 			mutex_exit(&sp->state_mutex);
1107 			rw_exit(&hcap->hca_state_rwlock);
1108 			return (IBCM_FAILURE);
1109 		} else {
1110 			mutex_exit(&sp->state_mutex);
1111 			++transient_cnt;
1112 		}
1113 	}
1114 
1115 	rw_exit(&hcap->hca_state_rwlock);
1116 
1117 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1118 	    transient_cnt);
1119 
1120 	return (IBCM_SUCCESS);
1121 }
1122 
1123 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1124 static ibcm_hca_info_t *
ibcm_add_hca_entry(ib_guid_t hcaguid,uint_t nports)1125 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1126 {
1127 	ibcm_hca_info_t	*hcap;
1128 
1129 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1130 	    hcaguid);
1131 
1132 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1133 
1134 	/*
1135 	 * Check if this hca_guid already in the list
1136 	 * If yes, then ignore this and return NULL
1137 	 */
1138 
1139 	hcap = ibcm_hca_listp;
1140 
1141 	/* search for this HCA */
1142 	while (hcap != NULL) {
1143 		if (hcap->hca_guid == hcaguid) {
1144 			/* already exists */
1145 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1146 			    "hcap %p guid 0x%llX, entry already exists !!",
1147 			    hcap, hcap->hca_guid);
1148 			return (NULL);
1149 		}
1150 		hcap = hcap->hca_next;
1151 	}
1152 
1153 	/* Allocate storage for the new HCA entry found */
1154 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1155 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1156 
1157 	/* initialize RW lock */
1158 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1159 	/* initialize SIDR list lock */
1160 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1161 	/* Insert "hcap" into the global HCA list maintained by CM */
1162 	hcap->hca_next = ibcm_hca_listp;
1163 	ibcm_hca_listp = hcap;
1164 
1165 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1166 
1167 	return (hcap);
1168 
1169 }
1170 
1171 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1172 void
ibcm_delete_hca_entry(ibcm_hca_info_t * hcap)1173 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1174 {
1175 	ibcm_hca_info_t	*headp, *prevp = NULL;
1176 
1177 	/* ibcm_hca_global_lock is held */
1178 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1179 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1180 
1181 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1182 
1183 	headp = ibcm_hca_listp;
1184 	while (headp != NULL) {
1185 		if (headp == hcap) {
1186 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1187 			    "deleting hcap %p hcaguid %llX", hcap,
1188 			    hcap->hca_guid);
1189 			if (prevp) {
1190 				prevp->hca_next = headp->hca_next;
1191 			} else {
1192 				prevp = headp->hca_next;
1193 				ibcm_hca_listp = prevp;
1194 			}
1195 			rw_destroy(&hcap->hca_state_rwlock);
1196 			rw_destroy(&hcap->hca_sidr_list_lock);
1197 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1198 			    (hcap->hca_num_ports - 1) *
1199 			    sizeof (ibcm_port_info_t));
1200 			return;
1201 		}
1202 
1203 		prevp = headp;
1204 		headp = headp->hca_next;
1205 	}
1206 }
1207 
1208 /*
1209  * ibcm_find_hca_entry:
1210  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1211  *	This entry can be then used to access AVL tree/SIDR list etc.
1212  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
1213  *	incremented and entry returned. Else NULL returned.
1214  *
1215  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1216  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1217  *	respective hca ref cnt. There shouldn't be any usage of
1218  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1219  *	after decrementing the hca_acc_cnt
1220  *
1221  * INPUTS:
1222  *	hca_guid	- HCA's guid
1223  *
1224  * RETURN VALUE:
1225  *	hcap		- if a match is found, else NULL
1226  */
1227 ibcm_hca_info_t *
ibcm_find_hca_entry(ib_guid_t hca_guid)1228 ibcm_find_hca_entry(ib_guid_t hca_guid)
1229 {
1230 	ibcm_hca_info_t *hcap;
1231 
1232 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1233 
1234 	mutex_enter(&ibcm_global_hca_lock);
1235 
1236 	hcap = ibcm_hca_listp;
1237 	/* search for this HCA */
1238 	while (hcap != NULL) {
1239 		if (hcap->hca_guid == hca_guid)
1240 			break;
1241 		hcap = hcap->hca_next;
1242 	}
1243 
1244 	/* if no hcap for the hca_guid, return NULL */
1245 	if (hcap == NULL) {
1246 		mutex_exit(&ibcm_global_hca_lock);
1247 		return (NULL);
1248 	}
1249 
1250 	/* return hcap, only if it valid to use */
1251 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1252 		++(hcap->hca_acc_cnt);
1253 
1254 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1255 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1256 		    hcap->hca_acc_cnt);
1257 
1258 		mutex_exit(&ibcm_global_hca_lock);
1259 		return (hcap);
1260 	} else {
1261 		mutex_exit(&ibcm_global_hca_lock);
1262 
1263 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1264 		    "found hcap = 0x%p not in active state", hcap);
1265 		return (NULL);
1266 	}
1267 }
1268 
1269 /*
1270  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1271  * the hca's reference count. This function is used, where the calling context
1272  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1273  * OR assumes that valid hcap must be available in ibcm's global hca list.
1274  */
1275 ibcm_hca_info_t *
ibcm_find_hcap_entry(ib_guid_t hca_guid)1276 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1277 {
1278 	ibcm_hca_info_t *hcap;
1279 
1280 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1281 
1282 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1283 
1284 	hcap = ibcm_hca_listp;
1285 	/* search for this HCA */
1286 	while (hcap != NULL) {
1287 		if (hcap->hca_guid == hca_guid)
1288 			break;
1289 		hcap = hcap->hca_next;
1290 	}
1291 
1292 	if (hcap == NULL)
1293 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1294 		    " hca_guid 0x%llX", hca_guid);
1295 	else
1296 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1297 		    " hca_guid 0x%llX", hca_guid);
1298 
1299 	return (hcap);
1300 }
1301 
1302 /* increment the hca's temporary reference count */
1303 ibcm_status_t
ibcm_inc_hca_acc_cnt(ibcm_hca_info_t * hcap)1304 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1305 {
1306 	mutex_enter(&ibcm_global_hca_lock);
1307 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1308 		++(hcap->hca_acc_cnt);
1309 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1310 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1311 		mutex_exit(&ibcm_global_hca_lock);
1312 		return (IBCM_SUCCESS);
1313 	} else {
1314 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1315 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1316 		    hcap->hca_acc_cnt);
1317 		mutex_exit(&ibcm_global_hca_lock);
1318 		return (IBCM_FAILURE);
1319 	}
1320 }
1321 
1322 /* decrement the hca's ref count, and wake up any waiting threads */
1323 void
ibcm_dec_hca_acc_cnt(ibcm_hca_info_t * hcap)1324 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1325 {
1326 	mutex_enter(&ibcm_global_hca_lock);
1327 	ASSERT(hcap->hca_acc_cnt > 0);
1328 	--(hcap->hca_acc_cnt);
1329 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1330 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1331 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1332 	    (hcap->hca_acc_cnt == 0)) {
1333 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1334 		    "cv_broadcast for hcap = 0x%p", hcap);
1335 		cv_broadcast(&ibcm_global_hca_cv);
1336 	}
1337 	mutex_exit(&ibcm_global_hca_lock);
1338 }
1339 
1340 /* increment the hca's resource count */
1341 void
ibcm_inc_hca_res_cnt(ibcm_hca_info_t * hcap)1342 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1343 
1344 {
1345 	mutex_enter(&ibcm_global_hca_lock);
1346 	++(hcap->hca_res_cnt);
1347 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1348 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1349 	mutex_exit(&ibcm_global_hca_lock);
1350 }
1351 
1352 /* decrement the hca's resource count, and wake up any waiting threads */
1353 void
ibcm_dec_hca_res_cnt(ibcm_hca_info_t * hcap)1354 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1355 {
1356 	mutex_enter(&ibcm_global_hca_lock);
1357 	ASSERT(hcap->hca_res_cnt > 0);
1358 	--(hcap->hca_res_cnt);
1359 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1360 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1361 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1362 	    (hcap->hca_res_cnt == 0)) {
1363 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1364 		    "cv_broadcast for hcap = 0x%p", hcap);
1365 		cv_broadcast(&ibcm_global_hca_cv);
1366 	}
1367 	mutex_exit(&ibcm_global_hca_lock);
1368 }
1369 
1370 /* increment the hca's service count */
1371 void
ibcm_inc_hca_svc_cnt(ibcm_hca_info_t * hcap)1372 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1373 
1374 {
1375 	mutex_enter(&ibcm_global_hca_lock);
1376 	++(hcap->hca_svc_cnt);
1377 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1378 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1379 	mutex_exit(&ibcm_global_hca_lock);
1380 }
1381 
1382 /* decrement the hca's service count */
1383 void
ibcm_dec_hca_svc_cnt(ibcm_hca_info_t * hcap)1384 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1385 {
1386 	mutex_enter(&ibcm_global_hca_lock);
1387 	ASSERT(hcap->hca_svc_cnt > 0);
1388 	--(hcap->hca_svc_cnt);
1389 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1390 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1391 	mutex_exit(&ibcm_global_hca_lock);
1392 }
1393 
1394 /*
1395  * The following code manages three classes of requests that CM makes to
1396  * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1397  * The main issue is that the fabric can become very busy, and the CM
1398  * protocols rely on responses being made based on a predefined timeout
1399  * value.  By managing how many simultaneous sessions are allowed, there
1400  * is observed extremely high reliability of CM protocol succeeding when
1401  * it should.
1402  *
1403  * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1404  * thread blocks until there are less than some number of threads doing
1405  * similar requests.
1406  *
1407  * REQ/REP/RTU requests beyond a given limit are added to a list,
1408  * allowing the thread to return immediately to its caller in the
1409  * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
1410  * by uDAPL and seems to be an important feature/behavior.
1411  */
1412 
1413 static int
ibcm_ok_to_start(struct ibcm_open_s * openp)1414 ibcm_ok_to_start(struct ibcm_open_s *openp)
1415 {
1416 	return (openp->sends < openp->sends_hiwat &&
1417 	    openp->in_progress < openp->in_progress_max);
1418 }
1419 
/*
 * ibcm_open_done:
 *	Releases the REQ flow-control slot held by this statep (when
 *	open_flow == 1) and kicks the taskq thread if another queued open
 *	can now start.  A statep still on the open queue is unlinked
 *	instead.  Called with statep->state_mutex held.
 */
void
ibcm_open_done(ibcm_state_data_t *statep)
{
	int run;
	ibcm_state_data_t **linkp, *tmp;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->open_flow == 1) {
		statep->open_flow = 0;
		mutex_enter(&ibcm_open.mutex);
		if (statep->open_link == NULL) {
			/* open was in progress (not queued); free its slot */
			ibcm_open.in_progress--;
			run = ibcm_ok_to_start(&ibcm_open);
		} else {
			/*
			 * Still queued: open_link != NULL marks a queued
			 * statep (the list terminator is &ibcm_open.head,
			 * not NULL).  Unlink it from the singly-linked
			 * queue.
			 */
			ibcm_open.queued--;
			linkp = &ibcm_open.head.open_link;
			while (*linkp != statep)
				linkp = &((*linkp)->open_link);
			*linkp = statep->open_link;
			statep->open_link = NULL;
			/*
			 * If we remove what tail pointed to, we need
			 * to reassign tail (it is never NULL).
			 * tail points to head for the empty list.
			 */
			if (ibcm_open.tail == statep) {
				tmp = &ibcm_open.head;
				while (tmp->open_link != &ibcm_open.head)
					tmp = tmp->open_link;
				ibcm_open.tail = tmp;
			}
			run = 0;
		}
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1458 
1459 /* dtrace */
1460 void
ibcm_open_wait(hrtime_t delta)1461 ibcm_open_wait(hrtime_t delta)
1462 {
1463 	if (delta > 1000000)
1464 		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
1465 }
1466 
/*
 * ibcm_open_start:
 *	Sends out the REQ MAD stored in statep->stored_msg and drops the
 *	reference taken when the open was posted.  Called either directly
 *	from ibcm_open_enqueue() or from the taskq thread via
 *	ibcm_check_for_opens().
 */
void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	mutex_enter(&statep->state_mutex);
	/* dtrace stat: how long this open waited since it was posted */
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
1483 
/*
 * ibcm_open_enqueue:
 *	Starts an outgoing REQ immediately if flow control permits and
 *	nothing is queued ahead of it; otherwise appends the statep to
 *	the tail of the open queue for the taskq thread to start later.
 */
void
ibcm_open_enqueue(ibcm_state_data_t *statep)
{
	int run;

	mutex_enter(&statep->state_mutex);
	statep->post_time = gethrtime();	/* for queue-wait dtrace stat */
	mutex_exit(&statep->state_mutex);
	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
		ibcm_open.in_progress++;
		mutex_exit(&ibcm_open.mutex);
		ibcm_open_start(statep);
	} else {
		/*
		 * Append to tail.  A queued statep's open_link points at
		 * the next entry, with &ibcm_open.head (never NULL) as
		 * terminator, so open_link != NULL marks "queued".
		 */
		ibcm_open.queued++;
		statep->open_link = &ibcm_open.head;
		ibcm_open.tail->open_link = statep;
		ibcm_open.tail = statep;
		run = ibcm_ok_to_start(&ibcm_open);
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1508 
/*
 * ibcm_open_dequeue:
 *	Removes and returns the statep at the head of the open queue,
 *	moving it from "queued" to "in progress" accounting.  Caller must
 *	hold ibcm_open.mutex and have checked the queue is non-empty.
 */
ibcm_state_data_t *
ibcm_open_dequeue(void)
{
	ibcm_state_data_t *statep;

	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
	ibcm_open.queued--;
	ibcm_open.in_progress++;
	statep = ibcm_open.head.open_link;
	ibcm_open.head.open_link = statep->open_link;
	statep->open_link = NULL;	/* NULL open_link = not queued */
	/*
	 * If we remove what tail pointed to, we need
	 * to reassign tail (it is never NULL).
	 * tail points to head for the empty list.
	 */
	if (ibcm_open.tail == statep)
		ibcm_open.tail = &ibcm_open.head;
	return (statep);
}
1529 
1530 void
ibcm_check_for_opens(void)1531 ibcm_check_for_opens(void)
1532 {
1533 	ibcm_state_data_t 	*statep;
1534 
1535 	mutex_enter(&ibcm_open.mutex);
1536 
1537 	while (ibcm_open.queued > 0) {
1538 		if (ibcm_ok_to_start(&ibcm_open)) {
1539 			statep = ibcm_open_dequeue();
1540 			mutex_exit(&ibcm_open.mutex);
1541 
1542 			ibcm_open_start(statep);
1543 
1544 			mutex_enter(&ibcm_open.mutex);
1545 		} else {
1546 			break;
1547 		}
1548 	}
1549 	mutex_exit(&ibcm_open.mutex);
1550 }
1551 
1552 
1553 static void
ibcm_flow_init(ibcm_flow_t * flow,uint_t simul_max)1554 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1555 {
1556 	flow->list			= NULL;
1557 	flow->simul			= 0;
1558 	flow->waiters_per_chunk		= 4;
1559 	flow->simul_max			= simul_max;
1560 	flow->lowat			= simul_max - flow->waiters_per_chunk;
1561 	flow->lowat_default		= flow->lowat;
1562 	/* stats */
1563 	flow->total			= 0;
1564 }
1565 
1566 static void
ibcm_rc_flow_control_init(void)1567 ibcm_rc_flow_control_init(void)
1568 {
1569 	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
1570 	mutex_enter(&ibcm_open.mutex);
1571 	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
1572 	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
1573 	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
1574 
1575 	ibcm_open.queued 		= 0;
1576 	ibcm_open.exit_deferred 	= 0;
1577 	ibcm_open.in_progress 		= 0;
1578 	ibcm_open.in_progress_max 	= 16;
1579 	ibcm_open.sends 		= 0;
1580 	ibcm_open.sends_max 		= 0;
1581 	ibcm_open.sends_lowat 		= 8;
1582 	ibcm_open.sends_hiwat 		= 16;
1583 	ibcm_open.tail 			= &ibcm_open.head;
1584 	ibcm_open.head.open_link 	= NULL;
1585 	mutex_exit(&ibcm_open.mutex);
1586 
1587 	mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
1588 	mutex_enter(&ibcm_close.mutex);
1589 	ibcm_close.tail			= &ibcm_close.head;
1590 	ibcm_close.head.close_link 	= NULL;
1591 	mutex_exit(&ibcm_close.mutex);
1592 }
1593 
1594 static void
ibcm_rc_flow_control_fini(void)1595 ibcm_rc_flow_control_fini(void)
1596 {
1597 	mutex_destroy(&ibcm_open.mutex);
1598 	mutex_destroy(&ibcm_close.mutex);
1599 }
1600 
/*
 * ibcm_flow_find:
 *	Returns a flow1 chunk (one CV plus a waiter count) with room for
 *	another waiter, allocating and appending a new chunk when the last
 *	one is full.  Called and returns with ibcm_open.mutex held; the
 *	mutex is dropped around the KM_SLEEP allocation, so the list is
 *	re-examined afterwards in case another thread changed it.
 */
static ibcm_flow1_t *
ibcm_flow_find(ibcm_flow_t *flow)
{
	ibcm_flow1_t *flow1;
	ibcm_flow1_t *f;

	f = flow->list;
	if (f) {	/* most likely code path */
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk)
			return (f);
	}

	/* There was no flow1 list element ready for another waiter */
	mutex_exit(&ibcm_open.mutex);
	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
	mutex_enter(&ibcm_open.mutex);

	/* re-check: the list may have changed while the mutex was dropped */
	f = flow->list;
	if (f) {
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk) {
			/* someone else made room; discard our allocation */
			kmem_free(flow1, sizeof (*flow1));
			return (f);
		}
		f->link = flow1;
	} else {
		flow->list = flow1;
	}
	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
	flow1->waiters = 0;
	flow1->link = NULL;
	return (flow1);
}
1637 
/*
 * ibcm_flow_enter:
 *	Acquire one slot of the given flow.  If the flow is below its
 *	simultaneous maximum and nobody is already waiting, the slot is
 *	taken immediately; otherwise the caller sleeps on a flow1 chunk's
 *	CV until ibcm_flow_exit() broadcasts it.  The last waiter to wake
 *	from a chunk frees the chunk.
 */
static void
ibcm_flow_enter(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (flow->list == NULL && flow->simul < flow->simul_max) {
		flow->simul++;
		flow->total++;
		mutex_exit(&ibcm_open.mutex);
	} else {
		ibcm_flow1_t *flow1;

		flow1 = ibcm_flow_find(flow);
		flow1->waiters++;
		cv_wait(&flow1->cv, &ibcm_open.mutex);
		/* woken waiters already hold slots (see ibcm_flow_exit) */
		if (--flow1->waiters == 0) {
			/* last one out of this chunk destroys it */
			cv_destroy(&flow1->cv);
			mutex_exit(&ibcm_open.mutex);
			kmem_free(flow1, sizeof (*flow1));
		} else
			mutex_exit(&ibcm_open.mutex);
	}
}
1660 
/*
 * ibcm_flow_exit:
 *	Release one slot of the given flow.  When usage drops below the
 *	low-water mark, the mark creeps back toward its default, and the
 *	oldest chunk of waiters (if any) is unlinked and woken en masse;
 *	the woken waiters are accounted as holding slots immediately.
 */
static void
ibcm_flow_exit(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (--flow->simul < flow->lowat) {
		if (flow->lowat < flow->lowat_default)
			flow->lowat++;
		if (flow->list) {
			ibcm_flow1_t *flow1;

			flow1 = flow->list;
			flow->list = flow1->link;	/* unlink */
			flow1->link = NULL;		/* be clean */
			flow->total += flow1->waiters;
			flow->simul += flow1->waiters;
			cv_broadcast(&flow1->cv);
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1681 
1682 void
ibcm_flow_inc(void)1683 ibcm_flow_inc(void)
1684 {
1685 	mutex_enter(&ibcm_open.mutex);
1686 	if (++ibcm_open.sends > ibcm_open.sends_max) {
1687 		ibcm_open.sends_max = ibcm_open.sends;
1688 		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
1689 		    ibcm_open.sends_max);
1690 	}
1691 	mutex_exit(&ibcm_open.mutex);
1692 }
1693 
1694 static void
ibcm_check_send_cmpltn_time(hrtime_t delta,char * event_msg)1695 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1696 {
1697 	if (delta > 4000000LL) {
1698 		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1699 		    "%s: %lldns", event_msg, delta);
1700 	}
1701 }
1702 
/*
 * ibcm_flow_dec:
 *	Account for the completion of one MAD send (optionally reporting
 *	slow completions via the dtrace hook).  Once sends drop below the
 *	low-water mark, kick the taskq thread if queued opens can now
 *	start, and pay back one ibcm_close_exit() that was deferred.
 */
void
ibcm_flow_dec(hrtime_t time, char *mad_type)
{
	int flow_exit = 0;
	int run = 0;

	if (ibcm_dtrace)
		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
	mutex_enter(&ibcm_open.mutex);
	ibcm_open.sends--;
	if (ibcm_open.sends < ibcm_open.sends_lowat) {
		run = ibcm_ok_to_start(&ibcm_open);
		if (ibcm_open.exit_deferred) {
			/* release one close-flow slot on its behalf */
			ibcm_open.exit_deferred--;
			flow_exit = 1;
		}
	}
	mutex_exit(&ibcm_open.mutex);
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
	if (run)
		ibcm_run_tlist_thread();
}
1726 
/*
 * ibcm_close_enqueue:
 *	Appends the statep to the tail of the async-close queue and kicks
 *	the taskq thread, which drains it in ibcm_check_for_async_close().
 */
void
ibcm_close_enqueue(ibcm_state_data_t *statep)
{
	mutex_enter(&ibcm_close.mutex);
	statep->close_link = NULL;	/* new tail entry terminates list */
	ibcm_close.tail->close_link = statep;
	ibcm_close.tail = statep;
	mutex_exit(&ibcm_close.mutex);
	ibcm_run_tlist_thread();
}
1737 
/*
 * ibcm_check_for_async_close:
 *	Runs in the taskq thread; drains the async-close queue, calling
 *	ibcm_close_start() on each statep with ibcm_close.mutex dropped.
 */
void
ibcm_check_for_async_close()
{
	ibcm_state_data_t 	*statep;

	mutex_enter(&ibcm_close.mutex);

	while (ibcm_close.head.close_link) {
		statep = ibcm_close.head.close_link;
		ibcm_close.head.close_link = statep->close_link;
		statep->close_link = NULL;
		/* tail points back at head when the queue goes empty */
		if (ibcm_close.tail == statep)
			ibcm_close.tail = &ibcm_close.head;
		mutex_exit(&ibcm_close.mutex);
		ibcm_close_start(statep);
		mutex_enter(&ibcm_close.mutex);
	}
	mutex_exit(&ibcm_close.mutex);
}
1757 
/* Acquire a DREQ/DREP flow-control slot; may block. */
void
ibcm_close_enter(void)
{
	ibcm_flow_enter(&ibcm_close_flow);
}
1763 
1764 void
ibcm_close_exit(void)1765 ibcm_close_exit(void)
1766 {
1767 	int flow_exit;
1768 
1769 	mutex_enter(&ibcm_open.mutex);
1770 	if (ibcm_open.sends < ibcm_open.sends_lowat ||
1771 	    ibcm_open.exit_deferred >= 4)
1772 		flow_exit = 1;
1773 	else {
1774 		flow_exit = 0;
1775 		ibcm_open.exit_deferred++;
1776 	}
1777 	mutex_exit(&ibcm_open.mutex);
1778 	if (flow_exit)
1779 		ibcm_flow_exit(&ibcm_close_flow);
1780 }
1781 
/*
 * This function needs to be called twice to finish our flow
 * control accounting when closing down a connection.  One
 * call has send_done set to 1, while the other has it set to 0.
 * Because of retries, this could get called more than once
 * with either 0 or 1, but additional calls have no effect.
 *
 * close_flow encodes progress: 1 = slot held, neither call seen yet;
 * 3 = only the send_done call seen; 2 = only the !send_done call seen;
 * 0 = both calls seen and the slot released.
 */
void
ibcm_close_done(ibcm_state_data_t *statep, int send_done)
{
	int flow_exit;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->close_flow == 1) {
		if (send_done)
			statep->close_flow = 3;
		else
			statep->close_flow = 2;
	} else if ((send_done && statep->close_flow == 2) ||
	    (!send_done && statep->close_flow == 3)) {
		statep->close_flow = 0;
		/* same deferred-release policy as ibcm_close_exit() */
		mutex_enter(&ibcm_open.mutex);
		if (ibcm_open.sends < ibcm_open.sends_lowat ||
		    ibcm_open.exit_deferred >= 4)
			flow_exit = 1;
		else {
			flow_exit = 0;
			ibcm_open.exit_deferred++;
		}
		mutex_exit(&ibcm_open.mutex);
		if (flow_exit)
			ibcm_flow_exit(&ibcm_close_flow);
	}
}
1816 
/* Acquire a LAP/APR flow-control slot; may block. */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
1822 
/* Release a LAP/APR flow-control slot, possibly waking waiters. */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
1828 
/* Acquire an SA-access flow-control slot; may block. */
void
ibcm_sa_access_enter()
{
	ibcm_flow_enter(&ibcm_saa_flow);
}
1834 
/* Release an SA-access flow-control slot, possibly waking waiters. */
void
ibcm_sa_access_exit()
{
	ibcm_flow_exit(&ibcm_saa_flow);
}
1840 
/*
 * ibcm_sm_notice_handler:
 *	ibmf_saa subnet-event callback (registered via ibcm_init_saa()).
 *	Translates the ibmf_saa event code into the corresponding
 *	ibt_subnet_event_code_t and forwards it to IBTL for delivery to
 *	clients, holding an access reference on the HCA so it cannot
 *	detach underneath the client callback.
 */
static void
ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
    ibmf_saa_subnet_event_t saa_event_code,
    ibmf_saa_event_details_t *saa_event_details,
    void *callback_arg)
{
	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
	ibt_subnet_event_code_t code;
	ibt_subnet_event_t	event;
	uint8_t			event_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
	    saa_handle, saa_event_code);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	switch (saa_event_code) {
	case IBMF_SAA_EVENT_MCG_CREATED:
		code = IBT_SM_EVENT_MCG_CREATED;
		break;
	case IBMF_SAA_EVENT_MCG_DELETED:
		code = IBT_SM_EVENT_MCG_DELETED;
		break;
	case IBMF_SAA_EVENT_GID_AVAILABLE:
		code = IBT_SM_EVENT_GID_AVAIL;
		/* cached paths may be stale once GIDs come or go */
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
		code = IBT_SM_EVENT_GID_UNAVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
		/* only the SM-producer bit of the status mask matters here */
		event_status =
		    saa_event_details->ie_producer_event_status_mask &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
		if (event_status == (portp->port_event_status &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
			mutex_exit(&ibcm_sm_notice_serialize_lock);
			return;	/* no change */
		}
		portp->port_event_status = event_status;
		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
			code = IBT_SM_EVENT_AVAILABLE;
		else
			code = IBT_SM_EVENT_UNAVAILABLE;
		break;
	default:
		/* not an event CM forwards to clients */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	mutex_enter(&ibcm_global_hca_lock);

	/* don't send the event if we're tearing down */
	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
		mutex_exit(&ibcm_global_hca_lock);
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* hold an access reference across the client callback */
	++(portp->port_hcap->hca_acc_cnt);
	mutex_exit(&ibcm_global_hca_lock);

	event.sm_notice_gid = saa_event_details->ie_gid;
	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);

	mutex_exit(&ibcm_sm_notice_serialize_lock);

	ibcm_dec_hca_acc_cnt(portp->port_hcap);
}
1911 
/*
 * ibt_register_subnet_notices:
 *	Registers (or, with a NULL handler, clears) a client's subnet
 *	event handler with IBTL.  For every port whose SA has not reported
 *	an SM producer, an "init failure" notification listing those
 *	ports' SGIDs is delivered to the new handler, so the client learns
 *	of SMs that were already unavailable before it registered.
 */
void
ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	ibcm_port_info_t	*portp;
	ibcm_hca_info_t		*hcap;
	uint8_t			port;
	int			num_failed_sgids;
	ibtl_cm_sm_init_fail_t	*ifail;
	ib_gid_t		*sgidp;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices(%p, %s)",
	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl));

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
	if (sm_notice_handler == NULL) {
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* for each port, if service is not available, make a call */
	mutex_enter(&ibcm_global_hca_lock);
	num_failed_sgids = 0;
	hcap = ibcm_hca_listp;
	while (hcap != NULL) {
		portp = hcap->hca_port_info;
		for (port = 0; port < hcap->hca_num_ports; port++) {
			if (!(portp->port_event_status &
			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
				num_failed_sgids++;
			portp++;
		}
		hcap = hcap->hca_next;
	}
	if (num_failed_sgids != 0) {
		/*
		 * Second pass (under the same continuous lock hold, so
		 * the count stays valid): collect the failed ports' SGIDs
		 * into the variable-length smf_sgid array (the struct
		 * declares one element, hence num_failed_sgids - 1 extra).
		 */
		ifail = kmem_alloc(sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
		ifail->smf_num_sgids = num_failed_sgids;
		ifail->smf_ibt_hdl = ibt_hdl;
		sgidp = &ifail->smf_sgid[0];
		hcap = ibcm_hca_listp;
		while (hcap != NULL) {
			portp = hcap->hca_port_info;
			for (port = 0; port < hcap->hca_num_ports; port++) {
				if (!(portp->port_event_status &
				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
					*sgidp++ = portp->port_sgid0;
				portp++;
			}
			hcap = hcap->hca_next;
		}
	}
	mutex_exit(&ibcm_global_hca_lock);

	if (num_failed_sgids != 0) {
		ibtl_cm_sm_notice_init_failure(ifail);
		kmem_free(ifail, sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
	}
	mutex_exit(&ibcm_sm_notice_serialize_lock);
}
1975 
1976 /* The following is run from a taskq because we've seen the stack overflow. */
1977 static void
ibcm_init_saa(void * arg)1978 ibcm_init_saa(void *arg)
1979 {
1980 	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
1981 	int				status;
1982 	ib_guid_t			port_guid;
1983 	ibmf_saa_subnet_event_args_t	event_args;
1984 
1985 	port_guid = portp->port_sgid0.gid_guid;
1986 
1987 	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);
1988 
1989 	event_args.is_event_callback_arg = portp;
1990 	event_args.is_event_callback = ibcm_sm_notice_handler;
1991 
1992 	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
1993 	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
1994 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
1995 		    "ibmf_sa_session_open failed for port guid %llX "
1996 		    "status = %d", port_guid, status);
1997 	} else {
1998 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
1999 		    "registered sa_hdl 0x%p for port guid %llX",
2000 		    portp->port_ibmf_saa_hdl, port_guid);
2001 	}
2002 
2003 	mutex_enter(&ibcm_sa_open_lock);
2004 	portp->port_saa_open_in_progress = 0;
2005 	cv_broadcast(&ibcm_sa_open_cv);
2006 	mutex_exit(&ibcm_sa_open_lock);
2007 }
2008 
2009 void
ibcm_init_saa_handle(ibcm_hca_info_t * hcap,uint8_t port)2010 ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2011 {
2012 	ibmf_saa_handle_t	saa_handle;
2013 	uint8_t			port_index = port - 1;
2014 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2015 	ibt_status_t		ibt_status;
2016 
2017 	if (port_index >= hcap->hca_num_ports)
2018 		return;
2019 
2020 	mutex_enter(&ibcm_sa_open_lock);
2021 	if (portp->port_saa_open_in_progress) {
2022 		mutex_exit(&ibcm_sa_open_lock);
2023 		return;
2024 	}
2025 
2026 	saa_handle = portp->port_ibmf_saa_hdl;
2027 	if (saa_handle != NULL) {
2028 		mutex_exit(&ibcm_sa_open_lock);
2029 		return;
2030 	}
2031 
2032 	portp->port_saa_open_in_progress = 1;
2033 	mutex_exit(&ibcm_sa_open_lock);
2034 
2035 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2036 
2037 	/* The assumption is that we're getting event notifications */
2038 	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
2039 
2040 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2041 
2042 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2043 	    portp->port_num, &portp->port_sgid0, NULL);
2044 	if (ibt_status != IBT_SUCCESS) {
2045 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
2046 		    "ibt_get_port_state_byguid failed for guid %llX "
2047 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2048 		mutex_enter(&ibcm_sa_open_lock);
2049 		portp->port_saa_open_in_progress = 0;
2050 		cv_broadcast(&ibcm_sa_open_cv);
2051 		mutex_exit(&ibcm_sa_open_lock);
2052 		return;
2053 	}
2054 	/* if the port is UP, try sa_session_open */
2055 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2056 }
2057 
2058 
2059 ibmf_saa_handle_t
ibcm_get_saa_handle(ibcm_hca_info_t * hcap,uint8_t port)2060 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2061 {
2062 	ibmf_saa_handle_t	saa_handle;
2063 	uint8_t			port_index = port - 1;
2064 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2065 	ibt_status_t		ibt_status;
2066 
2067 	if (port_index >= hcap->hca_num_ports)
2068 		return (NULL);
2069 
2070 	mutex_enter(&ibcm_sa_open_lock);
2071 	while (portp->port_saa_open_in_progress) {
2072 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2073 	}
2074 
2075 	saa_handle = portp->port_ibmf_saa_hdl;
2076 	if (saa_handle != NULL) {
2077 		mutex_exit(&ibcm_sa_open_lock);
2078 		return (saa_handle);
2079 	}
2080 
2081 	portp->port_saa_open_in_progress = 1;
2082 	mutex_exit(&ibcm_sa_open_lock);
2083 
2084 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2085 	    portp->port_num, &portp->port_sgid0, NULL);
2086 	if (ibt_status != IBT_SUCCESS) {
2087 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
2088 		    "ibt_get_port_state_byguid failed for guid %llX "
2089 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2090 		mutex_enter(&ibcm_sa_open_lock);
2091 		portp->port_saa_open_in_progress = 0;
2092 		cv_broadcast(&ibcm_sa_open_cv);
2093 		mutex_exit(&ibcm_sa_open_lock);
2094 		return (NULL);
2095 	}
2096 	/* if the port is UP, try sa_session_open */
2097 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2098 
2099 	mutex_enter(&ibcm_sa_open_lock);
2100 	while (portp->port_saa_open_in_progress) {
2101 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2102 	}
2103 	saa_handle = portp->port_ibmf_saa_hdl;
2104 	mutex_exit(&ibcm_sa_open_lock);
2105 	return (saa_handle);
2106 }
2107 
2108 
2109 /*
2110  * ibcm_hca_init_port():
2111  * 	- Register port with IBMA
2112  *
2113  * Arguments:
2114  *	hcap		- HCA's guid
2115  *	port_index	- port number minus 1
2116  *
2117  * Return values:
2118  *	IBCM_SUCCESS - success
2119  */
2120 ibt_status_t
ibcm_hca_init_port(ibcm_hca_info_t * hcap,uint8_t port_index)2121 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2122 {
2123 	int			status;
2124 	ibmf_register_info_t	*ibmf_reg;
2125 
2126 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2127 	    hcap, port_index + 1);
2128 
2129 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2130 
2131 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
2132 
2133 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2134 		/* Register with IBMF */
2135 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2136 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
2137 		ibmf_reg->ir_port_num = port_index + 1;
2138 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2139 
2140 		/*
2141 		 * register with management framework
2142 		 */
2143 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
2144 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2145 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2146 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
2147 
2148 		if (status != IBMF_SUCCESS) {
2149 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2150 			    "ibmf_register failed for port_num %x, "
2151 			    "status = %x", port_index + 1, status);
2152 			return (ibcm_ibmf_analyze_error(status));
2153 		}
2154 
2155 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
2156 		    IBMF_QP_HANDLE_DEFAULT;
2157 		hcap->hca_port_info[port_index].port_qp1.qp_port =
2158 		    &(hcap->hca_port_info[port_index]);
2159 
2160 		/*
2161 		 * Register the read callback with IBMF.
2162 		 * Since we just did an ibmf_register, handle is
2163 		 * valid and ibcm_recv_cb() is valid so we can
2164 		 * safely assert for success of ibmf_setup_recv_cb()
2165 		 *
2166 		 * Depending on the "state" of the HCA,
2167 		 * CM may drop incoming packets
2168 		 */
2169 		status = ibmf_setup_async_cb(
2170 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2171 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2172 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
2173 		ASSERT(status == IBMF_SUCCESS);
2174 
2175 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2176 		    "IBMF hdl[%x] = 0x%p", port_index,
2177 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2178 
2179 		/* Attempt to get the saa_handle for this port */
2180 		ibcm_init_saa_handle(hcap, port_index + 1);
2181 	}
2182 
2183 	return (IBT_SUCCESS);
2184 }
2185 
2186 /*
2187  * useful, to re attempt to initialize port ibma handles from elsewhere in
2188  * cm code
2189  */
2190 ibt_status_t
ibcm_hca_reinit_port(ibcm_hca_info_t * hcap,uint8_t port_index)2191 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2192 {
2193 	ibt_status_t	status;
2194 
2195 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2196 	    hcap, port_index + 1);
2197 
2198 	mutex_enter(&ibcm_global_hca_lock);
2199 	status = ibcm_hca_init_port(hcap, port_index);
2200 	mutex_exit(&ibcm_global_hca_lock);
2201 	return (status);
2202 }
2203 
2204 
2205 /*
2206  * ibcm_hca_fini_port():
2207  * 	- Deregister port with IBMA
2208  *
2209  * Arguments:
2210  *	hcap		- HCA's guid
2211  *	port_index	- port number minus 1
2212  *
2213  * Return values:
2214  *	IBCM_SUCCESS - success
2215  */
2216 static ibcm_status_t
ibcm_hca_fini_port(ibcm_hca_info_t * hcap,uint8_t port_index)2217 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2218 {
2219 	int			ibmf_status;
2220 	ibcm_status_t		ibcm_status;
2221 
2222 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2223 	    hcap, port_index + 1);
2224 
2225 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2226 
2227 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2228 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2229 		    "ibmf_sa_session_close IBMF SAA hdl %p",
2230 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2231 
2232 		ibmf_status = ibmf_sa_session_close(
2233 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2234 		if (ibmf_status != IBMF_SUCCESS) {
2235 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2236 			    "ibmf_sa_session_close of port %d returned %x",
2237 			    port_index + 1, ibmf_status);
2238 			return (IBCM_FAILURE);
2239 		}
2240 	}
2241 
2242 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2243 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2244 		    "ibmf_unregister IBMF Hdl %p",
2245 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2246 
2247 		/* clean-up all the ibmf qp's allocated on this port */
2248 		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
2249 
2250 		if (ibcm_status != IBCM_SUCCESS) {
2251 
2252 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2253 			    "ibcm_free_allqps failed for port_num %d",
2254 			    port_index + 1);
2255 			return (IBCM_FAILURE);
2256 		}
2257 
2258 		/* Tear down the receive callback */
2259 		ibmf_status = ibmf_tear_down_async_cb(
2260 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2261 		    IBMF_QP_HANDLE_DEFAULT, 0);
2262 
2263 		if (ibmf_status != IBMF_SUCCESS) {
2264 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2265 			    "ibmf_tear_down_async_cb failed %d port_num %d",
2266 			    ibmf_status, port_index + 1);
2267 			return (IBCM_FAILURE);
2268 		}
2269 
2270 		/* Now, unregister with IBMF */
2271 		ibmf_status = ibmf_unregister(
2272 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2273 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2274 		    "ibmf_unregister of port_num %x returned %x",
2275 		    port_index + 1, ibmf_status);
2276 
2277 		if (ibmf_status == IBMF_SUCCESS)
2278 			hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
2279 		else {
2280 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2281 			    "ibmf_unregister failed %d port_num %d",
2282 			    ibmf_status, port_index + 1);
2283 			return (IBCM_FAILURE);
2284 		}
2285 	}
2286 	return (IBCM_SUCCESS);
2287 }
2288 
2289 /*
2290  * ibcm_comm_est_handler():
2291  *	Check if the given channel is in ESTABLISHED state or not
2292  *
2293  * Arguments:
2294  *	eventp	- A pointer to an ibt_async_event_t struct
2295  *
2296  * Return values: NONE
2297  */
static void
ibcm_comm_est_handler(ibt_async_event_t *eventp)
{
	ibcm_state_data_t	*statep;

	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");

	/* Both QP and EEC handles can't be NULL */
	if (eventp->ev_chan_hdl == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
		    "both QP and EEC handles are NULL");
		return;
	}

	/* get the "statep" from qp/eec handles */
	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
		return;
	}

	mutex_enter(&statep->state_mutex);

	/*
	 * NOTE(review): the channel-private hold is released here with
	 * state_mutex held; statep is kept alive below via its ref count.
	 */
	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);

	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);

	/* hold a reference across the lock drops below */
	IBCM_REF_CNT_INCR(statep);

	if ((statep->state == IBCM_STATE_REP_SENT) ||
	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
		timeout_id_t	timer_val = statep->timerid;

		/* COM_EST before RTU: advance to transient-established */
		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;

		if (timer_val) {
			/*
			 * cancel the pending timeout; drop state_mutex first,
			 * presumably because the timeout handler takes it
			 * (TODO confirm)
			 */
			statep->timerid = 0;
			mutex_exit(&statep->state_mutex);
			(void) untimeout(timer_val);
		} else
			mutex_exit(&statep->state_mutex);

		/* CM doesn't have RTU message here */
		ibcm_cep_state_rtu(statep, NULL);

	} else {
		if (statep->state == IBCM_STATE_ESTABLISHED ||
		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
			    "Channel already in ESTABLISHED state");
		} else {
			/* An unexpected behavior from remote */
			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
			    "Unexpected in state = %d", statep->state);
		}
		mutex_exit(&statep->state_mutex);

		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
	}

	/* drop the reference taken above */
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
2362 
2363 
2364 /*
2365  * ibcm_async_handler():
2366  *	CM's Async Handler
2367  *	(Handles ATTACH, DETACH, COM_EST events)
2368  *
2369  * Arguments:
2370  *	eventp	- A pointer to an ibt_async_event_t struct
2371  *
2372  * Return values: None
2373  *
2374  * NOTE : CM assumes that all HCA DR events are delivered sequentially
2375  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
2376  * shall not invoke ibcm_async_handler with another DR event for the same
2377  * HCA
2378  */
/* ARGSUSED */
void
ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *eventp)
{
	ibcm_hca_info_t		*hcap;
	ibcm_port_up_t		*pup;

	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
	    clnt_hdl, code, eventp);

	mutex_enter(&ibcm_global_hca_lock);

	/* If fini is going to complete successfully, then return */
	if (ibcm_finit_state != IBCM_FINIT_IDLE) {

		/*
		 * This finit state implies one of the following:
		 * Init either didn't start or didn't complete OR
		 * Fini is about to return SUCCESS and release the global lock.
		 * In all these cases, it is safe to ignore the async.
		 */

		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
		    "as either init didn't complete or fini about to succeed",
		    code);
		mutex_exit(&ibcm_global_hca_lock);
		return;
	}

	switch (code) {
	case IBT_PORT_CHANGE_EVENT:
		/* only SM LID changes are of interest; ignore the rest */
		if ((eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID) == 0)
			break;
	/* FALLTHROUGH */
	case IBT_CLNT_REREG_EVENT:
	case IBT_EVENT_PORT_UP:
		/*
		 * Drop the global lock early and hand the service-record
		 * rewrite off to a taskq; this path returns directly
		 * rather than falling out to the common unlock below.
		 */
		mutex_exit(&ibcm_global_hca_lock);
		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
		pup->pup_hca_guid = eventp->ev_hca_guid;
		pup->pup_port = eventp->ev_port;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
		(void) taskq_dispatch(ibcm_taskq,
		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
		/* cached paths may be stale after a port/SM change */
		ibcm_path_cache_purge();
		return;

	case IBT_HCA_ATTACH_EVENT:

		/* eventp->ev_hcaguid is the HCA GUID of interest */
		ibcm_hca_attach(eventp->ev_hca_guid);
		break;

	case IBT_HCA_DETACH_EVENT:

		/* eventp->ev_hca_guid is the HCA GUID of interest */
		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
		    NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
			    " hca %llX doesn't exist", eventp->ev_hca_guid);
			break;
		}

		/* detach may fail (e.g. busy); result is ignored here */
		(void) ibcm_hca_detach(hcap);
		break;

	case IBT_EVENT_COM_EST_QP:
		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
	case IBT_EVENT_COM_EST_EEC:
		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
		ibcm_comm_est_handler(eventp);
		break;
	default:
		break;
	}

	/* Unblock, any blocked fini/init operations */
	mutex_exit(&ibcm_global_hca_lock);
}
2460