/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * ibcm_impl.c
 *
 * Contains internal functions of the IB CM module.
 *
 * TBD:
 * 1. HCA CATASTROPHIC/RECOVERED not handled yet
 */

#include <sys/ib/mgt/ibcm/ibcm_impl.h>
#include <sys/disp.h>

/* function prototypes */
static ibcm_status_t	ibcm_init(void);
static ibcm_status_t	ibcm_fini(void);

/* Routines to initialize and destroy CM global locks and CVs */
static void		ibcm_init_locks(void);
static void		ibcm_fini_locks(void);

/* Routines that initialize/teardown CM's global hca structures */
static void		ibcm_init_hcas();
static ibcm_status_t	ibcm_fini_hcas();

static void		ibcm_init_classportinfo();
static void		ibcm_stop_timeout_thread();

/* Routines that handle HCA attach/detach asyncs */
static void		ibcm_hca_attach(ib_guid_t);
static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);

/* Routines that initialize the HCA's port related fields */
static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
			    uint8_t port_index);
static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
			    uint8_t port_index);

static void ibcm_rc_flow_control_init(void);
static void ibcm_rc_flow_control_fini(void);

/*
 * Routines that check if hca's avl trees and sidr lists are free of any
 * active client resources, i.e., RC or UD state structures in certain states
 */
static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);

/* Add a new hca structure to CM's global hca list */
static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);

static void		ibcm_comm_est_handler(ibt_async_event_t *);
void			ibcm_async_handler(void *, ibt_hca_hdl_t,
			    ibt_async_code_t, ibt_async_event_t *);

/* Global variables */
char			cmlog[] = "ibcm";	/* for debug log messages */
ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
kmutex_t		ibcm_svc_info_lock;	/* list lock */
kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
kmutex_t		ibcm_recv_mutex;
avl_tree_t		ibcm_svc_avl_tree;
taskq_t			*ibcm_taskq = NULL;
int			taskq_dispatch_fail_cnt;

kmutex_t		ibcm_mcglist_lock;	/* MCG list lock */
kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;

int			ibcm_enable_trace = 2;	/* Trace level 2 by default */
int			ibcm_dtrace = 0; /* conditionally enable more dtrace */

_NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
    svc_ref_cnt svc_to_delete}))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))

_NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))

/*
 * Initial state is INIT. All HCA DR asyncs return success immediately in
 * this state, without adding or deleting any HCAs in CM.
 */
ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;

/* mutex and cv to manage hca's reference and resource count(s) */
kmutex_t	ibcm_global_hca_lock;
kcondvar_t	ibcm_global_hca_cv;

/* mutex and cv for sa session open */
kmutex_t	ibcm_sa_open_lock;
kcondvar_t	ibcm_sa_open_cv;
int		ibcm_sa_timeout_delay = 1;		/* in ticks */

_NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
    ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))

_NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))

/* serialize sm notice callbacks */
kmutex_t	ibcm_sm_notice_serialize_lock;

_NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
    hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
    ibcm_port_info_s::{port_ibmf_hdl}))

_NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
    ibcm_port_info_s::{port_event_status}))

_NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
_NOTE(DATA_READABLE_WITHOUT_LOCK(
    ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))

/* mutex for CM's qp list management */
kmutex_t	ibcm_qp_list_lock;

_NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))

kcondvar_t	ibcm_timeout_list_cv;
kcondvar_t	ibcm_timeout_thread_done_cv;
kt_did_t	ibcm_timeout_thread_did;
ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
kmutex_t	ibcm_timeout_list_lock;
uint8_t		ibcm_timeout_list_flags = 0;
pri_t		ibcm_timeout_thread_pri = MINCLSYSPRI;

_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
    ibcm_state_data_s::timeout_next))
_NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
    ibcm_ud_state_data_s::ud_timeout_next))

/*
 * Flow control logic for open_rc_channel uses the following.
 */
struct ibcm_open_s {
	kmutex_t		mutex;
	kcondvar_t		cv;
	uint8_t			task_running;
	uint_t			queued;
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;
	uint_t			sends_hiwat;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;
} ibcm_open;

/*
 * Flow control logic for SA access and close_rc_channel calls follows.
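 *
 * Callers bracket each such fabric operation with an enter/exit pair
 * (defined later in this file); ibcm_flow_enter() blocks once simul_max
 * requests are outstanding. A minimal usage sketch, illustrative only
 * (the real call sites live in the CM code that issues SA queries and
 * closes RC channels):
 *
 *	ibcm_sa_access_enter();
 *	(issue the SA query through the port's ibmf_saa handle)
 *	ibcm_sa_access_exit();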
*/ int ibcm_close_simul_max = 12; int ibcm_lapr_simul_max = 12; int ibcm_saa_simul_max = 8; typedef struct ibcm_flow1_s { struct ibcm_flow1_s *link; kcondvar_t cv; uint8_t waiters; /* 1 to IBCM_FLOW_SIMUL_MAX */ } ibcm_flow1_t; typedef struct ibcm_flow_s { ibcm_flow1_t *list; uint_t simul; /* #requests currently outstanding */ uint_t simul_max; uint_t waiters_per_chunk; uint_t lowat; uint_t lowat_default; /* statistics */ uint_t total; } ibcm_flow_t; ibcm_flow_t ibcm_saa_flow; ibcm_flow_t ibcm_close_flow; ibcm_flow_t ibcm_lapr_flow; /* NONBLOCKING close requests are queued */ struct ibcm_close_s { kmutex_t mutex; ibcm_state_data_t *tail; ibcm_state_data_t head; } ibcm_close; static ibt_clnt_modinfo_t ibcm_ibt_modinfo = { /* Client's modinfop */ IBTI_V_CURR, IBT_CM, ibcm_async_handler, NULL, "IBCM" }; /* IBCM's list of HCAs registered with it */ static ibcm_hca_info_t *ibcm_hca_listp = NULL; /* CM's HCA list */ /* Array of CM state call table functions */ ibcm_state_handler_t ibcm_sm_funcs_tbl[] = { ibcm_process_req_msg, ibcm_process_mra_msg, ibcm_process_rej_msg, ibcm_process_rep_msg, ibcm_process_rtu_msg, ibcm_process_dreq_msg, ibcm_process_drep_msg, ibcm_process_sidr_req_msg, ibcm_process_sidr_rep_msg, ibcm_process_lap_msg, ibcm_process_apr_msg }; /* the following globals are CM tunables */ ibt_rnr_nak_time_t ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms; uint8_t ibcm_max_retries = IBCM_MAX_RETRIES; clock_t ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME; clock_t ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME; ib_time_t ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME; ib_time_t ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME; ib_time_t ibcm_max_sidr_rep_store_time = 18; uint32_t ibcm_wait_for_acc_cnt_timeout = 2000000; /* 2 sec */ ib_time_t ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT; ib_time_t ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT; /* * This delay accounts for time involved in various activities as follows : * * IBMF delays for posting the MADs in non-blocking mode * IBMF delays for receiving the MADs and delivering to CM * CM delays in processing the MADs before invoking client handlers, * Any other delays associated with HCA driver in processing the MADs and * other subsystems that CM may invoke (ex : SA, HCA driver) */ uint32_t ibcm_sw_delay = 1000; /* 1000us / 1ms */ uint32_t ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1; /* approx boot time */ uint32_t ibcm_adj_btime = 4; /* 4 seconds */ /* * The information in ibcm_clpinfo is kept in wireformat and is setup at * init time, and used read-only after that */ ibcm_classportinfo_msg_t ibcm_clpinfo; char *event_str[] = { "NEVER SEE THIS ", "SESSION_ID ", "CHAN_HDL ", "LOCAL_COMID/HCA/PORT ", "LOCAL_QPN ", "REMOTE_COMID/HCA ", "REMOTE_QPN ", "BASE_TIME ", "INCOMING_REQ ", "INCOMING_REP ", "INCOMING_RTU ", "INCOMING_COMEST ", "INCOMING_MRA ", "INCOMING_REJ ", "INCOMING_LAP ", "INCOMING_APR ", "INCOMING_DREQ ", "INCOMING_DREP ", "OUTGOING_REQ ", "OUTGOING_REP ", "OUTGOING_RTU ", "OUTGOING_LAP ", "OUTGOING_APR ", "OUTGOING_MRA ", "OUTGOING_REJ ", "OUTGOING_DREQ ", "OUTGOING_DREP ", "REQ_POST_COMPLETE ", "REP_POST_COMPLETE ", "RTU_POST_COMPLETE ", "MRA_POST_COMPLETE ", "REJ_POST_COMPLETE ", "LAP_POST_COMPLETE ", "APR_POST_COMPLETE ", "DREQ_POST_COMPLETE ", "DREP_POST_COMPLETE ", "TIMEOUT_REP ", "CALLED_REQ_RCVD_EVENT ", "RET_REQ_RCVD_EVENT ", "CALLED_REP_RCVD_EVENT ", "RET_REP_RCVD_EVENT ", "CALLED_CONN_EST_EVENT ", "RET_CONN_EST_EVENT ", "CALLED_CONN_FAIL_EVENT ", "RET_CONN_FAIL_EVENT ", 
"CALLED_CONN_CLOSE_EVENT ", "RET_CONN_CLOSE_EVENT ", "INIT_INIT ", "INIT_INIT_FAIL ", "INIT_RTR ", "INIT_RTR_FAIL ", "RTR_RTS ", "RTR_RTS_FAIL ", "RTS_RTS ", "RTS_RTS_FAIL ", "TO_ERROR ", "ERROR_FAIL ", "SET_ALT ", "SET_ALT_FAIL ", "STALE_DETECT ", "OUTGOING_REQ_RETRY ", "OUTGOING_REP_RETRY ", "OUTGOING_LAP_RETRY ", "OUTGOING_MRA_RETRY ", "OUTGOING_DREQ_RETRY ", "NEVER SEE THIS " }; char ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE]; _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently", ibcm_debug_buf)) _NOTE(READ_ONLY_DATA(ibcm_taskq)) _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags)) _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr)) _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr)) #ifdef DEBUG int ibcm_test_mode = 0; /* set to 1, if running tests */ #endif /* Module Driver Info */ static struct modlmisc ibcm_modlmisc = { &mod_miscops, "IB Communication Manager" }; /* Module Linkage */ static struct modlinkage ibcm_modlinkage = { MODREV_1, &ibcm_modlmisc, NULL }; int _init(void) { int rval; ibcm_status_t status; status = ibcm_init(); if (status != IBCM_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status); return (EINVAL); } rval = mod_install(&ibcm_modlinkage); if (rval != 0) { IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d", rval); (void) ibcm_fini(); } IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful"); return (rval); } int _info(struct modinfo *modinfop) { return (mod_info(&ibcm_modlinkage, modinfop)); } int _fini(void) { int status; if (ibcm_fini() != IBCM_SUCCESS) return (EBUSY); if ((status = mod_remove(&ibcm_modlinkage)) != 0) { IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d", status); return (status); } IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful"); return (status); } /* Initializes all global mutex and CV in cm module */ static void ibcm_init_locks() { /* Verify CM MAD sizes */ #ifdef DEBUG if (ibcm_test_mode > 1) { IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d", sizeof (ibcm_req_msg_t)); IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d", sizeof (ibcm_rep_msg_t)); IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d", sizeof (ibcm_rtu_msg_t)); IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d", sizeof (ibcm_mra_msg_t)); IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d", sizeof (ibcm_rej_msg_t)); IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d", sizeof (ibcm_lap_msg_t)); IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d", sizeof (ibcm_apr_msg_t)); IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d", sizeof (ibcm_dreq_msg_t)); IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d", sizeof (ibcm_drep_msg_t)); IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d", sizeof (ibcm_sidr_req_msg_t)); IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d", sizeof (ibcm_sidr_rep_msg_t)); } #endif /* Create all global locks within cm module */ mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_mcglist_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL); cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL); cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL); 
cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL); cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL); cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL); avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare, sizeof (ibcm_svc_info_t), offsetof(struct ibcm_svc_info_s, svc_link)); IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done"); } /* Destroys all global mutex and CV in cm module */ static void ibcm_fini_locks() { /* Destroy all global locks within cm module */ mutex_destroy(&ibcm_svc_info_lock); mutex_destroy(&ibcm_mcglist_lock); mutex_destroy(&ibcm_timeout_list_lock); mutex_destroy(&ibcm_global_hca_lock); mutex_destroy(&ibcm_sa_open_lock); mutex_destroy(&ibcm_recv_mutex); mutex_destroy(&ibcm_sm_notice_serialize_lock); mutex_destroy(&ibcm_qp_list_lock); mutex_destroy(&ibcm_trace_mutex); mutex_destroy(&ibcm_trace_print_mutex); cv_destroy(&ibcm_svc_info_cv); cv_destroy(&ibcm_timeout_list_cv); cv_destroy(&ibcm_timeout_thread_done_cv); cv_destroy(&ibcm_global_hca_cv); cv_destroy(&ibcm_sa_open_cv); avl_destroy(&ibcm_svc_avl_tree); IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done"); } /* Initialize CM's classport info */ static void ibcm_init_classportinfo() { _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo)); ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION; ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION; /* For now, CM supports same capabilities at all ports */ ibcm_clpinfo.CapabilityMask = h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR); /* Bits 0-7 are all 0 for Communication Mgmt Class */ /* For now, CM has the same respvalue at all ports */ ibcm_clpinfo.RespTimeValue_plus = h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f); /* For now, redirect fields are set to 0 */ /* Trap fields are not applicable to CM, hence set to 0 */ _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo)); IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done"); } /* * ibcm_init(): * - call ibt_attach() * - create AVL trees * - Attach HCA handlers that are already present before * CM got loaded. * * Arguments: NONE * * Return values: * IBCM_SUCCESS - success */ static ibcm_status_t ibcm_init(void) { ibt_status_t status; kthread_t *t; IBTF_DPRINTF_L3(cmlog, "ibcm_init:"); ibcm_init_classportinfo(); if (ibcm_init_ids() != IBCM_SUCCESS) { IBTF_DPRINTF_L1(cmlog, "ibcm_init: " "fatal error: vmem_create() failed"); return (IBCM_FAILURE); } ibcm_init_locks(); if (ibcm_ar_init() != IBCM_SUCCESS) { IBTF_DPRINTF_L1(cmlog, "ibcm_init: " "fatal error: ibcm_ar_init() failed"); ibcm_fini_ids(); ibcm_fini_locks(); return (IBCM_FAILURE); } ibcm_rc_flow_control_init(); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq)) ibcm_taskq = system_taskq; _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did)) /* Start the timeout list processing thread */ ibcm_timeout_list_flags = 0; t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN, ibcm_timeout_thread_pri); ibcm_timeout_thread_did = t->t_did; _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags)) _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did)) /* * NOTE : if ibt_attach is done after ibcm_init_hcas, then some * HCA DR events may be lost. CM could call re-init hca list * again, but it is more complicated. 
 * Among the lost DR events could be an HCA detach, which would make
 * re-syncing and locking of the hca list more complex.
 */
	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
		    status);
		(void) ibcm_ar_fini();
		ibcm_stop_timeout_thread();
		ibcm_fini_ids();
		ibcm_fini_locks();
		ibcm_rc_flow_control_fini();
		return (IBCM_FAILURE);
	}

	/* Block all HCA attach/detach asyncs */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_init_hcas();
	ibcm_finit_state = IBCM_FINIT_IDLE;

	ibcm_path_cache_init();
	/*
	 * This callback will be used by IBTL to get the Node record for a
	 * given LID via the specified HCA and port.
	 */
	ibtl_cm_set_node_info_cb(ibcm_ibtl_node_info);

	/* Unblock any waiting HCA DR asyncs in CM */
	mutex_exit(&ibcm_global_hca_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
	return (IBCM_SUCCESS);
}

/* Allocates and initializes the "per hca" global data in CM */
static void
ibcm_init_hcas()
{
	uint_t	num_hcas = 0;
	ib_guid_t *guid_array;
	int i;

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");

	/* Get the number of HCAs */
	num_hcas = ibt_get_hca_list(&guid_array);
	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
	    "returned %d hcas", num_hcas);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	for (i = 0; i < num_hcas; i++)
		ibcm_hca_attach(guid_array[i]);

	if (num_hcas)
		ibt_free_hca_list(guid_array, num_hcas);

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
}

/*
 * ibcm_fini():
 *	- Deregister w/ ibt
 *	- Cleanup IBCM HCA listp
 *	- Destroy mutexes
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 */
static ibcm_status_t
ibcm_fini(void)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");

	/*
	 * CM assumes that all general clients have torn down their
	 * established connections and service registrations, and completed
	 * all pending SIDR operations, before a call to ibcm_fini()
	 */
	if (ibcm_ar_fini() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
		return (IBCM_FAILURE);
	}

	/* cleanup the svcinfo list */
	mutex_enter(&ibcm_svc_info_lock);
	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "ibcm_svc_avl_tree is not empty");
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}
	mutex_exit(&ibcm_svc_info_lock);

	/* disables any new hca attach/detaches */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_finit_state = IBCM_FINIT_BUSY;

	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "some hca's still have client resources");

		/* First, re-initialize the hcas */
		ibcm_init_hcas();
		/* and then enable the HCA asyncs */
		ibcm_finit_state = IBCM_FINIT_IDLE;
		mutex_exit(&ibcm_global_hca_lock);
		if (ibcm_ar_init() != IBCM_SUCCESS) {
			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
		}
		return (IBCM_FAILURE);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	ASSERT(ibcm_timeout_list_hdr == NULL);
	ASSERT(ibcm_ud_timeout_list_hdr == NULL);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))

	/* Release any pending asyncs on ibcm_global_hca_lock */
	ibcm_finit_state = IBCM_FINIT_SUCCESS;
	mutex_exit(&ibcm_global_hca_lock);

	ibcm_stop_timeout_thread();

	ibtl_cm_set_node_info_cb(NULL);
	/*
	 * Detach from IBTL. Waits until all pending asyncs are complete.
	 * The mutex_exit above lets any HCA attach/detach asyncs that were
	 * blocked on ibcm_global_hca_lock proceed; they then notice the
	 * FINIT state change and return without touching the hca list.
	 */
	status = ibt_detach(ibcm_ibt_handle);

	/* if detach fails, CM did not free up some resources; just log it */
	if (status != IBT_SUCCESS)
		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
		    status);

	ibcm_rc_flow_control_fini();

	ibcm_path_cache_fini();

	ibcm_fini_ids();
	ibcm_fini_locks();
	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
	return (IBCM_SUCCESS);
}

/* This routine exits the ibcm timeout thread */
static void
ibcm_stop_timeout_thread()
{
	mutex_enter(&ibcm_timeout_list_lock);

	/* Stop the timeout list processing thread */
	ibcm_timeout_list_flags =
	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;

	/* Wake up, if the timeout thread is on a cv_wait */
	cv_signal(&ibcm_timeout_list_cv);

	mutex_exit(&ibcm_timeout_list_lock);
	thread_join(ibcm_timeout_thread_did);

	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
}

/* Attempts to release all the hca's associated with CM */
static ibcm_status_t
ibcm_fini_hcas()
{
	ibcm_hca_info_t *hcap, *next;

	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	hcap = ibcm_hca_listp;
	while (hcap != NULL) {
		next = hcap->hca_next;
		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
			ibcm_hca_listp = hcap;
			return (IBCM_FAILURE);
		}
		hcap = next;
	}

	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
	return (IBCM_SUCCESS);
}

/*
 * ibcm_hca_attach():
 *	Called as an asynchronous event to notify CM of an attach of HCA.
 *	Here ibcm_hca_info_t is initialized and all fields are
 *	filled in along with SA Access handles and IBMA handles.
 *	Also called from ibcm_init to initialize an ibcm_hca_info_t for
 *	each HCA that is already present.
 *
 * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
 *	hca_guid	- HCA's guid
 *
 * Return values: NONE
 */
static void
ibcm_hca_attach(ib_guid_t hcaguid)
{
	int			i;
	ibt_status_t		status;
	uint8_t			nports = 0;
	ibcm_hca_info_t		*hcap;
	ibt_hca_attr_t		hca_attrs;

	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))

	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
		    "ibt_query_hca_byguid failed = %d", status);
		return;
	}
	nports = hca_attrs.hca_nports;

	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);

	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
		return;

	hcap->hca_guid = hcaguid;	/* Set GUID */
	hcap->hca_num_ports = nports;	/* Set number of ports */

	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
		ibcm_delete_hca_entry(hcap);
		return;
	}

	/* Store the static hca attribute data */
	hcap->hca_caps = hca_attrs.hca_flags;
	hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
	hcap->hca_device_id = hca_attrs.hca_device_id;
	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;

	/* loop thru nports and initialize IBMF handles */
	for (i = 0; i < hcap->hca_num_ports; i++) {
		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
		if (status != IBT_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
			    "port_num %d state DOWN", i + 1);
		}

		hcap->hca_port_info[i].port_hcap = hcap;
		hcap->hca_port_info[i].port_num = i+1;

		if ((status = ibcm_hca_init_port(hcap, i)) != IBT_SUCCESS)
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
			    "ibcm_hca_init_port failed %d port_num %d",
			    status, i+1);
	}

	/* create the "active" CM AVL tree */
	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare, sizeof
	    (ibcm_state_data_t),
	    offsetof(struct ibcm_state_data_s, avl_active_link));

	/* create the "passive" CM AVL tree */
	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
	    sizeof (ibcm_state_data_t),
	    offsetof(struct ibcm_state_data_s, avl_passive_link));

	/* create the "passive comid" CM AVL tree */
	avl_create(&hcap->hca_passive_comid_tree,
	    ibcm_passive_comid_node_compare,
	    sizeof (ibcm_state_data_t),
	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));

	/*
	 * Mark the state of the HCA to "attach" only at the end
	 * Now CM starts accepting incoming MADs and client API calls
	 */
	hcap->hca_state = IBCM_HCA_ACTIVE;

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))

	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
}

/*
 * ibcm_hca_detach():
 *	Called as an asynchronous event to notify CM of a detach of HCA.
 *	Here ibcm_hca_info_t is freed up and all fields that
 *	were initialized earlier are cleaned up
 *
 * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
 *	hca_guid    - HCA's guid
 *
 * Return values:
 *	IBCM_SUCCESS	- able to detach HCA
 *	IBCM_FAILURE	- failed to detach HCA
 */
static ibcm_status_t
ibcm_hca_detach(ibcm_hca_info_t *hcap)
{
	int		port_index, i;
	ibcm_status_t	status = IBCM_SUCCESS;
	clock_t		absolute_time;

	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
	    hcap, hcap->hca_guid);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	/*
	 * Declare to all CM clients that the hca is going away. Wait until
	 * the access count becomes zero.
	 */
	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;

	/* wait on response CV */
	absolute_time = ddi_get_lbolt() +
	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);

	while (hcap->hca_acc_cnt > 0)
		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
		    absolute_time) == -1)
			break;

	if (hcap->hca_acc_cnt != 0) {
		/* We got a timeout */
		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: timed out waiting "
		    "for hca_acc_cnt (%u) to drop to 0; some CM clients are "
		    "still active, so the detach must wait longer than "
		    "ibcm_wait_for_acc_cnt_timeout", hcap->hca_acc_cnt);
		hcap->hca_state = IBCM_HCA_ACTIVE;
		return (IBCM_FAILURE);
	}

	/*
	 * First make sure there are no active users of the ibma handles,
	 * and then de-register the handles.
	 */

	/* make sure that there are no "Service"s registered w/ this HCA. */
	if (hcap->hca_svc_cnt != 0) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
		    "Active services still there %d", hcap->hca_svc_cnt);
		hcap->hca_state = IBCM_HCA_ACTIVE;
		return (IBCM_FAILURE);
	}

	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
		    "There are active SIDR operations");
		hcap->hca_state = IBCM_HCA_ACTIVE;
		return (IBCM_FAILURE);
	}

	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
		    "There are active RC connections");
		hcap->hca_state = IBCM_HCA_ACTIVE;
		return (IBCM_FAILURE);
	}

	/*
	 * Now, wait until all rc and sidr stateps go away
	 * All these stateps must be short lived ones, waiting to be cleaned
	 * up after some timeout value, based on the current state.
	 */
	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
	    hcap->hca_guid, hcap->hca_res_cnt);

	while (hcap->hca_res_cnt > 0)
		cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock);

	/* All SIDR stateps must be gone by now (see the wait loop above) */
	ASSERT(hcap->hca_sidr_list == NULL);

	avl_destroy(&hcap->hca_active_tree);
	avl_destroy(&hcap->hca_passive_tree);
	avl_destroy(&hcap->hca_passive_comid_tree);

	/*
	 * Unregister all ports from IBMA
	 * If there is a failure, re-initialize any free'd ibma handles.
	 * This is required to continue receiving the incoming mads.
	 */
	status = IBCM_SUCCESS;
	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
		    IBCM_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
			    "Failed to free IBMA Handle for port_num %d",
			    port_index + 1);
			break;
		}
	}

	/* If detach fails, re-initialize ibma handles for incoming mads */
	if (status != IBCM_SUCCESS) {
		for (i = 0; i < port_index; i++) {
			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
				    "Failed to re-allocate IBMA Handles for"
				    " port_num %d", i + 1);
		}
		hcap->hca_state = IBCM_HCA_ACTIVE;
		return (IBCM_FAILURE);
	}

	ibcm_fini_hca_ids(hcap);
	ibcm_delete_hca_entry(hcap);

	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
	return (IBCM_SUCCESS);
}

/* Checks if there are any active sidr state entries in the specified hca */
static ibcm_status_t
ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
{
	ibcm_ud_state_data_t	*usp;
	uint32_t		transient_cnt = 0;

	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");

	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
	usp = hcap->hca_sidr_list;	/* Point to the list */
	while (usp != NULL) {
		mutex_enter(&usp->ud_state_mutex);
		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
		    (usp->ud_state != IBCM_STATE_DELETE)) {

			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
			    "usp = %p not in transient state = %d", usp,
			    usp->ud_state);

			mutex_exit(&usp->ud_state_mutex);
			rw_exit(&hcap->hca_sidr_list_lock);
			return (IBCM_FAILURE);
		} else {
			mutex_exit(&usp->ud_state_mutex);
			++transient_cnt;
		}

		usp = usp->ud_nextp;
	}
	rw_exit(&hcap->hca_sidr_list_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
	    transient_cnt);

	return (IBCM_SUCCESS);
}

/* Checks if there are any active rc state entries in the specified hca */
static ibcm_status_t
ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
{
	ibcm_state_data_t	*sp;
	avl_tree_t		*avl_tree;
	uint32_t		transient_cnt = 0;

	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
	/*
	 * Both trees, i.e., active and passive, must reference all
	 * statep's, so let's use one
	 */
	avl_tree = &hcap->hca_active_tree;

	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);

	for (sp = avl_first(avl_tree); sp != NULL;
	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
		mutex_enter(&sp->state_mutex);
		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
		    (sp->state != IBCM_STATE_REJ_SENT) &&
		    (sp->state != IBCM_STATE_DELETE)) {

			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
			    "sp = %p not in transient state = %d", sp,
			    sp->state);

			mutex_exit(&sp->state_mutex);
			rw_exit(&hcap->hca_state_rwlock);
			return (IBCM_FAILURE);
		} else {
			mutex_exit(&sp->state_mutex);
			++transient_cnt;
		}
	}

	rw_exit(&hcap->hca_state_rwlock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
	    transient_cnt);

	return (IBCM_SUCCESS);
}

/* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
static ibcm_hca_info_t *
ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
{
	ibcm_hca_info_t	*hcap;

	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
	    hcaguid);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	/*
	 * Check if this hca_guid is already in the list.
	 * If yes, then ignore this and return NULL
	 */
	hcap = ibcm_hca_listp;

	/* search for this HCA */
	while (hcap != NULL) {
		if (hcap->hca_guid == hcaguid) {
			/* already exists */
			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
			    "hcap %p guid 0x%llX, entry already exists !!",
			    hcap, hcap->hca_guid);
			return (NULL);
		}
		hcap = hcap->hca_next;
	}

	/*
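	 * A note on the allocation below: as the (nports - 1) arithmetic
	 * implies, ibcm_hca_info_t ends in a one-element hca_port_info[]
	 * array, so a single zalloc provides the base structure plus
	 * (nports - 1) additional ibcm_port_info_t slots.
	 * ibcm_delete_hca_entry() frees the entry with the same formula.
	 *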
	 * Allocate storage for the new HCA entry
	 */
	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);

	/* initialize RW lock */
	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
	/* initialize SIDR list lock */
	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);

	/* Insert "hcap" into the global HCA list maintained by CM */
	hcap->hca_next = ibcm_hca_listp;
	ibcm_hca_listp = hcap;

	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);

	return (hcap);

}

/* deletes the given ibcm_hca_info_t from CM's global hca list */
void
ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
{
	ibcm_hca_info_t	*headp, *prevp = NULL;

	/* ibcm_hca_global_lock is held */
	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
	    "hcap = 0x%p", hcap->hca_guid, hcap);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	headp = ibcm_hca_listp;
	while (headp != NULL) {
		if (headp == hcap) {
			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
			    "deleting hcap %p hcaguid %llX", hcap,
			    hcap->hca_guid);
			if (prevp) {
				prevp->hca_next = headp->hca_next;
			} else {
				prevp = headp->hca_next;
				ibcm_hca_listp = prevp;
			}
			rw_destroy(&hcap->hca_state_rwlock);
			rw_destroy(&hcap->hca_sidr_list_lock);
			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
			    (hcap->hca_num_ports - 1) *
			    sizeof (ibcm_port_info_t));
			return;
		}

		prevp = headp;
		headp = headp->hca_next;
	}
}

/*
 * ibcm_find_hca_entry:
 *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
 *	This entry can be then used to access AVL tree/SIDR list etc.
 *	If the entry exists and the HCA is in ACTIVE state, then the hca's
 *	ref cnt is incremented and the entry returned. Else NULL is returned.
 *
 *	All functions that use ibcm_find_hca_entry and get a non-NULL
 *	return value must call ibcm_dec_hca_acc_cnt to decrement the
 *	respective hca ref cnt. There shouldn't be any usage of the
 *	ibcm_hca_info_t * returned from ibcm_find_hca_entry
 *	after decrementing the hca_acc_cnt
 *
 * INPUTS:
 *	hca_guid	- HCA's guid
 *
 * RETURN VALUE:
 *	hcap		- if a match is found, else NULL
 */
ibcm_hca_info_t *
ibcm_find_hca_entry(ib_guid_t hca_guid)
{
	ibcm_hca_info_t *hcap;

	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);

	mutex_enter(&ibcm_global_hca_lock);

	hcap = ibcm_hca_listp;

	/* search for this HCA */
	while (hcap != NULL) {
		if (hcap->hca_guid == hca_guid)
			break;
		hcap = hcap->hca_next;
	}

	/* if no hcap for the hca_guid, return NULL */
	if (hcap == NULL) {
		mutex_exit(&ibcm_global_hca_lock);
		return (NULL);
	}

	/* return hcap, only if it is valid to use */
	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
		++(hcap->hca_acc_cnt);

		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
		    hcap->hca_acc_cnt);

		mutex_exit(&ibcm_global_hca_lock);
		return (hcap);
	} else {
		mutex_exit(&ibcm_global_hca_lock);

		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
		    "found hcap = 0x%p not in active state", hcap);
		return (NULL);
	}
}

/*
 * Searches for an ibcm_hca_info_t entry based on hca_guid, but doesn't
 * increment the hca's reference count. This function is used where the
 * calling context is attempting to delete hcap itself and hence the acc_cnt
 * cannot be incremented, OR it assumes that a valid hcap must be available
 * in ibcm's global hca list.
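 *
 * A minimal sketch of the ref-counted variant's contract, illustrative
 * only (real callers and their error handling live elsewhere in CM):
 *
 *	if ((hcap = ibcm_find_hca_entry(hca_guid)) == NULL)
 *		return (IBCM_FAILURE);
 *	(use hcap: port info, AVL trees, SIDR list, ...)
 *	ibcm_dec_hca_acc_cnt(hcap);
 *	(no hcap dereferences are legal past this point)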
*/ ibcm_hca_info_t * ibcm_find_hcap_entry(ib_guid_t hca_guid) { ibcm_hca_info_t *hcap; IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid); ASSERT(MUTEX_HELD(&ibcm_global_hca_lock)); hcap = ibcm_hca_listp; /* search for this HCA */ while (hcap != NULL) { if (hcap->hca_guid == hca_guid) break; hcap = hcap->hca_next; } if (hcap == NULL) IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for" " hca_guid 0x%llX", hca_guid); else IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for" " hca_guid 0x%llX", hca_guid); return (hcap); } /* increment the hca's temporary reference count */ ibcm_status_t ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap) { mutex_enter(&ibcm_global_hca_lock); if (hcap->hca_state == IBCM_HCA_ACTIVE) { ++(hcap->hca_acc_cnt); IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: " "hcap = 0x%p acc_cnt = %d ", hcap, hcap->hca_acc_cnt); mutex_exit(&ibcm_global_hca_lock); return (IBCM_SUCCESS); } else { IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: " "hcap INACTIVE 0x%p acc_cnt = %d ", hcap, hcap->hca_acc_cnt); mutex_exit(&ibcm_global_hca_lock); return (IBCM_FAILURE); } } /* decrement the hca's ref count, and wake up any waiting threads */ void ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap) { mutex_enter(&ibcm_global_hca_lock); ASSERT(hcap->hca_acc_cnt > 0); --(hcap->hca_acc_cnt); IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p " "acc_cnt = %d", hcap, hcap->hca_acc_cnt); if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) && (hcap->hca_acc_cnt == 0)) { IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: " "cv_broadcast for hcap = 0x%p", hcap); cv_broadcast(&ibcm_global_hca_cv); } mutex_exit(&ibcm_global_hca_lock); } /* increment the hca's resource count */ void ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap) { mutex_enter(&ibcm_global_hca_lock); ++(hcap->hca_res_cnt); IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p " "ref_cnt = %d", hcap, hcap->hca_res_cnt); mutex_exit(&ibcm_global_hca_lock); } /* decrement the hca's resource count, and wake up any waiting threads */ void ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap) { mutex_enter(&ibcm_global_hca_lock); ASSERT(hcap->hca_res_cnt > 0); --(hcap->hca_res_cnt); IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p " "ref_cnt = %d", hcap, hcap->hca_res_cnt); if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) && (hcap->hca_res_cnt == 0)) { IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: " "cv_broadcast for hcap = 0x%p", hcap); cv_broadcast(&ibcm_global_hca_cv); } mutex_exit(&ibcm_global_hca_lock); } /* increment the hca's service count */ void ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap) { mutex_enter(&ibcm_global_hca_lock); ++(hcap->hca_svc_cnt); IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p " "svc_cnt = %d", hcap, hcap->hca_svc_cnt); mutex_exit(&ibcm_global_hca_lock); } /* decrement the hca's service count */ void ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap) { mutex_enter(&ibcm_global_hca_lock); ASSERT(hcap->hca_svc_cnt > 0); --(hcap->hca_svc_cnt); IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p " "svc_cnt = %d", hcap, hcap->hca_svc_cnt); mutex_exit(&ibcm_global_hca_lock); } /* * The following code manages three classes of requests that CM makes to * the fabric. Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP. * The main issue is that the fabric can become very busy, and the CM * protocols rely on responses being made based on a predefined timeout * value. 
 * By managing how many simultaneous sessions are allowed, CM protocol
 * operations have been observed to succeed with extremely high
 * reliability when they should.
 *
 * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
 * thread blocks until there are fewer than some number of threads doing
 * similar requests.
 *
 * REQ/REP/RTU requests beyond a given limit are added to a list,
 * allowing the thread to return immediately to its caller in the
 * case where the "mode" is IBT_NONBLOCKING. This is the mode used
 * by uDAPL and seems to be an important feature/behavior.
 */

static int
ibcm_ok_to_start(struct ibcm_open_s *openp)
{
	return (openp->sends < openp->sends_hiwat &&
	    openp->in_progress < openp->in_progress_max);
}

void
ibcm_open_done(ibcm_state_data_t *statep)
{
	int run;
	ibcm_state_data_t **linkp, *tmp;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->open_flow == 1) {
		statep->open_flow = 0;
		mutex_enter(&ibcm_open.mutex);
		if (statep->open_link == NULL) {
			ibcm_open.in_progress--;
			run = ibcm_ok_to_start(&ibcm_open);
		} else {
			ibcm_open.queued--;
			linkp = &ibcm_open.head.open_link;
			while (*linkp != statep)
				linkp = &((*linkp)->open_link);
			*linkp = statep->open_link;
			statep->open_link = NULL;
			/*
			 * If we remove what tail pointed to, we need
			 * to reassign tail (it is never NULL).
			 * tail points to head for the empty list.
			 */
			if (ibcm_open.tail == statep) {
				tmp = &ibcm_open.head;
				while (tmp->open_link != &ibcm_open.head)
					tmp = tmp->open_link;
				ibcm_open.tail = tmp;
			}
			run = 0;
		}
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}

/* dtrace */
void
ibcm_open_wait(hrtime_t delta)
{
	if (delta > 1000000)
		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld",
		    delta);
}

void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	mutex_enter(&statep->state_mutex);
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}

void
ibcm_open_enqueue(ibcm_state_data_t *statep)
{
	int run;

	mutex_enter(&statep->state_mutex);
	statep->post_time = gethrtime();
	mutex_exit(&statep->state_mutex);
	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
		ibcm_open.in_progress++;
		mutex_exit(&ibcm_open.mutex);
		ibcm_open_start(statep);
	} else {
		ibcm_open.queued++;
		statep->open_link = &ibcm_open.head;
		ibcm_open.tail->open_link = statep;
		ibcm_open.tail = statep;
		run = ibcm_ok_to_start(&ibcm_open);
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}

ibcm_state_data_t *
ibcm_open_dequeue(void)
{
	ibcm_state_data_t *statep;

	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
	ibcm_open.queued--;
	ibcm_open.in_progress++;
	statep = ibcm_open.head.open_link;
	ibcm_open.head.open_link = statep->open_link;
	statep->open_link = NULL;
	/*
	 * If we remove what tail pointed to, we need
	 * to reassign tail (it is never NULL).
	 * tail points to head for the empty list.
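	 *
	 * Queue invariants, derived from the code above: a queued statep
	 * always has a non-NULL open_link, and the last element's open_link
	 * points back at &ibcm_open.head. That sentinel is how
	 * ibcm_open_done() distinguishes a statep still sitting on the
	 * queue from one already handed to ibcm_open_start().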
*/ if (ibcm_open.tail == statep) ibcm_open.tail = &ibcm_open.head; return (statep); } void ibcm_check_for_opens(void) { ibcm_state_data_t *statep; mutex_enter(&ibcm_open.mutex); while (ibcm_open.queued > 0) { if (ibcm_ok_to_start(&ibcm_open)) { statep = ibcm_open_dequeue(); mutex_exit(&ibcm_open.mutex); ibcm_open_start(statep); mutex_enter(&ibcm_open.mutex); } else { break; } } mutex_exit(&ibcm_open.mutex); } static void ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max) { flow->list = NULL; flow->simul = 0; flow->waiters_per_chunk = 4; flow->simul_max = simul_max; flow->lowat = simul_max - flow->waiters_per_chunk; flow->lowat_default = flow->lowat; /* stats */ flow->total = 0; } static void ibcm_rc_flow_control_init(void) { mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL); mutex_enter(&ibcm_open.mutex); ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max); ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max); ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max); ibcm_open.queued = 0; ibcm_open.exit_deferred = 0; ibcm_open.in_progress = 0; ibcm_open.in_progress_max = 16; ibcm_open.sends = 0; ibcm_open.sends_max = 0; ibcm_open.sends_lowat = 8; ibcm_open.sends_hiwat = 16; ibcm_open.tail = &ibcm_open.head; ibcm_open.head.open_link = NULL; mutex_exit(&ibcm_open.mutex); mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL); mutex_enter(&ibcm_close.mutex); ibcm_close.tail = &ibcm_close.head; ibcm_close.head.close_link = NULL; mutex_exit(&ibcm_close.mutex); } static void ibcm_rc_flow_control_fini(void) { mutex_destroy(&ibcm_open.mutex); mutex_destroy(&ibcm_close.mutex); } static ibcm_flow1_t * ibcm_flow_find(ibcm_flow_t *flow) { ibcm_flow1_t *flow1; ibcm_flow1_t *f; f = flow->list; if (f) { /* most likely code path */ while (f->link != NULL) f = f->link; if (f->waiters < flow->waiters_per_chunk) return (f); } /* There was no flow1 list element ready for another waiter */ mutex_exit(&ibcm_open.mutex); flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP); mutex_enter(&ibcm_open.mutex); f = flow->list; if (f) { while (f->link != NULL) f = f->link; if (f->waiters < flow->waiters_per_chunk) { kmem_free(flow1, sizeof (*flow1)); return (f); } f->link = flow1; } else { flow->list = flow1; } cv_init(&flow1->cv, NULL, CV_DRIVER, NULL); flow1->waiters = 0; flow1->link = NULL; return (flow1); } static void ibcm_flow_enter(ibcm_flow_t *flow) { mutex_enter(&ibcm_open.mutex); if (flow->list == NULL && flow->simul < flow->simul_max) { flow->simul++; flow->total++; mutex_exit(&ibcm_open.mutex); } else { ibcm_flow1_t *flow1; flow1 = ibcm_flow_find(flow); flow1->waiters++; cv_wait(&flow1->cv, &ibcm_open.mutex); if (--flow1->waiters == 0) { cv_destroy(&flow1->cv); mutex_exit(&ibcm_open.mutex); kmem_free(flow1, sizeof (*flow1)); } else mutex_exit(&ibcm_open.mutex); } } static void ibcm_flow_exit(ibcm_flow_t *flow) { mutex_enter(&ibcm_open.mutex); if (--flow->simul < flow->lowat) { if (flow->lowat < flow->lowat_default) flow->lowat++; if (flow->list) { ibcm_flow1_t *flow1; flow1 = flow->list; flow->list = flow1->link; /* unlink */ flow1->link = NULL; /* be clean */ flow->total += flow1->waiters; flow->simul += flow1->waiters; cv_broadcast(&flow1->cv); } } mutex_exit(&ibcm_open.mutex); } void ibcm_flow_inc(void) { mutex_enter(&ibcm_open.mutex); if (++ibcm_open.sends > ibcm_open.sends_max) { ibcm_open.sends_max = ibcm_open.sends; IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d", ibcm_open.sends_max); } mutex_exit(&ibcm_open.mutex); } static void ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg) { if 
(delta > 4000000LL) { IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: " "%s: %lldns", event_msg, delta); } } void ibcm_flow_dec(hrtime_t time, char *mad_type) { int flow_exit = 0; int run = 0; if (ibcm_dtrace) ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type); mutex_enter(&ibcm_open.mutex); ibcm_open.sends--; if (ibcm_open.sends < ibcm_open.sends_lowat) { run = ibcm_ok_to_start(&ibcm_open); if (ibcm_open.exit_deferred) { ibcm_open.exit_deferred--; flow_exit = 1; } } mutex_exit(&ibcm_open.mutex); if (flow_exit) ibcm_flow_exit(&ibcm_close_flow); if (run) ibcm_run_tlist_thread(); } void ibcm_close_enqueue(ibcm_state_data_t *statep) { mutex_enter(&ibcm_close.mutex); statep->close_link = NULL; ibcm_close.tail->close_link = statep; ibcm_close.tail = statep; mutex_exit(&ibcm_close.mutex); ibcm_run_tlist_thread(); } void ibcm_check_for_async_close() { ibcm_state_data_t *statep; mutex_enter(&ibcm_close.mutex); while (ibcm_close.head.close_link) { statep = ibcm_close.head.close_link; ibcm_close.head.close_link = statep->close_link; statep->close_link = NULL; if (ibcm_close.tail == statep) ibcm_close.tail = &ibcm_close.head; mutex_exit(&ibcm_close.mutex); ibcm_close_start(statep); mutex_enter(&ibcm_close.mutex); } mutex_exit(&ibcm_close.mutex); } void ibcm_close_enter(void) { ibcm_flow_enter(&ibcm_close_flow); } void ibcm_close_exit(void) { int flow_exit; mutex_enter(&ibcm_open.mutex); if (ibcm_open.sends < ibcm_open.sends_lowat || ibcm_open.exit_deferred >= 4) flow_exit = 1; else { flow_exit = 0; ibcm_open.exit_deferred++; } mutex_exit(&ibcm_open.mutex); if (flow_exit) ibcm_flow_exit(&ibcm_close_flow); } /* * This function needs to be called twice to finish our flow * control accounting when closing down a connection. One * call has send_done set to 1, while the other has it set to 0. * Because of retries, this could get called more than once * with either 0 or 1, but additional calls have no effect. 
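 *
 * Concretely, close_flow encodes the progress (values as used in the
 * code below): 1 means armed and waiting for both calls; the first call
 * moves it to 3 (send_done) or 2 (the non-send event); the matching
 * second call moves it to 0 and releases one slot in the close
 * flow-control accounting.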
*/ void ibcm_close_done(ibcm_state_data_t *statep, int send_done) { int flow_exit; ASSERT(MUTEX_HELD(&statep->state_mutex)); if (statep->close_flow == 1) { if (send_done) statep->close_flow = 3; else statep->close_flow = 2; } else if ((send_done && statep->close_flow == 2) || (!send_done && statep->close_flow == 3)) { statep->close_flow = 0; mutex_enter(&ibcm_open.mutex); if (ibcm_open.sends < ibcm_open.sends_lowat || ibcm_open.exit_deferred >= 4) flow_exit = 1; else { flow_exit = 0; ibcm_open.exit_deferred++; } mutex_exit(&ibcm_open.mutex); if (flow_exit) ibcm_flow_exit(&ibcm_close_flow); } } void ibcm_lapr_enter(void) { ibcm_flow_enter(&ibcm_lapr_flow); } void ibcm_lapr_exit(void) { ibcm_flow_exit(&ibcm_lapr_flow); } void ibcm_sa_access_enter() { ibcm_flow_enter(&ibcm_saa_flow); } void ibcm_sa_access_exit() { ibcm_flow_exit(&ibcm_saa_flow); } static void ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle, ibmf_saa_subnet_event_t saa_event_code, ibmf_saa_event_details_t *saa_event_details, void *callback_arg) { ibcm_port_info_t *portp = (ibcm_port_info_t *)callback_arg; ibt_subnet_event_code_t code; ibt_subnet_event_t event; uint8_t event_status; IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d", saa_handle, saa_event_code); mutex_enter(&ibcm_sm_notice_serialize_lock); switch (saa_event_code) { case IBMF_SAA_EVENT_MCG_CREATED: code = IBT_SM_EVENT_MCG_CREATED; break; case IBMF_SAA_EVENT_MCG_DELETED: code = IBT_SM_EVENT_MCG_DELETED; break; case IBMF_SAA_EVENT_GID_AVAILABLE: code = IBT_SM_EVENT_GID_AVAIL; ibcm_path_cache_purge(); break; case IBMF_SAA_EVENT_GID_UNAVAILABLE: code = IBT_SM_EVENT_GID_UNAVAIL; ibcm_path_cache_purge(); break; case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG: event_status = saa_event_details->ie_producer_event_status_mask & IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM; if (event_status == (portp->port_event_status & IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) { mutex_exit(&ibcm_sm_notice_serialize_lock); return; /* no change */ } portp->port_event_status = event_status; if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM) code = IBT_SM_EVENT_AVAILABLE; else code = IBT_SM_EVENT_UNAVAILABLE; break; default: mutex_exit(&ibcm_sm_notice_serialize_lock); return; } mutex_enter(&ibcm_global_hca_lock); /* don't send the event if we're tearing down */ if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) { mutex_exit(&ibcm_global_hca_lock); mutex_exit(&ibcm_sm_notice_serialize_lock); return; } ++(portp->port_hcap->hca_acc_cnt); mutex_exit(&ibcm_global_hca_lock); event.sm_notice_gid = saa_event_details->ie_gid; ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event); mutex_exit(&ibcm_sm_notice_serialize_lock); ibcm_dec_hca_acc_cnt(portp->port_hcap); } void ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl, ibt_sm_notice_handler_t sm_notice_handler, void *private) { ibcm_port_info_t *portp; ibcm_hca_info_t *hcap; uint8_t port; int num_failed_sgids; ibtl_cm_sm_init_fail_t *ifail; ib_gid_t *sgidp; IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices(%p, %s)", ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl)); mutex_enter(&ibcm_sm_notice_serialize_lock); ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private); if (sm_notice_handler == NULL) { mutex_exit(&ibcm_sm_notice_serialize_lock); return; } /* for each port, if service is not available, make a call */ mutex_enter(&ibcm_global_hca_lock); num_failed_sgids = 0; hcap = ibcm_hca_listp; while (hcap != NULL) { portp = hcap->hca_port_info; for (port = 0; port < hcap->hca_num_ports; port++) { if (!(portp->port_event_status & 
IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) num_failed_sgids++; portp++; } hcap = hcap->hca_next; } if (num_failed_sgids != 0) { ifail = kmem_alloc(sizeof (*ifail) + (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP); ifail->smf_num_sgids = num_failed_sgids; ifail->smf_ibt_hdl = ibt_hdl; sgidp = &ifail->smf_sgid[0]; hcap = ibcm_hca_listp; while (hcap != NULL) { portp = hcap->hca_port_info; for (port = 0; port < hcap->hca_num_ports; port++) { if (!(portp->port_event_status & IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) *sgidp++ = portp->port_sgid0; portp++; } hcap = hcap->hca_next; } } mutex_exit(&ibcm_global_hca_lock); if (num_failed_sgids != 0) { ibtl_cm_sm_notice_init_failure(ifail); kmem_free(ifail, sizeof (*ifail) + (num_failed_sgids - 1) * sizeof (ib_gid_t)); } mutex_exit(&ibcm_sm_notice_serialize_lock); } /* The following is run from a taskq because we've seen the stack overflow. */ static void ibcm_init_saa(void *arg) { ibcm_port_info_t *portp = (ibcm_port_info_t *)arg; int status; ib_guid_t port_guid; ibmf_saa_subnet_event_args_t event_args; port_guid = portp->port_sgid0.gid_guid; IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid); event_args.is_event_callback_arg = portp; event_args.is_event_callback = ibcm_sm_notice_handler; if ((status = ibmf_sa_session_open(port_guid, 0, &event_args, IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: " "ibmf_sa_session_open failed for port guid %llX " "status = %d", port_guid, status); } else { IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: " "registered sa_hdl 0x%p for port guid %llX", portp->port_ibmf_saa_hdl, port_guid); } mutex_enter(&ibcm_sa_open_lock); portp->port_saa_open_in_progress = 0; cv_broadcast(&ibcm_sa_open_cv); mutex_exit(&ibcm_sa_open_lock); } void ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port) { ibmf_saa_handle_t saa_handle; uint8_t port_index = port - 1; ibcm_port_info_t *portp = &hcap->hca_port_info[port_index]; ibt_status_t ibt_status; if (port_index >= hcap->hca_num_ports) return; mutex_enter(&ibcm_sa_open_lock); if (portp->port_saa_open_in_progress) { mutex_exit(&ibcm_sa_open_lock); return; } saa_handle = portp->port_ibmf_saa_hdl; if (saa_handle != NULL) { mutex_exit(&ibcm_sa_open_lock); return; } portp->port_saa_open_in_progress = 1; mutex_exit(&ibcm_sa_open_lock); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status)) /* The assumption is that we're getting event notifications */ portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM; _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status)) ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid, portp->port_num, &portp->port_sgid0, NULL); if (ibt_status != IBT_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: " "ibt_get_port_state_byguid failed for guid %llX " "with status %d", portp->port_hcap->hca_guid, ibt_status); mutex_enter(&ibcm_sa_open_lock); portp->port_saa_open_in_progress = 0; cv_broadcast(&ibcm_sa_open_cv); mutex_exit(&ibcm_sa_open_lock); return; } /* if the port is UP, try sa_session_open */ (void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP); } ibmf_saa_handle_t ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port) { ibmf_saa_handle_t saa_handle; uint8_t port_index = port - 1; ibcm_port_info_t *portp = &hcap->hca_port_info[port_index]; ibt_status_t ibt_status; if (port_index >= hcap->hca_num_ports) return (NULL); mutex_enter(&ibcm_sa_open_lock); while (portp->port_saa_open_in_progress) { cv_wait(&ibcm_sa_open_cv, 
&ibcm_sa_open_lock); } saa_handle = portp->port_ibmf_saa_hdl; if (saa_handle != NULL) { mutex_exit(&ibcm_sa_open_lock); return (saa_handle); } portp->port_saa_open_in_progress = 1; mutex_exit(&ibcm_sa_open_lock); ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid, portp->port_num, &portp->port_sgid0, NULL); if (ibt_status != IBT_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: " "ibt_get_port_state_byguid failed for guid %llX " "with status %d", portp->port_hcap->hca_guid, ibt_status); mutex_enter(&ibcm_sa_open_lock); portp->port_saa_open_in_progress = 0; cv_broadcast(&ibcm_sa_open_cv); mutex_exit(&ibcm_sa_open_lock); return (NULL); } /* if the port is UP, try sa_session_open */ (void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP); mutex_enter(&ibcm_sa_open_lock); while (portp->port_saa_open_in_progress) { cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock); } saa_handle = portp->port_ibmf_saa_hdl; mutex_exit(&ibcm_sa_open_lock); return (saa_handle); } /* * ibcm_hca_init_port(): * - Register port with IBMA * * Arguments: * hcap - HCA's guid * port_index - port number minus 1 * * Return values: * IBCM_SUCCESS - success */ ibt_status_t ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index) { int status; ibmf_register_info_t *ibmf_reg; IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d", hcap, port_index + 1); ASSERT(MUTEX_HELD(&ibcm_global_hca_lock)); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info)) if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) { /* Register with IBMF */ ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg; ibmf_reg->ir_ci_guid = hcap->hca_guid; ibmf_reg->ir_port_num = port_index + 1; ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT; /* * register with management framework */ status = ibmf_register(ibmf_reg, IBMF_VERSION, IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL, &(hcap->hca_port_info[port_index].port_ibmf_hdl), &(hcap->hca_port_info[port_index].port_ibmf_caps)); if (status != IBMF_SUCCESS) { IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: " "ibmf_register failed for port_num %x, " "status = %x", port_index + 1, status); return (ibcm_ibmf_analyze_error(status)); } hcap->hca_port_info[port_index].port_qp1.qp_cm = IBMF_QP_HANDLE_DEFAULT; hcap->hca_port_info[port_index].port_qp1.qp_port = &(hcap->hca_port_info[port_index]); /* * Register the read callback with IBMF. 
		 * Since we just did an ibmf_register, the handle is
		 * valid and ibcm_recv_cb() is valid, so we can
		 * safely assert for success of ibmf_setup_async_cb()
		 *
		 * Depending on the "state" of the HCA,
		 * CM may drop incoming packets
		 */
		status = ibmf_setup_async_cb(
		    hcap->hca_port_info[port_index].port_ibmf_hdl,
		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
		    &(hcap->hca_port_info[port_index].port_qp1), 0);
		ASSERT(status == IBMF_SUCCESS);

		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
		    "IBMF hdl[%x] = 0x%p", port_index,
		    hcap->hca_port_info[port_index].port_ibmf_hdl);

		/* Attempt to get the saa_handle for this port */
		ibcm_init_saa_handle(hcap, port_index + 1);
	}

	return (IBT_SUCCESS);
}

/*
 * Useful for re-attempting to initialize a port's ibma handles from
 * elsewhere in the cm code.
 */
ibt_status_t
ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
	    hcap, port_index + 1);

	mutex_enter(&ibcm_global_hca_lock);
	status = ibcm_hca_init_port(hcap, port_index);
	mutex_exit(&ibcm_global_hca_lock);
	return (status);
}

/*
 * ibcm_hca_fini_port():
 *	- Deregister port with IBMA
 *
 * Arguments:
 *	hcap		- HCA's guid
 *	port_index	- port number minus 1
 *
 * Return values:
 *	IBCM_SUCCESS - success
 */
static ibcm_status_t
ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
{
	int			ibmf_status;
	ibcm_status_t		ibcm_status;

	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
	    hcap, port_index + 1);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
		    "ibmf_sa_session_close IBMF SAA hdl %p",
		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);

		ibmf_status = ibmf_sa_session_close(
		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
		if (ibmf_status != IBMF_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibmf_sa_session_close of port %d returned %x",
			    port_index + 1, ibmf_status);
			return (IBCM_FAILURE);
		}
	}

	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
		    "ibmf_unregister IBMF Hdl %p",
		    hcap->hca_port_info[port_index].port_ibmf_hdl);

		/* clean-up all the ibmf qp's allocated on this port */
		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);

		if (ibcm_status != IBCM_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibcm_free_allqps failed for port_num %d",
			    port_index + 1);
			return (IBCM_FAILURE);
		}

		/* Tear down the receive callback */
		ibmf_status = ibmf_tear_down_async_cb(
		    hcap->hca_port_info[port_index].port_ibmf_hdl,
		    IBMF_QP_HANDLE_DEFAULT, 0);

		if (ibmf_status != IBMF_SUCCESS) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibmf_tear_down_async_cb failed %d port_num %d",
			    ibmf_status, port_index + 1);
			return (IBCM_FAILURE);
		}

		/* Now, unregister with IBMF */
		ibmf_status = ibmf_unregister(
		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
		    "ibmf_unregister of port_num %x returned %x",
		    port_index + 1, ibmf_status);

		if (ibmf_status == IBMF_SUCCESS)
			hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
		else {
			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
			    "ibmf_unregister failed %d port_num %d",
			    ibmf_status, port_index + 1);
			return (IBCM_FAILURE);
		}
	}
	return (IBCM_SUCCESS);
}

/*
 * ibcm_comm_est_handler():
 *	Check if the given channel is in ESTABLISHED state or not
 *
 * Arguments:
 *	eventp	- A pointer to an ibt_async_event_t struct
 *
 * Return values: NONE
 */
static void
ibcm_comm_est_handler(ibt_async_event_t *eventp)
{
ibcm_state_data_t *statep; IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:"); /* Both QP and EEC handles can't be NULL */ if (eventp->ev_chan_hdl == NULL) { IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: " "both QP and EEC handles are NULL"); return; } /* get the "statep" from qp/eec handles */ IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep); if (statep == NULL) { IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL"); return; } mutex_enter(&statep->state_mutex); IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl); IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep); IBCM_REF_CNT_INCR(statep); if ((statep->state == IBCM_STATE_REP_SENT) || (statep->state == IBCM_STATE_MRA_REP_RCVD)) { timeout_id_t timer_val = statep->timerid; statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED; if (timer_val) { statep->timerid = 0; mutex_exit(&statep->state_mutex); (void) untimeout(timer_val); } else mutex_exit(&statep->state_mutex); /* CM doesn't have RTU message here */ ibcm_cep_state_rtu(statep, NULL); } else { if (statep->state == IBCM_STATE_ESTABLISHED || statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) { IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: " "Channel already in ESTABLISHED state"); } else { /* An unexpected behavior from remote */ IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: " "Unexpected in state = %d", statep->state); } mutex_exit(&statep->state_mutex); ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST); } mutex_enter(&statep->state_mutex); IBCM_REF_CNT_DECR(statep); mutex_exit(&statep->state_mutex); } /* * ibcm_async_handler(): * CM's Async Handler * (Handles ATTACH, DETACH, COM_EST events) * * Arguments: * eventp - A pointer to an ibt_async_event_t struct * * Return values: None * * NOTE : CM assumes that all HCA DR events are delivered sequentially * i.e., until ibcm_async_handler completes for a given HCA DR, framework * shall not invoke ibcm_async_handler with another DR event for the same * HCA */ /* ARGSUSED */ void ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl, ibt_async_code_t code, ibt_async_event_t *eventp) { ibcm_hca_info_t *hcap; ibcm_port_up_t *pup; IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: " "clnt_hdl = %p, code = 0x%x, eventp = 0x%p", clnt_hdl, code, eventp); mutex_enter(&ibcm_global_hca_lock); /* If fini is going to complete successfully, then return */ if (ibcm_finit_state != IBCM_FINIT_IDLE) { /* * This finit state implies one of the following: * Init either didn't start or didn't complete OR * Fini is about to return SUCCESS and release the global lock. * In all these cases, it is safe to ignore the async. 
		 */
		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event "
		    "%x, as either init didn't complete or fini is about to "
		    "succeed", code);
		mutex_exit(&ibcm_global_hca_lock);
		return;
	}

	switch (code) {
	case IBT_PORT_CHANGE_EVENT:
		if ((eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID) == 0)
			break;
	/* FALLTHROUGH */
	case IBT_CLNT_REREG_EVENT:
	case IBT_EVENT_PORT_UP:
		mutex_exit(&ibcm_global_hca_lock);
		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
		pup->pup_hca_guid = eventp->ev_hca_guid;
		pup->pup_port = eventp->ev_port;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
		(void) taskq_dispatch(ibcm_taskq,
		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
		ibcm_path_cache_purge();
		return;

	case IBT_HCA_ATTACH_EVENT:

		/* eventp->ev_hca_guid is the HCA GUID of interest */
		ibcm_hca_attach(eventp->ev_hca_guid);
		break;

	case IBT_HCA_DETACH_EVENT:

		/* eventp->ev_hca_guid is the HCA GUID of interest */
		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
		    NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
			    " hca %llX doesn't exist", eventp->ev_hca_guid);
			break;
		}

		(void) ibcm_hca_detach(hcap);
		break;

	case IBT_EVENT_COM_EST_QP:
		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
	case IBT_EVENT_COM_EST_EEC:
		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
		ibcm_comm_est_handler(eventp);
		break;
	default:
		break;
	}

	/* Unblock any blocked fini/init operations */
	mutex_exit(&ibcm_global_hca_lock);
}