/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2018, Joyent, Inc.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

/*
 * Native MCA polling.  We establish an omnipresent cyclic to fire on all
 * online cpus to check their MCA state and log any valid errors for
 * diagnosis.
 */

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/cyclic.h>
#include <sys/x86_archext.h>
#include <sys/mca_x86.h>

#include "gcpu.h"

hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL;	/* tuneable */
static cyclic_id_t gcpu_mca_poll_cycid;
static volatile uint_t gcpu_mca_poll_inits;
extern int gcpu_poll_trace_always;
extern uint_t gcpu_poll_trace_nent;

/*
 * Return nonzero if the given handle should poll the MCH.  We stick with
 * the same handle as before unless the timestamp has not been updated
 * for a while.  There is no need to keep a hold on the mch_poll_owner
 * handle.
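 * Ownership is considered stale once mch_poll_timestamp is more than
 * twice the poll interval old; at that point whichever caller checks
 * first takes the ownership over.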
 */

static kmutex_t mch_poll_lock;
static hrtime_t mch_poll_timestamp;
static cmi_hdl_t mch_poll_owner;

static int
mch_pollowner(cmi_hdl_t hdl)
{
	hrtime_t now = gethrtime_waitfree();
	int dopoll = 0;

	mutex_enter(&mch_poll_lock);
	if (now - mch_poll_timestamp > 2 * gcpu_mca_poll_interval ||
	    mch_poll_timestamp == 0) {
		mch_poll_owner = hdl;
		dopoll = 1;
	} else if (mch_poll_owner == hdl) {
		dopoll = 1;
	}

	if (dopoll)
		mch_poll_timestamp = now;

	mutex_exit(&mch_poll_lock);
	return (dopoll);
}

static void
gcpu_ntv_mca_poll(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_mca_t *mca = &gcpu->gcpu_mca;
	gcpu_mce_status_t mce;
	int willpanic;
	uint64_t bankmask;

	ASSERT(MUTEX_HELD(&gcpu->gcpu_shared->gcpus_poll_lock));

	/* Enable CMCI on the first poll if it is supported */
	if ((mca->gcpu_mca_flags & GCPU_MCA_F_CMCI_ENABLE) != 0 &&
	    !mca->gcpu_mca_first_poll_cmci_enabled) {
		int i;
		uint64_t ctl2;

		for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
			if (mca->gcpu_bank_cmci[i].cmci_cap) {
				(void) cmi_hdl_rdmsr(hdl, IA32_MSR_MC_CTL2(i),
				    &ctl2);
				ctl2 |= MSR_MC_CTL2_EN;
				(void) cmi_hdl_wrmsr(hdl, IA32_MSR_MC_CTL2(i),
				    ctl2);
				mca->gcpu_bank_cmci[i].cmci_enabled = 1;
			}
		}
		mca->gcpu_mca_first_poll_cmci_enabled = 1;
	}

	if (mca->gcpu_mca_flags & GCPU_MCA_F_UNFAULTING) {
		int i;

		mca->gcpu_mca_flags &= ~GCPU_MCA_F_UNFAULTING;
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace,
		    GCPU_MPT_WHAT_UNFAULTING, 0);

		/*
		 * On the first cyclic poll after unfaulting a CPU we
		 * clear the status registers; see gcpu_faulted_exit
		 * for details.  We don't do this if the poll was
		 * initiated manually (presumably from some injection
		 * activity).
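		 * A manual poke arrives with what set to
		 * GCPU_MPT_WHAT_POKE_ERR rather than GCPU_MPT_WHAT_CYC_ERR,
		 * which is how the check below tells the two cases apart.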
		 */
		if (what == GCPU_MPT_WHAT_CYC_ERR) {
			for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
				(void) cmi_hdl_wrmsr(hdl,
				    IA32_MSR_MC(i, STATUS), 0ULL);
			}
			return;
		}
	}

	/*
	 * Logout errors of the MCA banks of this cpu.
	 */
	if (what == GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * For CMCI, all banks should be scanned for logout.
		 */
		bankmask = -1ULL;
	} else {
		bankmask = cms_poll_ownermask(hdl, gcpu_mca_poll_interval);
	}
	gcpu_mca_logout(hdl, NULL, bankmask, &mce, B_TRUE, what);

	if (mce.mce_nerr != 0)
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace, what,
		    mce.mce_nerr);

	mca->gcpu_mca_lastpoll = gethrtime_waitfree();
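
	/*
	 * Panic only if model-specific support has marked the disposition
	 * force-fatal and panicking on uncorrectable errors is enabled.
	 */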
	willpanic = mce.mce_disp & CMI_ERRDISP_FORCEFATAL && cmi_panic_on_ue();

	if (what != GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * Call to the memory-controller driver which may report some
		 * errors not visible under the MCA (for off-chip NB).
		 * Since there is typically a single MCH we arrange that
		 * just one cpu perform this task at each cyclic fire.
		 */
		if (mch_pollowner(hdl))
			cmi_mc_logout(hdl, 0, willpanic);
	}

	/*
	 * In the common case any polled error is considered non-fatal,
	 * even if it indicates PCC or UC etc.  The only condition on which
	 * we will panic for a polled error is if model-specific support
	 * forces the error to be terminal regardless of how it is
	 * encountered.
	 */
	if (willpanic) {
#ifdef DEBUG
		cmn_err(CE_WARN, "MCA Poll: %u errors, disp=0x%llx, "
		    "%u PCC (%u ok), "
		    "%u UC (%u ok, %u poisoned), "
		    "%u forcefatal, %u ignored",
		    mce.mce_nerr, (u_longlong_t)mce.mce_disp,
		    mce.mce_npcc, mce.mce_npcc_ok,
		    mce.mce_nuc, mce.mce_nuc_ok, mce.mce_nuc_poisoned,
		    mce.mce_forcefatal, mce.mce_ignored);
#endif
		fm_panic("Unrecoverable Machine-Check Exception (Polled)");
	}
}

/*
 * See gcpu_mca_trap for an explanation of why preemption is disabled here.
 * Note that we disable preemption and then contend for an adaptive mutex -
 * we could block during the mutex operation, but once we return with the
 * mutex held we must perform no operation that can block and we cannot
 * be preempted so we will stay on cpu for the duration.  The disabling
 * of preemption also means we cannot migrate cpus once we have returned
 * with the mutex held - cyclic invocations can't migrate, anyway, but
 * others could if they have failed to bind before this point.
 */
static void
gcpu_ntv_mca_poll_wrapper(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu;

	if (hdl == NULL || (gcpu = cmi_hdl_getcmidata(hdl)) == NULL ||
	    gcpu->gcpu_mca.gcpu_mca_lgsz == 0)
		return;

	kpreempt_disable();
	mutex_enter(&gcpu->gcpu_shared->gcpus_poll_lock);
	gcpu_ntv_mca_poll(hdl, what);
	mutex_exit(&gcpu->gcpu_shared->gcpus_poll_lock);
	kpreempt_enable();
}

static void
gcpu_ntv_mca_poll_cyclic(void *arg)
{
	gcpu_ntv_mca_poll_wrapper((cmi_hdl_t)arg, GCPU_MPT_WHAT_CYC_ERR);
}

/*ARGSUSED*/
static void
gcpu_ntv_mca_poll_online(void *arg, cpu_t *cp, cyc_handler_t *cyh,
    cyc_time_t *cyt)
{
	cmi_hdl_t hdl;

	/*
	 * Lookup and hold a handle for this cpu (any hold released in
	 * our offline function).  If we chose not to initialize a handle
	 * for this cpu back at cmi_init time then this lookup will return
	 * NULL, so the cyh_func we appoint must be prepared for that.
	 */
	hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	cyt->cyt_when = 0;
	cyt->cyt_interval = gcpu_mca_poll_interval;
	cyh->cyh_func = gcpu_ntv_mca_poll_cyclic;
	cyh->cyh_arg = (void *)hdl;
	cyh->cyh_level = CY_LOW_LEVEL;
}

/*ARGSUSED*/
static void
gcpu_ntv_mca_poll_offline(void *arg, cpu_t *cpu, void *cyh_arg)
{
	cmi_hdl_t hdl = (cmi_hdl_t)cyh_arg;

	if (hdl != NULL)
		cmi_hdl_rele(hdl);
}

static void
gcpu_ntv_mca_poll_start(void)
{
	cyc_omni_handler_t cyo;

	if (gcpu_mca_poll_interval == 0 || gcpu_mca_poll_inits == 0)
		return;

	cyo.cyo_online = gcpu_ntv_mca_poll_online;
	cyo.cyo_offline = gcpu_ntv_mca_poll_offline;
	cyo.cyo_arg = NULL;
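
	/*
	 * The cyclic subsystem requires cpu_lock to be held across
	 * cyclic_add_omni(); the online/offline callbacks above then
	 * run for each cpu as it comes online or goes offline.
	 */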
	mutex_enter(&cpu_lock);
	gcpu_mca_poll_cycid = cyclic_add_omni(&cyo);
	mutex_exit(&cpu_lock);
}

/*
 * gcpu_mca_poll_init is called from gcpu_mca_init for each cpu handle
 * that we initialize for.  It should prepare for polling by allocating
 * control structures and the like, but must not kick polling off yet.
 */
void
gcpu_mca_poll_init(cmi_hdl_t hdl)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;

	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);

	gcpu_poll_trace_init(ptc);

	atomic_inc_uint(&gcpu_mca_poll_inits);
}

/* Tear down the state prepared by gcpu_mca_poll_init() */
void
gcpu_mca_poll_fini(cmi_hdl_t hdl)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;

	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);

	if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) {
		kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) *
		    gcpu_poll_trace_nent);
	}

	atomic_dec_uint(&gcpu_mca_poll_inits);
}

void
gcpu_mca_poll_start(cmi_hdl_t hdl)
{
	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
	gcpu_ntv_mca_poll_start();
}

void
gcpu_hdl_poke(cmi_hdl_t hdl)
{
	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_POKE_ERR);
}

void
gcpu_cmci_trap(cmi_hdl_t hdl)
{
	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_CMCI_ERR);
}