/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2018, Joyent, Inc.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

/*
 * Native MCA polling.  We establish an omnipresent cyclic to fire on all
 * online cpus to check their MCA state and log any valid errors for
 * diagnosis.
 */

#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/cyclic.h>
#include <sys/x86_archext.h>
#include <sys/mca_x86.h>

#include "gcpu.h"

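/*
 * Interval between cyclic poll fires.  It is consumed when each CPU's
 * cyclic is programmed in gcpu_ntv_mca_poll_online(), and again at
 * runtime by mch_pollowner() to age out a stale MCH poll owner.
 */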
hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL;	/* tuneable */
static cyclic_id_t gcpu_mca_poll_cycid;
static volatile uint_t gcpu_mca_poll_inits;
extern int gcpu_poll_trace_always;
extern uint_t gcpu_poll_trace_nent;

/*
 * Return nonzero if the given handle should poll the MCH.  We stick with
 * the same handle as before unless the timestamp has not been updated
 * for a while.  There is no need to keep a hold on the mch_poll_owner
 * handle.
 */

static kmutex_t mch_poll_lock;
static hrtime_t mch_poll_timestamp;
static cmi_hdl_t mch_poll_owner;

static int
mch_pollowner(cmi_hdl_t hdl)
{
	hrtime_t now = gethrtime_waitfree();
	int dopoll = 0;

	mutex_enter(&mch_poll_lock);
	if (now - mch_poll_timestamp > 2 * gcpu_mca_poll_interval ||
	    mch_poll_timestamp == 0) {
		mch_poll_owner = hdl;
		dopoll = 1;
	} else if (mch_poll_owner == hdl) {
		dopoll = 1;
	}

	if (dopoll)
		mch_poll_timestamp = now;

	mutex_exit(&mch_poll_lock);
	return (dopoll);
}

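/*
 * Poll the MCA banks of the given handle and log out any valid errors
 * for diagnosis.  Called with the shared gcpus_poll_lock held.
 */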
static void
gcpu_ntv_mca_poll(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_mca_t *mca = &gcpu->gcpu_mca;
	gcpu_mce_status_t mce;
	int willpanic;
	uint64_t bankmask;

	ASSERT(MUTEX_HELD(&gcpu->gcpu_shared->gcpus_poll_lock));

	/* Enable CMCI on the first poll if it is supported */
	if ((mca->gcpu_mca_flags & GCPU_MCA_F_CMCI_ENABLE) != 0 &&
	    (!mca->gcpu_mca_first_poll_cmci_enabled)) {
		int i;
		uint64_t ctl2;

		for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
			if (mca->gcpu_bank_cmci[i].cmci_cap) {
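				/*
				 * Read-modify-write this bank's MC_CTL2
				 * to set the CMCI interrupt-enable bit.
				 */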
				(void) cmi_hdl_rdmsr(hdl, IA32_MSR_MC_CTL2(i),
				    &ctl2);
				ctl2 |= MSR_MC_CTL2_EN;
				(void) cmi_hdl_wrmsr(hdl, IA32_MSR_MC_CTL2(i),
				    ctl2);
				mca->gcpu_bank_cmci[i].cmci_enabled = 1;
			}
		}
		mca->gcpu_mca_first_poll_cmci_enabled = 1;
	}

	if (mca->gcpu_mca_flags & GCPU_MCA_F_UNFAULTING) {
		int i;

		mca->gcpu_mca_flags &= ~GCPU_MCA_F_UNFAULTING;
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace,
		    GCPU_MPT_WHAT_UNFAULTING, 0);

		/*
		 * On the first cyclic poll after unfaulting a CPU we
		 * clear the status registers; see gcpu_faulted_exit
		 * for details.  We don't do this if the poll was
		 * initiated manually (presumably from some injection
		 * activity).
		 */
		if (what == GCPU_MPT_WHAT_CYC_ERR) {
			for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
				(void) cmi_hdl_wrmsr(hdl,
				    IA32_MSR_MC(i, STATUS), 0ULL);
			}
			return;
		}
	}

	/*
	 * Log out errors from the MCA banks of this cpu.
	 */
	if (what == GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * For CMCI, all banks should be scanned and logged out.
		 */
		bankmask = -1ULL;
	} else {
		bankmask = cms_poll_ownermask(hdl, gcpu_mca_poll_interval);
	}
	gcpu_mca_logout(hdl, NULL, bankmask, &mce, B_TRUE, what);

	if (mce.mce_nerr != 0)
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace, what,
		    mce.mce_nerr);

	mca->gcpu_mca_lastpoll = gethrtime_waitfree();

	willpanic = mce.mce_disp & CMI_ERRDISP_FORCEFATAL && cmi_panic_on_ue();

	if (what != GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * Call into the memory-controller driver, which may report
		 * some errors not visible under the MCA (e.g. on an
		 * off-chip NB).  Since there is typically a single MCH,
		 * we arrange for just one cpu to perform this task at
		 * each cyclic fire.
		 */
		if (mch_pollowner(hdl))
			cmi_mc_logout(hdl, 0, willpanic);
	}

	/*
	 * In the common case any polled error is considered non-fatal,
	 * even if it indicates PCC or UC etc.  The only condition on which
	 * we will panic for a polled error is if model-specific support
	 * forces the error to be terminal regardless of how it is
	 * encountered.
	 */
	if (willpanic) {
#ifdef DEBUG
		cmn_err(CE_WARN, "MCA Poll: %u errors, disp=0x%llx, "
		    "%u PCC (%u ok), "
		    "%u UC (%u ok, %u poisoned), "
		    "%u forcefatal, %u ignored",
		    mce.mce_nerr, (u_longlong_t)mce.mce_disp,
		    mce.mce_npcc, mce.mce_npcc_ok,
		    mce.mce_nuc, mce.mce_nuc_ok, mce.mce_nuc_poisoned,
		    mce.mce_forcefatal, mce.mce_ignored);

#endif
		fm_panic("Unrecoverable Machine-Check Exception (Polled)");
	}
}

/*
 * See gcpu_mca_trap for an explanation of why preemption is disabled here.
 * Note that we disable preemption and then contend for an adaptive mutex -
 * we could block during the mutex operation, but once we return with the
 * mutex held we must perform no operation that can block and we cannot
 * be preempted so we will stay on cpu for the duration.  The disabling
 * of preemption also means we cannot migrate cpus once we have returned
 * with the mutex held - cyclic invocations can't migrate, anyway, but
 * others could if they have failed to bind before this point.
 */
static void
gcpu_ntv_mca_poll_wrapper(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu;

	if (hdl == NULL || (gcpu = cmi_hdl_getcmidata(hdl)) == NULL ||
	    gcpu->gcpu_mca.gcpu_mca_lgsz == 0)
		return;

	kpreempt_disable();
	mutex_enter(&gcpu->gcpu_shared->gcpus_poll_lock);
	gcpu_ntv_mca_poll(hdl, what);
	mutex_exit(&gcpu->gcpu_shared->gcpus_poll_lock);
	kpreempt_enable();
}

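/*
 * Omni cyclic handler: poll the handle captured for this CPU at online
 * time.  The handle may be NULL; the wrapper above checks for that.
 */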
static void
gcpu_ntv_mca_poll_cyclic(void *arg)
{
	gcpu_ntv_mca_poll_wrapper((cmi_hdl_t)arg, GCPU_MPT_WHAT_CYC_ERR);
}

/*ARGSUSED*/
static void
gcpu_ntv_mca_poll_online(void *arg, cpu_t *cp, cyc_handler_t *cyh,
    cyc_time_t *cyt)
{
	cmi_hdl_t hdl;

	/*
	 * Look up and hold a handle for this cpu (any hold is released in
	 * our offline function).  If we chose not to initialize a handle
	 * for this cpu back at cmi_init time then this lookup will return
	 * NULL, so the cyh_func we appoint must be prepared for that.
	 */
	hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	cyt->cyt_when = 0;
	cyt->cyt_interval = gcpu_mca_poll_interval;
	cyh->cyh_func = gcpu_ntv_mca_poll_cyclic;
	cyh->cyh_arg = (void *)hdl;
	cyh->cyh_level = CY_LOW_LEVEL;
}

/*ARGSUSED*/
static void
gcpu_ntv_mca_poll_offline(void *arg, cpu_t *cpu, void *cyh_arg)
{
	cmi_hdl_t hdl = (cmi_hdl_t)cyh_arg;

	if (hdl != NULL)
		cmi_hdl_rele(hdl);
}

static void
gcpu_ntv_mca_poll_start(void)
{
	cyc_omni_handler_t cyo;

	if (gcpu_mca_poll_interval == 0 || gcpu_mca_poll_inits == 0)
		return;

	cyo.cyo_online = gcpu_ntv_mca_poll_online;
	cyo.cyo_offline = gcpu_ntv_mca_poll_offline;
	cyo.cyo_arg = NULL;

	mutex_enter(&cpu_lock);
	gcpu_mca_poll_cycid = cyclic_add_omni(&cyo);
	mutex_exit(&cpu_lock);
}

/*
 * gcpu_mca_poll_init is called from gcpu_mca_init for each cpu handle
 * that we initialize for.  It should prepare for polling by allocating
 * control structures and the like, but must not kick polling off yet.
 */

void
gcpu_mca_poll_init(cmi_hdl_t hdl)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;

	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);

	gcpu_poll_trace_init(ptc);

	atomic_inc_uint(&gcpu_mca_poll_inits);
}

/* Tear down the per-handle state established by gcpu_mca_poll_init() */
void
gcpu_mca_poll_fini(cmi_hdl_t hdl)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;

	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);

	if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) {
		kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) *
		    gcpu_poll_trace_nent);
	}

	atomic_dec_uint(&gcpu_mca_poll_inits);
}

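/*
 * Begin native MCA polling by installing the omni cyclic; per-CPU
 * cyclics are then created by the online handler as CPUs come online.
 */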
void
gcpu_mca_poll_start(cmi_hdl_t hdl)
{
	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
	gcpu_ntv_mca_poll_start();
}

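/*
 * Manually initiated poll of a single handle, presumably from some
 * error-injection activity.
 */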
void
gcpu_hdl_poke(cmi_hdl_t hdl)
{
	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_POKE_ERR);
}

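/*
 * CMCI-initiated poll: the logout path scans all banks for
 * GCPU_MPT_WHAT_CMCI_ERR.
 */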
void
gcpu_cmci_trap(cmi_hdl_t hdl)
{
	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_CMCI_ERR);
}