gcpu_poll_ntv.c revision 918e0d92ec24e67f572737a68faf135dc6409d26
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2018, Joyent, Inc.
26 */
27/*
28 * Copyright (c) 2010, Intel Corporation.
29 * All rights reserved.
30 */
31
32/*
33 * Native MCA polling.  We establish an omnipresent cyclic to fire on all
34 * online cpus to check their MCA state and log any valid errors for
35 * diagnosis.
36 */
37
38#include <sys/types.h>
39#include <sys/atomic.h>
40#include <sys/cyclic.h>
41#include <sys/x86_archext.h>
42#include <sys/mca_x86.h>
43
44#include "gcpu.h"
45
46hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL;	/* tuneable */
47static cyclic_id_t gcpu_mca_poll_cycid;
48static volatile uint_t gcpu_mca_poll_inits;
49extern int gcpu_poll_trace_always;
50extern uint_t gcpu_poll_trace_nent;
51
52/*
53 * Return nonzero if the given handle should poll the MCH.  We stick with
54 * the same handle as before unless the timestamp has not been updated
55 * for a while.  There is no need to keep a hold on the mch_poll_owner
56 * handle.
57 */
58
59static kmutex_t mch_poll_lock;
60static hrtime_t mch_poll_timestamp;
61static cmi_hdl_t mch_poll_owner;
62
63static int
64mch_pollowner(cmi_hdl_t hdl)
65{
66	hrtime_t now = gethrtime_waitfree();
67	int dopoll = 0;
68
69	mutex_enter(&mch_poll_lock);
70	if (now - mch_poll_timestamp > 2 * gcpu_mca_poll_interval ||
71	    mch_poll_timestamp == 0) {
72		mch_poll_owner = hdl;
73		dopoll = 1;
74	} else if (mch_poll_owner == hdl) {
75		dopoll = 1;
76	}
77
78	if (dopoll)
79		mch_poll_timestamp = now;
80
81	mutex_exit(&mch_poll_lock);
82	return (dopoll);
83}
84
85
/*
 * Core poll routine: scan this cpu's MCA banks, log out any valid
 * errors and, when we are the elected MCH poll owner, also poll the
 * memory-controller driver.  Called from the cyclic handler, from a
 * manual poke, or on a CMCI; 'what' is one of the GCPU_MPT_WHAT_*
 * codes identifying the initiator.  The caller must hold the
 * gcpus_poll_lock of this cpu's shared state (asserted below).
 */
static void
gcpu_ntv_mca_poll(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_mca_t *mca = &gcpu->gcpu_mca;
	gcpu_mce_status_t mce;
	int willpanic;
	uint64_t bankmask;

	ASSERT(MUTEX_HELD(&gcpu->gcpu_shared->gcpus_poll_lock));

	/* Enable CMCI in first poll if is supported */
	if ((mca->gcpu_mca_flags & GCPU_MCA_F_CMCI_ENABLE) != 0 &&
	    (!mca->gcpu_mca_first_poll_cmci_enabled)) {
		int i;
		uint64_t ctl2;

		/*
		 * Set MSR_MC_CTL2_EN in IA32_MC_CTL2 for every bank that
		 * advertised CMCI capability; done only once, on the
		 * first poll after the flag was set.
		 */
		for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
			if (mca->gcpu_bank_cmci[i].cmci_cap) {
				(void) cmi_hdl_rdmsr(hdl, IA32_MSR_MC_CTL2(i),
				    &ctl2);
				ctl2 |= MSR_MC_CTL2_EN;
				(void) cmi_hdl_wrmsr(hdl, IA32_MSR_MC_CTL2(i),
				    ctl2);
				mca->gcpu_bank_cmci[i].cmci_enabled = 1;
			}
		}
		mca->gcpu_mca_first_poll_cmci_enabled = 1;
	}

	if (mca->gcpu_mca_flags & GCPU_MCA_F_UNFAULTING) {
		int i;

		mca->gcpu_mca_flags &= ~GCPU_MCA_F_UNFAULTING;
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace,
		    GCPU_MPT_WHAT_UNFAULTING, 0);

		/*
		 * On the first cyclic poll after unfaulting a CPU we
		 * clear the status registers; see gcpu_faulted_exit
		 * for details.  We don't do this if the poll was
		 * initiated manually (presumably from some injection
		 * activity).
		 */
		if (what == GCPU_MPT_WHAT_CYC_ERR) {
			for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
				(void) cmi_hdl_wrmsr(hdl,
				    IA32_MSR_MC(i, STATUS), 0ULL);
			}
			/* Status cleared; skip logout on this pass. */
			return;
		}
	}

	/*
	 * Logout errors of the MCA banks of this cpu.
	 */
	if (what == GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * for CMCI, all banks should be scanned for log out
		 */
		bankmask = -1ULL;
	} else {
		bankmask = cms_poll_ownermask(hdl, gcpu_mca_poll_interval);
	}
	gcpu_mca_logout(hdl, NULL, bankmask, &mce, B_TRUE, what);

	if (mce.mce_nerr != 0)
		gcpu_poll_trace(&gcpu->gcpu_mca.gcpu_polltrace, what,
		    mce.mce_nerr);

	mca->gcpu_mca_lastpoll = gethrtime_waitfree();

	/* && binds looser than &, so this tests the FORCEFATAL bit first */
	willpanic = mce.mce_disp & CMI_ERRDISP_FORCEFATAL && cmi_panic_on_ue();

	if (what != GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * Call to the memory-controller driver which may report some
		 * errors not visible under the MCA (for off-chip NB).
		 * Since there is typically a single MCH we arrange that
		 * just one cpu perform this task at each cyclic fire.
		 */
		if (mch_pollowner(hdl))
			cmi_mc_logout(hdl, 0, willpanic);
	}

	/*
	 * In the common case any polled error is considered non-fatal,
	 * even if it indicates PCC or UC etc.  The only condition on which
	 * we will panic for a polled error is if model-specific support
	 * forces the error to be terminal regardless of how it is
	 * encountered.
	 */
	if (willpanic) {
#ifdef DEBUG
		cmn_err(CE_WARN, "MCA Poll: %u errors, disp=0x%llx, "
		    "%u PCC (%u ok), "
		    "%u UC (%u ok, %u poisoned), "
		    "%u forcefatal, %u ignored",
		    mce.mce_nerr, (u_longlong_t)mce.mce_disp,
		    mce.mce_npcc, mce.mce_npcc_ok,
		    mce.mce_nuc, mce.mce_nuc_ok, mce.mce_nuc_poisoned,
		    mce.mce_forcefatal, mce.mce_ignored);

#endif
		fm_panic("Unrecoverable Machine-Check Exception (Polled)");
	}
}
193
194/*
195 * See gcpu_mca_trap for an explanation of why preemption is disabled here.
196 * Note that we disable preemption and then contend for an adaptive mutex -
197 * we could block during the mutex operation, but once we return with the
198 * mutex held we must perform no operation that can block and we cannot
199 * be preempted so we will stay on cpu for the duration.  The disabling
200 * of preemption also means we cannot migrate cpus once we have returned
201 * with the mutex held - cyclic invocations can't migrate, anyway, but
202 * others could if they have failed to bind before this point.
203 */
204static void
205gcpu_ntv_mca_poll_wrapper(cmi_hdl_t hdl, int what)
206{
207	gcpu_data_t *gcpu;
208
209	if (hdl == NULL || (gcpu = cmi_hdl_getcmidata(hdl)) == NULL ||
210	    gcpu->gcpu_mca.gcpu_mca_lgsz == 0)
211		return;
212
213	kpreempt_disable();
214	mutex_enter(&gcpu->gcpu_shared->gcpus_poll_lock);
215	gcpu_ntv_mca_poll(hdl, what);
216	mutex_exit(&gcpu->gcpu_shared->gcpus_poll_lock);
217	kpreempt_enable();
218}
219
220static void
221gcpu_ntv_mca_poll_cyclic(void *arg)
222{
223	gcpu_ntv_mca_poll_wrapper((cmi_hdl_t)arg, GCPU_MPT_WHAT_CYC_ERR);
224}
225
226/*ARGSUSED*/
227static void
228gcpu_ntv_mca_poll_online(void *arg, cpu_t *cp, cyc_handler_t *cyh,
229    cyc_time_t *cyt)
230{
231	cmi_hdl_t hdl;
232
233	/*
234	 * Lookup and hold a handle for this cpu (any hold released in
235	 * our offline function).  If we chose not to initialize a handle
236	 * for this cpu back at cmi_init time then this lookup will return
237	 * NULL, so the cyh_func we appoint must be prepared for that.
238	 */
239	hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
240	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
241
242	cyt->cyt_when = 0;
243	cyt->cyt_interval = gcpu_mca_poll_interval;
244	cyh->cyh_func = gcpu_ntv_mca_poll_cyclic;
245	cyh->cyh_arg = (void *)hdl;
246	cyh->cyh_level = CY_LOW_LEVEL;
247}
248
249/*ARGSUSED*/
250static void
251gcpu_ntv_mca_poll_offline(void *arg, cpu_t *cpu, void *cyh_arg)
252{
253	cmi_hdl_t hdl = (cmi_hdl_t)cyh_arg;
254
255	if (hdl != NULL)
256		cmi_hdl_rele(hdl);
257}
258
259static void
260gcpu_ntv_mca_poll_start(void)
261{
262	cyc_omni_handler_t cyo;
263
264	if (gcpu_mca_poll_interval == 0 || gcpu_mca_poll_inits == 0)
265		return;
266
267	cyo.cyo_online = gcpu_ntv_mca_poll_online;
268	cyo.cyo_offline = gcpu_ntv_mca_poll_offline;
269	cyo.cyo_arg = NULL;
270
271	mutex_enter(&cpu_lock);
272	gcpu_mca_poll_cycid = cyclic_add_omni(&cyo);
273	mutex_exit(&cpu_lock);
274}
275
276/*
277 * gcpu_mca_poll_init is called from gcpu_mca_init for each cpu handle
278 * that we initialize for.  It should prepare for polling by allocating
279 * control structures and the like, but must not kick polling off yet.
280 */
281
282void
283gcpu_mca_poll_init(cmi_hdl_t hdl)
284{
285	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
286	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;
287
288	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
289
290	gcpu_poll_trace_init(ptc);
291
292	atomic_inc_uint(&gcpu_mca_poll_inits);
293}
294
295/* deconfigure gcpu_mca_poll_init() */
296void
297gcpu_mca_poll_fini(cmi_hdl_t hdl)
298{
299	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
300	gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;
301
302	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
303
304	if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) {
305		kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) *
306		    gcpu_poll_trace_nent);
307	}
308
309	atomic_dec_uint(&gcpu_mca_poll_inits);
310}
311
312void
313gcpu_mca_poll_start(cmi_hdl_t hdl)
314{
315	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
316	gcpu_ntv_mca_poll_start();
317}
318
319void
320gcpu_hdl_poke(cmi_hdl_t hdl)
321{
322	ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
323	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_POKE_ERR);
324}
325
326void
327gcpu_cmci_trap(cmi_hdl_t hdl)
328{
329	gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_CMCI_ERR);
330}
331