xref: /illumos-gate/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c (revision 728f047ccdb8a96a1aecc448cb87706241a08c94)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Generic x86 CPU MCA poller.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/cyclic.h>
#include <sys/x86_archext.h>
#include <sys/mca_x86.h>
#include <sys/sdt.h>
#include <sys/cmn_err.h>

#include "gcpu.h"

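/*
 * Poll trace ring buffer: gcpu_mca_poll_trace_nent entries are allocated
 * per handle in gcpu_mca_poll_init when gcpu_mca_poll_trace_always is set
 * (the default on DEBUG kernels); otherwise tracing is limited to the
 * DTrace probe fired in gcpu_mca_poll_trace.
 */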
uint_t gcpu_mca_poll_trace_nent = 100;
#ifdef DEBUG
int gcpu_mca_poll_trace_always = 1;
#else
int gcpu_mca_poll_trace_always = 0;
#endif

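/*
 * Omnipresent cyclic that drives periodic polling on each online cpu,
 * and its firing interval (10 seconds by default).
 */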
cyclic_id_t gcpu_mca_poll_cycid;
hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL;	/* tuneable */

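/*
 * State used to elect the single handle that polls the memory-controller
 * hub (MCH) on behalf of all cpus; see gcpu_mch_pollowner below.
 */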
static kmutex_t mch_poll_lock;
static hrtime_t mch_poll_timestamp;
static cmi_hdl_t mch_poll_owner;

/*
 * Return nonzero if the given handle should poll the MCH.  We stick with
 * the same handle as before unless the timestamp has not been updated
 * for a while.  There is no need to keep a hold on the mch_poll_owner
 * handle.
 */
static int
gcpu_mch_pollowner(cmi_hdl_t hdl)
{
	hrtime_t now = gethrtime_waitfree();
	int dopoll = 0;

	mutex_enter(&mch_poll_lock);
	if (now - mch_poll_timestamp > 2 * gcpu_mca_poll_interval ||
	    mch_poll_timestamp == 0) {
		mch_poll_owner = hdl;
		dopoll = 1;
	} else if (mch_poll_owner == hdl) {
		dopoll = 1;
	}

	if (dopoll)
		mch_poll_timestamp = now;

	mutex_exit(&mch_poll_lock);
	return (dopoll);
}

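/*
 * Record a poll event in the per-cpu poll trace ring buffer (if one has
 * been allocated) and fire the corresponding DTrace probe.
 */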
static void
gcpu_mca_poll_trace(gcpu_mca_poll_trace_ctl_t *ptc, uint8_t what, uint8_t nerr)
{
	uint_t next;
	gcpu_mca_poll_trace_t *pt;

	DTRACE_PROBE2(gcpu__mca__poll__trace, uint32_t, what, uint32_t, nerr);

	if (ptc->mptc_tbufs == NULL)
		return; /* poll trace buffer is disabled */

	next = (ptc->mptc_curtrace + 1) % gcpu_mca_poll_trace_nent;
	pt = &ptc->mptc_tbufs[next];

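	/*
	 * Clear the timestamp while the record is being filled in; it is
	 * set to the current time once the record is complete.
	 */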
	pt->mpt_when = 0;
	pt->mpt_what = what;

	if (what == GCPU_MPT_WHAT_CYC_ERR)
		pt->mpt_nerr = MIN(nerr, UINT8_MAX);

	pt->mpt_when = gethrtime_waitfree();
	ptc->mptc_curtrace = next;
}

#ifndef	__xpv
/*
 * Perform a native poll of MCA state.
 */
static void
gcpu_ntv_mca_poll(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
	gcpu_mca_t *mca = &gcpu->gcpu_mca;
	gcpu_mca_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_mca_polltrace;
	gcpu_mce_status_t mce;
	int willpanic;
	int i;
	uint64_t ctl2;
	uint64_t bankmask;

	ASSERT(MUTEX_HELD(&gcpu->gcpu_shared->gcpus_poll_lock));

	/* On the first poll, enable CMCI on each bank that supports it */
	if (cmi_enable_cmci && (!mca->gcpu_mca_first_poll_cmci_enabled)) {
		for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
			if (mca->gcpu_bank_cmci[i].cmci_cap) {
				(void) cmi_hdl_rdmsr(hdl, IA32_MSR_MC_CTL2(i),
				    &ctl2);
				ctl2 |= MSR_MC_CTL2_EN;
				(void) cmi_hdl_wrmsr(hdl, IA32_MSR_MC_CTL2(i),
				    ctl2);
				mca->gcpu_bank_cmci[i].cmci_enabled = 1;
			}
		}
		mca->gcpu_mca_first_poll_cmci_enabled = 1;
	}

	if (mca->gcpu_mca_flags & GCPU_MCA_F_UNFAULTING) {
		int i;

		mca->gcpu_mca_flags &= ~GCPU_MCA_F_UNFAULTING;
		gcpu_mca_poll_trace(ptc, GCPU_MPT_WHAT_UNFAULTING, 0);

		/*
		 * On the first cyclic poll after unfaulting a CPU we
		 * clear the status registers; see gcpu_faulted_exit
		 * for details.  We don't do this if the poll was
		 * initiated manually (presumably from some injection
		 * activity).
		 */
		if (what == GCPU_MPT_WHAT_CYC_ERR) {
			for (i = 0; i < mca->gcpu_mca_nbanks; i++) {
				(void) cmi_hdl_wrmsr(hdl,
				    IA32_MSR_MC(i, STATUS), 0ULL);
			}
			return;
		}
	}

	/*
	 * Log out errors from the MCA banks of this cpu.
	 */
	if (what == GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * For CMCI, all banks should be scanned for logout.
		 */
		bankmask = -1ULL;
	} else {
		bankmask = cms_poll_ownermask(hdl, gcpu_mca_poll_interval);
	}
	gcpu_mca_logout(hdl, NULL, bankmask, &mce, B_TRUE, what);

	gcpu_mca_poll_trace(ptc, what, mce.mce_nerr);
	mca->gcpu_mca_lastpoll = gethrtime_waitfree();

	willpanic = mce.mce_disp & CMI_ERRDISP_FORCEFATAL && cmi_panic_on_ue();

	if (what != GCPU_MPT_WHAT_CMCI_ERR) {
		/*
		 * Call into the memory-controller driver, which may report
		 * some errors not visible under the MCA (e.g., for an
		 * off-chip NB).  Since there is typically a single MCH, we
		 * arrange for just one cpu to perform this task at each
		 * cyclic fire.
		 */
		if (gcpu_mch_pollowner(hdl))
			cmi_mc_logout(hdl, 0, willpanic);
	}

	/*
	 * In the common case any polled error is considered non-fatal,
	 * even if it indicates PCC or UC etc.  The only condition on which
	 * we will panic for a polled error is if model-specific support
	 * forces the error to be terminal regardless of how it is
	 * encountered.
	 */
	if (willpanic) {
#ifdef DEBUG
		cmn_err(CE_WARN, "MCA Poll: %u errors, disp=0x%llx, "
		    "%u PCC (%u ok), "
		    "%u UC (%u ok, %u poisoned), "
		    "%u forcefatal, %u ignored",
		    mce.mce_nerr, (u_longlong_t)mce.mce_disp,
		    mce.mce_npcc, mce.mce_npcc_ok,
		    mce.mce_nuc, mce.mce_nuc_ok, mce.mce_nuc_poisoned,
		    mce.mce_forcefatal, mce.mce_ignored);

#endif
		fm_panic("Unrecoverable Machine-Check Exception (Polled)");
	}
}

/*
 * See gcpu_mca_trap for an explanation of why preemption is disabled here.
 * Note that we disable preemption and then contend for an adaptive mutex -
 * we could block during the mutex operation, but once we return with the
 * mutex held we must perform no operation that can block and we cannot
 * be preempted so we will stay on cpu for the duration.  The disabling
 * of preemption also means we cannot migrate cpus once we have returned
 * with the mutex held - cyclic invocations can't migrate, anyway, but
 * others could if they have failed to bind before this point.
 */
static void
gcpu_ntv_mca_poll_wrapper(cmi_hdl_t hdl, int what)
{
	gcpu_data_t *gcpu;

	if (hdl == NULL || (gcpu = cmi_hdl_getcmidata(hdl)) == NULL ||
	    gcpu->gcpu_mca.gcpu_mca_lgsz == 0)
		return;

	kpreempt_disable();
	mutex_enter(&gcpu->gcpu_shared->gcpus_poll_lock);
	gcpu_ntv_mca_poll(hdl, what);
	mutex_exit(&gcpu->gcpu_shared->gcpus_poll_lock);
	kpreempt_enable();
}

static void
gcpu_ntv_mca_poll_cyclic(void *arg)
{
	gcpu_ntv_mca_poll_wrapper((cmi_hdl_t)arg, GCPU_MPT_WHAT_CYC_ERR);
}

/*ARGSUSED*/
static void
gcpu_ntv_mca_poll_online(void *arg, cpu_t *cp, cyc_handler_t *cyh,
    cyc_time_t *cyt)
{
	cmi_hdl_t hdl;

	/*
	 * Look up and hold a handle for this cpu (any hold is released in
	 * our offline function).  If we chose not to initialize a handle
	 * for this cpu back at cmi_init time then this lookup will return
	 * NULL, so the cyh_func we appoint must be prepared for that.
	 */
	hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
	    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));

	cyt->cyt_when = 0;
	cyt->cyt_interval = gcpu_mca_poll_interval;
	cyh->cyh_func = gcpu_ntv_mca_poll_cyclic;
	cyh->cyh_arg = (void *)hdl;
	cyh->cyh_level = CY_LOW_LEVEL;
}

/*ARGSUSED*/
static void
gcpu_ntv_mca_poll_offline(void *arg, cpu_t *cpu, void *cyh_arg)
{
	cmi_hdl_t hdl = (cmi_hdl_t)cyh_arg;

	if (hdl != NULL)
		cmi_hdl_rele(hdl);
}
#endif	/* __xpv */

/*
 * gcpu_mca_poll_init is called from gcpu_mca_init for each cpu handle
 * that we initialize for.  It should prepare for polling by allocating
 * control structures and the like, but must not kick polling off yet.
 *
 * In the native case our polling technique (see gcpu_mca_poll_start) will
 * be to install an omnipresent cyclic to fire on all online cpus (cpu_t),
 * and they will poll the real hardware beneath them.
 *
 * In the xVM MCA case the hypervisor performs polling and makes telemetry
 * available to dom0 - a cyclic on each virtual cpu is inappropriate.
 * Instead we will create a single unbound cyclic which will consume the
 * hypervisor-provided telemetry when it fires, and submit it into
 * common logging code.
 */

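/*
 * Count of handles prepared for polling; gcpu_ntv_mca_poll_start will not
 * install the cyclic until at least one handle has been initialized.
 */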
static int gcpu_mca_poll_inits;

void
gcpu_mca_poll_init(cmi_hdl_t hdl)
{
	gcpu_mca_poll_trace_t *tbufs = NULL;

	switch (cmi_hdl_class(hdl)) {
	case CMI_HDL_NATIVE: {
		gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
		gcpu_mca_t *mca = &gcpu->gcpu_mca;

		if (gcpu_mca_poll_trace_always) {
			tbufs = kmem_zalloc(sizeof (gcpu_mca_poll_trace_t) *
			    gcpu_mca_poll_trace_nent, KM_SLEEP);
		}
		mca->gcpu_mca_polltrace.mptc_tbufs = tbufs;
		mca->gcpu_mca_polltrace.mptc_curtrace = 0;
		gcpu_mca_poll_inits++;
		break;
	}

	case CMI_HDL_SOLARIS_xVM_MCA:
		/*
		 * Implementation should move the kmem_zalloc above to before
		 * the switch, and stash the trace buffer and current record
		 * pointer in a static structure.  This should be done
		 * just once, despite this init function potentially being
		 * called multiple times.
		 */
		/*FALLTHRU*/

	default:
		break;
	}
}

static void
gcpu_ntv_mca_poll_start(void)
{
#ifndef	__xpv
	cyc_omni_handler_t cyo;

	if (gcpu_mca_poll_interval == 0 || gcpu_mca_poll_inits == 0)
		return;

	cyo.cyo_online = gcpu_ntv_mca_poll_online;
	cyo.cyo_offline = gcpu_ntv_mca_poll_offline;
	cyo.cyo_arg = NULL;

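	/* The cyclic subsystem requires cpu_lock to be held across this. */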
	mutex_enter(&cpu_lock);
	gcpu_mca_poll_cycid = cyclic_add_omni(&cyo);
	mutex_exit(&cpu_lock);
#endif	/* __xpv */
}

void
gcpu_mca_poll_start(cmi_hdl_t hdl)
{
	switch (cmi_hdl_class(hdl)) {
	case CMI_HDL_NATIVE:
		gcpu_ntv_mca_poll_start();
		break;

	case CMI_HDL_SOLARIS_xVM_MCA:
		/*
		 * Implementation should call a new function to install
		 * an unbound cyclic that will process hypervisor-provided
		 * telemetry.
		 */
		/*FALLTHRU*/

	default:
		break;
	}
}

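/*
 * Manually initiated poll of a single handle, used outside the cyclic
 * (for example by error injection activity).
 */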
void
gcpu_hdl_poke(cmi_hdl_t hdl)
{
	switch (cmi_hdl_class(hdl)) {
	case CMI_HDL_NATIVE:
		gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_POKE_ERR);
		break;

	case CMI_HDL_SOLARIS_xVM_MCA:
		/*
		 * Implementation will call the xPV poll wrapper.
		 */
		/*FALLTHRU*/

	default:
		break;
	}
}

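/*
 * Poll entry point for a corrected machine check interrupt (CMCI); all
 * banks of the handle are scanned and logged out.
 */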
void
gcpu_cmci_trap(cmi_hdl_t hdl)
{
	switch (cmi_hdl_class(hdl)) {
	case CMI_HDL_NATIVE:
		gcpu_ntv_mca_poll_wrapper(hdl, GCPU_MPT_WHAT_CMCI_ERR);
		break;

	case CMI_HDL_SOLARIS_xVM_MCA:
		/*
		 * Implementation will call the xPV poll wrapper.
		 */
		/*FALLTHRU*/

	default:
		break;
	}
}
407