1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2018, Joyent, Inc.
25 */
26
27#ifndef _GCPU_H
28#define	_GCPU_H
29
30#include <sys/types.h>
31#include <sys/cpu_module_impl.h>
32#include <sys/cpu_module_ms.h>
33#include <sys/ksynch.h>
34#include <sys/systm.h>
35#include <sys/fm/util.h>
36
37#ifdef __cplusplus
38extern "C" {
39#endif
40
/* Sizing of the errorq, expressed in errorq slots. */
#define	GCPU_MCA_ERRS_PERCPU	10	/* slots per cpu */
#define	GCPU_MCA_MIN_ERRORS	30	/* lower bound on total slots */
#define	GCPU_MCA_MAX_ERRORS	100	/* upper bound on total slots */

/* Forward declaration; struct gcpu_data is defined later in this header. */
typedef struct gcpu_data gcpu_data_t;
46
/* Errcode mask with all 16 bits set. */
#define	GCPU_ERRCODE_MASK_ALL		0xffff

/*
 * One error disposition entry: the formatters and ereport payload
 * selection to use, plus the pair of masks an errcode is matched against.
 */
typedef struct gcpu_error_disp {
	const char *ged_class_fmt;	/* formats last part of ereport class */
	const char *ged_compound_fmt;	/* formats compound errors */
	uint64_t ged_ereport_members;	/* members of the ereport payload */
	uint16_t ged_errcode_mask_on;	/* errcode bits required to be set */
	uint16_t ged_errcode_mask_off;	/* errcode bits required to be clear */
} gcpu_error_disp_t;
56
/*
 * For errorq_dispatch we need to have a single contiguous structure
 * capturing all our logout data.  We do not know in advance how many
 * error detector banks there are in this cpu model, so we'll manually
 * allocate additional space for the gcl_data array of gcpu_logout_t below.
 */
typedef struct gcpu_bank_logout {
	uint64_t gbl_status;		/* captured MCi_STATUS */
	uint64_t gbl_addr;		/* captured MCi_ADDR */
	uint64_t gbl_misc;		/* captured MCi_MISC */
	uint64_t gbl_disp;		/* disposition of this bank's error */
	uint32_t gbl_clrdefcnt;		/* number of deferred status clears */
} gcpu_bank_logout_t;
70
71/*
72 * The data structure we "logout" all error telemetry from all banks of
73 * a cpu to.  The gcl_data array declared with 1 member below will actually
74 * have gcl_nbanks members - variable with the actual cpu model present.
75 * After the gcl_data array there is a further model-specific array that
76 * may be allocated, and gcl_ms_logout will point to that if present.
77 * This cpu logout data must form one contiguous chunk of memory for
78 * dispatch with errorq_dispatch.
79 */
80typedef struct gcpu_logout {
81	gcpu_data_t *gcl_gcpu;		/* pointer to per-cpu gcpu_data_t */
82	uintptr_t gcl_ip;		/* instruction pointer from #mc trap */
83	uint64_t gcl_timestamp;		/* gethrtime() at logout */
84	uint64_t gcl_mcg_status;	/* MCG_STATUS register value */
85	uint64_t gcl_flags;		/* Flags */
86	pc_t gcl_stack[FM_STK_DEPTH];	/* saved stack trace, if any */
87	int gcl_stackdepth;		/* saved stack trace depth */
88	int ismc;			/* is a machine check flag */
89	int gcl_nbanks;			/* number of banks in array below */
90	void *gcl_ms_logout;		/* Model-specific area after gcl_data */
91	gcpu_bank_logout_t gcl_data[1];	/* Bank logout areas - must be last */
92} gcpu_logout_t;
93
/*
 * gcl_flags values
 */
#define	GCPU_GCL_F_PRIV		0x1	/* #MC taken in privileged code */
#define	GCPU_GCL_F_TES_P	0x2	/* TES_P is indicated by MCG_CAP */
99
/*
 * Four 64-bit values kept per bank.
 * NOTE(review): going by the member names these look like the bank's
 * CTL/STATUS/ADDR/MISC register values as left by the BIOS - confirm
 * against the code that fills this in.
 */
struct gcpu_bios_bankcfg {
	uint64_t bios_bank_ctl;
	uint64_t bios_bank_status;
	uint64_t bios_bank_addr;
	uint64_t bios_bank_misc;
};
106
/*
 * Two global 64-bit values plus a pointer to per-bank configuration.
 * NOTE(review): presumably bios_bankcfg points at an array with one
 * entry per bank - confirm against the allocating code.
 */
struct gcpu_bios_cfg {
	uint64_t bios_mcg_cap;
	uint64_t bios_mcg_ctl;
	struct gcpu_bios_bankcfg *bios_bankcfg;
};
112
/*
 * Event types in poll trace records.  Keep these in sync with
 * the generic cpu mdb module names for each (see gcpu_mpt_dump in mdb).
 */
#define	GCPU_MPT_WHAT_CYC_ERR		0	/* poll induced by the cyclic */
#define	GCPU_MPT_WHAT_POKE_ERR		1	/* poll induced manually */
#define	GCPU_MPT_WHAT_UNFAULTING	2	/* error state was discarded */
#define	GCPU_MPT_WHAT_MC_ERR		3	/* #MC */
#define	GCPU_MPT_WHAT_CMCI_ERR		4	/* CMCI interrupt */
#define	GCPU_MPT_WHAT_XPV_VIRQ		5	/* MCA_VIRQ in dom0 */
#define	GCPU_MPT_WHAT_XPV_VIRQ_LOGOUT	6	/* MCA_VIRQ logout completed */
124
125typedef struct gcpu_poll_trace {
126	hrtime_t mpt_when;		/* timestamp of event */
127	uint8_t mpt_what;		/* GCPU_MPT_WHAT_* (which event?) */
128	uint8_t mpt_nerr;		/* number of errors discovered */
129	uint16_t mpt_pad1;
130	uint32_t mpt_pad2;
131} gcpu_poll_trace_t;
132
133typedef struct gcpu_poll_trace_ctl {
134	gcpu_poll_trace_t *mptc_tbufs;	/* trace buffers */
135	uint_t mptc_curtrace;			/* last buffer filled */
136} gcpu_poll_trace_ctl_t;
137
138
/*
 * Per-bank counts and durations tracked to support enabling and
 * disabling cmci at runtime
 */
typedef struct gcpu_mca_cmci {
	uint32_t cmci_cap;	/* this bank's cmci capability */
	uint32_t ncmci;		/* correctable errors seen between polls */
	uint32_t drtcmci;	/* time without cmci while cmci is disabled */
	uint32_t cmci_enabled;	/* this bank's cmci enable/disable status */
} gcpu_mca_cmci_t;
149
/* Indices into the gcpu_mca_logout array of gcpu_mca_t */
#define	GCPU_MCA_LOGOUT_EXCEPTION	0	/* logout area for #MC */
#define	GCPU_MCA_LOGOUT_POLLER_1	1	/* next/prev poll area */
#define	GCPU_MCA_LOGOUT_POLLER_2	2	/* prev/next poll area */
#define	GCPU_MCA_LOGOUT_NUM		3	/* number of logout areas */
155
156typedef struct gcpu_mca {
157	gcpu_logout_t *gcpu_mca_logout[GCPU_MCA_LOGOUT_NUM];
158	uint32_t gcpu_mca_nextpoll_idx;	/* logout area for next poll */
159	struct gcpu_bios_cfg gcpu_mca_bioscfg;
160	uint_t gcpu_mca_nbanks;
161	size_t gcpu_mca_lgsz;		/* size of gcpu_mca_logout structs */
162	uint_t gcpu_mca_flags;		/* GCPU_MCA_F_* */
163	hrtime_t gcpu_mca_lastpoll;
164	gcpu_poll_trace_ctl_t gcpu_polltrace;
165	uint32_t gcpu_mca_first_poll_cmci_enabled; /* cmci on in first poll */
166	gcpu_mca_cmci_t *gcpu_bank_cmci;
167} gcpu_mca_t;
168
169typedef struct gcpu_mce_status {
170	uint_t mce_nerr;	/* total errors found in logout of all banks */
171	uint64_t mce_disp;	/* Disposition information */
172	uint_t mce_npcc;	/* number of errors with PCC */
173	uint_t mce_npcc_ok;	/* PCC with CMS_ERRSCOPE_CURCONTEXT_OK */
174	uint_t mce_nuc;		/* number of errors with UC */
175	uint_t mce_nuc_ok;	/* UC with CMS_ERRSCOPE_CLEARED_UC */
176	uint_t mce_nuc_poisoned; /* UC with CMS_ERRSCOPE_POISONED */
177	uint_t mce_forcefatal;	/* CMS_ERRSCOPE_FORCE_FATAL */
178	uint_t mce_ignored;	/* CMS_ERRSCOPE_IGNORE_ERR */
179} gcpu_mce_status_t;
180
181/*
182 * Flags for gcpu_mca_flags
183 */
#define	GCPU_MCA_F_UNFAULTING		0x1	/* CPU leaving faulted state */
#define	GCPU_MCA_F_CMCI_CAPABLE		0x2	/* CMCI supported by CPU */
#define	GCPU_MCA_F_CMCI_ENABLE		0x4	/* CMCI enabled on CPU */
187
188/*
189 * State shared by all cpus on a chip
190 */
191struct gcpu_chipshared {
192	kmutex_t gcpus_cfglock;		/* serial MCA config from chip cores */
193	kmutex_t gcpus_poll_lock;	/* serialize pollers on the same chip */
194	uint32_t gcpus_actv_banks;	/* MCA bank numbers active on chip */
195	volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */
196	char *gcpus_ident;		/* ident string, if available */
197};
198
199struct gcpu_data {
200	gcpu_mca_t gcpu_mca;			/* MCA state for this CPU */
201	cmi_hdl_t gcpu_hdl;			/* associated handle */
202	struct gcpu_chipshared *gcpu_shared;	/* Shared state for the chip */
203};
204
205#ifdef _KERNEL
206
207struct regs;
208
209/*
210 * CMI implementation
211 */
212extern int gcpu_init(cmi_hdl_t, void **);
213extern void gcpu_fini(cmi_hdl_t);
214extern void gcpu_post_startup(cmi_hdl_t);
215extern void gcpu_post_mpstartup(cmi_hdl_t);
216extern void gcpu_faulted_enter(cmi_hdl_t);
217extern void gcpu_faulted_exit(cmi_hdl_t);
218extern void gcpu_mca_init(cmi_hdl_t);
219extern void gcpu_mca_fini(cmi_hdl_t);
220extern void gcpu_mca_cmci_enable(cmi_hdl_t);
221extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int);
222#ifndef __xpv
223extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *);
224extern void gcpu_cmci_trap(cmi_hdl_t);
225extern void gcpu_hdl_poke(cmi_hdl_t);
226#else
227extern void gcpu_xpv_panic_callback(void);
228#endif
229
230/*
231 * Local functions
232 */
233extern void gcpu_mca_poll_init(cmi_hdl_t);
234extern void gcpu_mca_poll_fini(cmi_hdl_t);
235extern void gcpu_mca_poll_start(cmi_hdl_t);
236extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *);
237extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t);
238extern void gcpu_mca_logout(cmi_hdl_t, struct regs *, uint64_t,
239    gcpu_mce_status_t *, boolean_t, int);
240#ifdef __xpv
241extern void gcpu_xpv_mca_init(int);
242#endif /* __xpv */
243
244#endif /* _KERNEL */
245
246#ifdef __cplusplus
247}
248#endif
249
250#endif /* _GCPU_H */
251