xref: /illumos-gate/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h (revision e3d60c9bd991a9826cbfa63b10595d44e123b9c4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef _GCPU_H
28 #define	_GCPU_H
29 
30 #include <sys/types.h>
31 #include <sys/cpu_module_impl.h>
32 #include <sys/cpu_module_ms.h>
33 #include <sys/ksynch.h>
34 #include <sys/systm.h>
35 #include <sys/fm/util.h>
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
/*
 * Sizing parameters for the MCA errorq: a per-cpu slot count together with
 * lower and upper bounds on the total number of slots.  How the three are
 * combined is decided by the code that creates the errorq, which is not
 * part of this header.
 */
41 #define	GCPU_MCA_ERRS_PERCPU	10	/* errorq slots per cpu */
42 #define	GCPU_MCA_MIN_ERRORS	30	/* minimum total errorq slots */
43 #define	GCPU_MCA_MAX_ERRORS	100	/* maximum total errorq slots */
44 
/*
 * Forward declaration so that structures defined ahead of struct gcpu_data
 * (gcpu_logout_t, for one) can hold a pointer to it.
 */
45 typedef struct gcpu_data gcpu_data_t;
46 
/*
 * Error code mask with all 16 bits set - the same width as the
 * ged_errcode_mask_on and ged_errcode_mask_off members of
 * gcpu_error_disp_t.
 */
47 #define	GCPU_ERRCODE_MASK_ALL		0xffff
48 
/*
 * Describes how one class of error is reported.  An error code matches an
 * entry when every bit of ged_errcode_mask_on is set in it and every bit of
 * ged_errcode_mask_off is clear; the other members give the ereport class
 * and compound-error format strings and the ereport payload members.
 * NOTE(review): the tables built from this type and the code that performs
 * the match are outside this header - confirm the details there.
 */
49 typedef struct gcpu_error_disp {
50 	const char *ged_class_fmt;	/* ereport class formatter (last bit) */
51 	const char *ged_compound_fmt;	/* compound error formatter */
52 	uint64_t ged_ereport_members;	/* ereport payload members */
53 	uint16_t ged_errcode_mask_on;	/* errcode bits that must be set ... */
54 	uint16_t ged_errcode_mask_off;	/* ... and must be clear for a match */
55 } gcpu_error_disp_t;
56 
57 /*
58  * For errorq_dispatch we need to have a single contiguous structure
59  * capturing all our logout data.  We do not know in advance how many
60  * error detector banks there are in this cpu model, so we'll manually
61  * allocate additional space for the gcl_data array of gcpu_logout_t below.
 *
 * gcpu_bank_logout_t is the element type of that array: the logged
 * MCi_STATUS, MCi_ADDR and MCi_MISC values for one bank, plus the error
 * disposition and a deferred-clear count for that bank.
62  */
63 typedef struct gcpu_bank_logout {
64 	uint64_t gbl_status;		/* MCi_STATUS value */
65 	uint64_t gbl_addr;		/* MCi_ADDR value */
66 	uint64_t gbl_misc;		/* MCi_MISC value */
67 	uint64_t gbl_disp;		/* Error disposition for this bank */
68 	uint32_t gbl_clrdefcnt;		/* Count of deferred status clears */
69 } gcpu_bank_logout_t;
70 
71 /*
72  * The data structure we "logout" all error telemetry from all banks of
73  * a cpu to.  The gcl_data array declared with 1 member below will actually
74  * have gcl_nbanks members - variable with the actual cpu model present.
75  * After the gcl_data array there is a further model-specific array that
76  * may be allocated, and gcl_ms_logout will point to that if present.
77  * This cpu logout data must form one contiguous chunk of memory for
78  * dispatch with errorq_dispatch.
 *
 * Because gcl_data is declared with a single element,
 * sizeof (gcpu_logout_t) accounts for only one bank; the real size of a
 * logout area has to be computed by whoever allocates it.
79  */
80 typedef struct gcpu_logout {
81 	gcpu_data_t *gcl_gcpu;		/* pointer to per-cpu gcpu_data_t */
82 	uintptr_t gcl_ip;		/* instruction pointer from #mc trap */
83 	uint64_t gcl_timestamp;		/* gethrtime() at logout */
84 	uint64_t gcl_mcg_status;	/* MCG_STATUS register value */
85 	uint64_t gcl_flags;		/* GCPU_GCL_F_* flags */
86 	pc_t gcl_stack[FM_STK_DEPTH];	/* saved stack trace, if any */
87 	int gcl_stackdepth;		/* saved stack trace depth */
88 	int gcl_nbanks;			/* number of banks in array below */
89 	void *gcl_ms_logout;		/* Model-specific area after gcl_data */
90 	gcpu_bank_logout_t gcl_data[1];	/* Bank logout areas - must be last */
91 } gcpu_logout_t;
92 
93 /*
94  * Values for the gcl_flags member of gcpu_logout_t
95  */
96 #define	GCPU_GCL_F_PRIV		0x1	/* #MC during privileged code */
97 #define	GCPU_GCL_F_TES_P	0x2	/* MCG_CAP indicates TES_P */
98 
/*
 * Control, status, address and misc values recorded for one MCA bank.
 * NOTE(review): the bios_ prefix suggests these are the register values as
 * the BIOS left them - confirm against the code that fills this in.
 */
99 struct gcpu_bios_bankcfg {
100 	uint64_t bios_bank_ctl;
101 	uint64_t bios_bank_status;
102 	uint64_t bios_bank_addr;
103 	uint64_t bios_bank_misc;
104 };
105 
/*
 * Recorded global MCA configuration (MCG_CAP and MCG_CTL values) plus a
 * pointer to the per-bank settings.  An instance is embedded in gcpu_mca_t
 * as gcpu_mca_bioscfg.
 * NOTE(review): bios_bankcfg presumably points to an array with one entry
 * per bank - confirm against the allocating code.
 */
106 struct gcpu_bios_cfg {
107 	uint64_t bios_mcg_cap;
108 	uint64_t bios_mcg_ctl;
109 	struct gcpu_bios_bankcfg *bios_bankcfg;
110 };
111 
/*
 * Values for the mpt_what member of gcpu_mca_poll_trace_t, identifying
 * which kind of event a trace record describes.
 */
112 #define	GCPU_MPT_WHAT_CYC_ERR		0	/* cyclic-induced poll */
113 #define	GCPU_MPT_WHAT_POKE_ERR		1	/* manually-induced poll */
114 #define	GCPU_MPT_WHAT_UNFAULTING	2	/* discarded error state */
115 #define	GCPU_MPT_WHAT_MC_ERR		3	/* #MC trap */
116 #define	GCPU_MPT_WHAT_CMCI_ERR		4	/* CMCI interrupt */
117 
/*
 * A single trace record: when an event happened, which kind of event it
 * was and how many errors were found.  The two pad members are explicit
 * padding and carry no information.
 */
118 typedef struct gcpu_mca_poll_trace {
119 	hrtime_t mpt_when;		/* timestamp of event */
120 	uint8_t mpt_what;		/* GCPU_MPT_WHAT_* (which event?) */
121 	uint8_t mpt_nerr;		/* number of errors discovered */
122 	uint16_t mpt_pad1;		/* explicit padding */
123 	uint32_t mpt_pad2;		/* explicit padding */
124 } gcpu_mca_poll_trace_t;
125 
/*
 * Control structure for a set of trace records: a pointer to the records
 * and the index of the one most recently filled.  The number of records
 * is not recorded here; it is known only to the code that allocates them.
 */
126 typedef struct gcpu_mca_poll_trace_ctl {
127 	gcpu_mca_poll_trace_t *mptc_tbufs;	/* trace buffers */
128 	uint_t mptc_curtrace;			/* last buffer filled */
129 } gcpu_mca_poll_trace_ctl_t;
130 
131 
132 /*
133  * Per-bank counts and durations kept to support enabling and disabling
134  * cmci at runtime.
135  */
136 typedef struct gcpu_mca_cmci {
137 	uint32_t cmci_cap;	/* cmci capability for this bank */
138 	uint32_t ncmci;		/* number of correctable errors between polls */
139 	uint32_t drtcmci;	/* duration of no cmci when cmci is disabled */
140 	uint32_t cmci_enabled;	/* cmci enable/disable status for this bank */
141 } gcpu_mca_cmci_t;
142 
143 /* Indices into the gcpu_mca_logout array of gcpu_mca_t below */
144 #define	GCPU_MCA_LOGOUT_EXCEPTION	0	/* area for #MC */
145 #define	GCPU_MCA_LOGOUT_POLLER_1	1	/* next/prev poll area */
146 #define	GCPU_MCA_LOGOUT_POLLER_2	2	/* prev/next poll area */
147 #define	GCPU_MCA_LOGOUT_NUM		3	/* number of logout areas */
148 
/*
 * MCA state for one cpu; embedded in struct gcpu_data as gcpu_mca.
 * It holds the logout areas (one for #MC and two that alternate between
 * polls), the bios_* configuration values, bank bookkeeping, poll tracing
 * state and cmci state.
 */
149 typedef struct gcpu_mca {
150 	gcpu_logout_t *gcpu_mca_logout[GCPU_MCA_LOGOUT_NUM]; /* GCPU_MCA_LOGOUT_* */
151 	uint32_t gcpu_mca_nextpoll_idx;	/* logout area for next poll */
152 	struct gcpu_bios_cfg gcpu_mca_bioscfg;	/* bios_* MCA config values */
153 	uint_t gcpu_mca_nbanks;		/* presumably the bank count - confirm */
154 	uint32_t gcpu_actv_banks;	/* MCA banks we initialized */
155 	size_t gcpu_mca_lgsz;		/* size of gcpu_mca_logout structs */
156 	uint_t gcpu_mca_flags;		/* GCPU_MCA_F_* */
157 	hrtime_t gcpu_mca_lastpoll;	/* presumably time of last poll - confirm */
158 	gcpu_mca_poll_trace_ctl_t gcpu_mca_polltrace;	/* poll trace state */
159 	uint32_t gcpu_mca_first_poll_cmci_enabled; /* cmci on in first poll */
160 	gcpu_mca_cmci_t *gcpu_bank_cmci; /* cmci state; presumably per bank */
161 } gcpu_mca_t;
162 
/*
 * Summary of what a logout of all banks found: a total error count, the
 * disposition information, and counts of errors broken down by PCC/UC and
 * by CMS_ERRSCOPE_* classification.  gcpu_mca_logout() takes a pointer to
 * one of these.
 * NOTE(review): presumably gcpu_mca_logout() fills it in for its caller -
 * confirm against the implementation.
 */
163 typedef struct gcpu_mce_status {
164 	uint_t mce_nerr;	/* total errors found in logout of all banks */
165 	uint64_t mce_disp;	/* Disposition information */
166 	uint_t mce_npcc;	/* number of errors with PCC */
167 	uint_t mce_npcc_ok;	/* PCC with CMS_ERRSCOPE_CURCONTEXT_OK */
168 	uint_t mce_nuc;		/* number of errors with UC */
169 	uint_t mce_nuc_ok;	/* UC with CMS_ERRSCOPE_CLEARED_UC */
170 	uint_t mce_nuc_poisoned; /* UC with CMS_ERRSCOPE_POISONED */
171 	uint_t mce_forcefatal;	/* CMS_ERRSCOPE_FORCE_FATAL */
172 	uint_t mce_ignored;	/* CMS_ERRSCOPE_IGNORE_ERR */
173 } gcpu_mce_status_t;
174 
175 /*
176  * Flags for the gcpu_mca_flags member of gcpu_mca_t
177  */
178 #define	GCPU_MCA_F_UNFAULTING		0x1	/* CPU exiting faulted state */
179 
180 /*
181  * State shared by all cpus on a chip.  Each cpu's struct gcpu_data
 * refers to it through its gcpu_shared pointer.
182  */
183 struct gcpu_chipshared {
184 	kmutex_t gcpus_cfglock;		/* serialize MCA config from chip cores */
185 	kmutex_t gcpus_poll_lock;	/* serialize pollers on the same chip */
186 	uint32_t gcpus_actv_banks;	/* MCA bank numbers active on chip */
187 };
188 
/*
 * Per-cpu state of the generic cpu module: the cpu's MCA state, its
 * cmi_hdl_t handle and a pointer to the state it shares with the other
 * cpus on the same chip.  Also known as gcpu_data_t.
 */
189 struct gcpu_data {
190 	gcpu_mca_t gcpu_mca;			/* MCA state for this CPU */
191 	cmi_hdl_t gcpu_hdl;			/* associated handle */
192 	struct gcpu_chipshared *gcpu_shared;	/* Shared state for the chip */
193 };
194 
195 #ifdef _KERNEL
196 
/*
 * Forward declaration; the prototypes below only pass pointers to
 * struct regs, so its definition is not needed here.
 */
197 struct regs;
198 
199 /*
200  * CMI implementation
 *
 * Every entry point takes the cmi_hdl_t of the cpu it operates on.
 * NOTE(review): these presumably back the CMI operations for the generic
 * cpu module; their definitions and exact contracts are outside this
 * header - confirm there before relying on any behavior.
201  */
202 extern int gcpu_init(cmi_hdl_t, void **);
203 extern void gcpu_post_startup(cmi_hdl_t);
204 extern void gcpu_post_mpstartup(cmi_hdl_t);
205 extern void gcpu_faulted_enter(cmi_hdl_t);
206 extern void gcpu_faulted_exit(cmi_hdl_t);
207 extern void gcpu_mca_init(cmi_hdl_t);
208 extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int);
209 extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *);
210 extern void gcpu_cmci_trap(cmi_hdl_t);
211 extern void gcpu_hdl_poke(cmi_hdl_t);
211 extern void gcpu_hdl_poke(cmi_hdl_t);
212 
213 /*
214  * CMI global variable (declared here, defined elsewhere)
 *
 * NOTE(review): by its name this looks like a switch controlling whether
 * CMCI is used - confirm against its definition.
215  */
216 extern int cmi_enable_cmci;
217 
218 /*
219  * Local functions
 *
 * NOTE(review): "local" presumably means used only between the source
 * files of the generic cpu module - confirm.  The prototypes carry no
 * parameter names, so see the definition of gcpu_mca_logout() for the
 * meaning of its uint64_t, boolean_t and int arguments.
220  */
221 extern void gcpu_mca_poll_init(cmi_hdl_t);
222 extern void gcpu_mca_poll_start(cmi_hdl_t);
223 extern void gcpu_mca_logout(cmi_hdl_t, struct regs *, uint64_t,
224     gcpu_mce_status_t *, boolean_t, int);
225 
226 #endif /* _KERNEL */
227 
228 #ifdef __cplusplus
229 }
230 #endif
231 
232 #endif /* _GCPU_H */
233