/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
217aec1d6eScindi 
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 */
267c478bd9Sstevel@tonic-gate 
277aec1d6eScindi #ifndef _GCPU_H
287aec1d6eScindi #define	_GCPU_H
297c478bd9Sstevel@tonic-gate 
307aec1d6eScindi #include <sys/types.h>
3120c794b3Sgavinm #include <sys/cpu_module_impl.h>
3220c794b3Sgavinm #include <sys/cpu_module_ms.h>
3320c794b3Sgavinm #include <sys/ksynch.h>
3420c794b3Sgavinm #include <sys/systm.h>
3520c794b3Sgavinm #include <sys/fm/util.h>
367aec1d6eScindi 
377aec1d6eScindi #ifdef __cplusplus
387c478bd9Sstevel@tonic-gate extern "C" {
397c478bd9Sstevel@tonic-gate #endif
407c478bd9Sstevel@tonic-gate 
/*
 * Sizing of the MCA error queue, expressed in errorq slots.
 */
#define	GCPU_MCA_ERRS_PERCPU	10	/* errorq slots per cpu */
#define	GCPU_MCA_MIN_ERRORS	30	/* minimum total errorq slots */
#define	GCPU_MCA_MAX_ERRORS	100	/* maximum total errorq slots */
44822fb41dStsien 
/*
 * Forward declaration so that structures declared before struct gcpu_data
 * can hold a gcpu_data_t pointer.
 */
typedef struct gcpu_data gcpu_data_t;

/*
 * All-ones 16-bit mask; the same width as the uint16_t ged_errcode_mask_*
 * members of gcpu_error_disp_t.
 */
#define	GCPU_ERRCODE_MASK_ALL		0xffff
4820c794b3Sgavinm 
/*
 * An error-disposition entry: an errcode match rule (bits that must be
 * set and bits that must be clear) together with the ereport formatting
 * information that goes with it.
 */
typedef struct gcpu_error_disp {
	/* ereport class formatter (last bit) */
	const char *ged_class_fmt;
	/* compound error formatter */
	const char *ged_compound_fmt;
	/* ereport payload members */
	uint64_t ged_ereport_members;
	/* errcode bits that must be set ... */
	uint16_t ged_errcode_mask_on;
	/* ... and errcode bits that must be clear for a match */
	uint16_t ged_errcode_mask_off;
} gcpu_error_disp_t;
5620c794b3Sgavinm 
/*
 * For errorq_dispatch we need to have a single contiguous structure
 * capturing all our logout data.  We do not know in advance how many
 * error detector banks there are in this cpu model, so we'll manually
 * allocate additional space for the gcl_data array of gcpu_logout_t.
 *
 * Each element of that array is one gcpu_bank_logout_t, holding the
 * values logged out for a single bank.
 */
typedef struct gcpu_bank_logout {
	uint64_t gbl_status;		/* MCi_STATUS value */
	uint64_t gbl_addr;		/* MCi_ADDR value */
	uint64_t gbl_misc;		/* MCi_MISC value */
	uint64_t gbl_disp;		/* Error disposition for this bank */
	uint32_t gbl_clrdefcnt;		/* Count of deferred status clears */
} gcpu_bank_logout_t;
7020c794b3Sgavinm 
7120c794b3Sgavinm /*
7220c794b3Sgavinm  * The data structure we "logout" all error telemetry from all banks of
7320c794b3Sgavinm  * a cpu to.  The gcl_data array declared with 1 member below will actually
7420c794b3Sgavinm  * have gcl_nbanks members - variable with the actual cpu model present.
7520c794b3Sgavinm  * After the gcl_data array there is a further model-specific array that
7620c794b3Sgavinm  * may be allocated, and gcl_ms_logout will point to that if present.
7720c794b3Sgavinm  * This cpu logout data must form one contiguous chunk of memory for
7820c794b3Sgavinm  * dispatch with errorq_dispatch.
7920c794b3Sgavinm  */
8020c794b3Sgavinm typedef struct gcpu_logout {
8120c794b3Sgavinm 	gcpu_data_t *gcl_gcpu;		/* pointer to per-cpu gcpu_data_t */
8220c794b3Sgavinm 	uintptr_t gcl_ip;		/* instruction pointer from #mc trap */
8320c794b3Sgavinm 	uint64_t gcl_timestamp;		/* gethrtime() at logout */
8420c794b3Sgavinm 	uint64_t gcl_mcg_status;	/* MCG_STATUS register value */
8520c794b3Sgavinm 	uint64_t gcl_flags;		/* Flags */
8620c794b3Sgavinm 	pc_t gcl_stack[FM_STK_DEPTH];	/* saved stack trace, if any */
8720c794b3Sgavinm 	int gcl_stackdepth;		/* saved stack trace depth */
88c84b7bbeSAdrian Frost 	int ismc;			/* is a machine check flag */
8920c794b3Sgavinm 	int gcl_nbanks;			/* number of banks in array below */
9020c794b3Sgavinm 	void *gcl_ms_logout;		/* Model-specific area after gcl_data */
9120c794b3Sgavinm 	gcpu_bank_logout_t gcl_data[1];	/* Bank logout areas - must be last */
9220c794b3Sgavinm } gcpu_logout_t;
9320c794b3Sgavinm 
/*
 * gcl_flags values (gcpu_logout_t); each flag occupies a distinct bit.
 */
#define	GCPU_GCL_F_PRIV		0x1	/* #MC during privileged code */
#define	GCPU_GCL_F_TES_P	0x2	/* MCG_CAP indicates TES_P */
9920c794b3Sgavinm 
/*
 * Four 64-bit values kept per bank; struct gcpu_bios_cfg points at these
 * through its bios_bankcfg member.
 * NOTE(review): by their names these look like the ctl/status/addr/misc
 * register values of a bank as left by the BIOS - confirm against the
 * code that fills them in.
 */
struct gcpu_bios_bankcfg {
	uint64_t bios_bank_ctl;
	uint64_t bios_bank_status;
	uint64_t bios_bank_addr;
	uint64_t bios_bank_misc;
};
10620c794b3Sgavinm 
/*
 * Two 64-bit global values plus a pointer to per-bank configuration
 * records; embedded in gcpu_mca_t as gcpu_mca_bioscfg.
 * NOTE(review): presumably the MCG_CAP/MCG_CTL values found at startup
 * and an array with one gcpu_bios_bankcfg per bank - confirm.
 */
struct gcpu_bios_cfg {
	uint64_t bios_mcg_cap;
	uint64_t bios_mcg_ctl;
	struct gcpu_bios_bankcfg *bios_bankcfg;
};
11220c794b3Sgavinm 
/*
 * Event types in poll trace records (the mpt_what member of
 * gcpu_poll_trace_t).  Keep these in sync with the generic cpu mdb
 * module names for each (see gcpu_mpt_dump in mdb).
 */
#define	GCPU_MPT_WHAT_CYC_ERR		0	/* cyclic-induced poll */
#define	GCPU_MPT_WHAT_POKE_ERR		1	/* manually-induced poll */
#define	GCPU_MPT_WHAT_UNFAULTING	2	/* discarded error state */
#define	GCPU_MPT_WHAT_MC_ERR		3	/* #MC */
#define	GCPU_MPT_WHAT_CMCI_ERR		4	/* CMCI interrupt */
#define	GCPU_MPT_WHAT_XPV_VIRQ		5	/* MCA_VIRQ in dom0 */
#define	GCPU_MPT_WHAT_XPV_VIRQ_LOGOUT	6	/* MCA_VIRQ logout complete */
12420c794b3Sgavinm 
125e4b86885SCheng Sean Ye typedef struct gcpu_poll_trace {
12620c794b3Sgavinm 	hrtime_t mpt_when;		/* timestamp of event */
12720c794b3Sgavinm 	uint8_t mpt_what;		/* GCPU_MPT_WHAT_* (which event?) */
12820c794b3Sgavinm 	uint8_t mpt_nerr;		/* number of errors discovered */
12920c794b3Sgavinm 	uint16_t mpt_pad1;
13020c794b3Sgavinm 	uint32_t mpt_pad2;
131e4b86885SCheng Sean Ye } gcpu_poll_trace_t;
13220c794b3Sgavinm 
133e4b86885SCheng Sean Ye typedef struct gcpu_poll_trace_ctl {
134e4b86885SCheng Sean Ye 	gcpu_poll_trace_t *mptc_tbufs;	/* trace buffers */
13520c794b3Sgavinm 	uint_t mptc_curtrace;			/* last buffer filled */
136e4b86885SCheng Sean Ye } gcpu_poll_trace_ctl_t;
13720c794b3Sgavinm 
138e3d60c9bSAdrian Frost 
/*
 * Per-bank counters and state used when enabling and disabling CMCI
 * at runtime.
 */
typedef struct gcpu_mca_cmci {
	uint32_t cmci_cap;	/* cmci capability for this bank */
	uint32_t ncmci;		/* number of correctable errors between polls */
	uint32_t drtcmci;	/* duration of no cmci when cmci is disabled */
	uint32_t cmci_enabled;	/* cmci enable/disable status for this bank */
} gcpu_mca_cmci_t;
149e3d60c9bSAdrian Frost 
/*
 * Indices into the gcpu_mca_logout array of gcpu_mca_t.  The two poller
 * areas alternate between the "next" and "previous" poll roles;
 * GCPU_MCA_LOGOUT_NUM is the array size.
 */
#define	GCPU_MCA_LOGOUT_EXCEPTION	0	/* area for #MC */
#define	GCPU_MCA_LOGOUT_POLLER_1	1	/* next/prev poll area */
#define	GCPU_MCA_LOGOUT_POLLER_2	2	/* prev/next poll area */
#define	GCPU_MCA_LOGOUT_NUM		3
155822fb41dStsien 
1567aec1d6eScindi typedef struct gcpu_mca {
15720c794b3Sgavinm 	gcpu_logout_t *gcpu_mca_logout[GCPU_MCA_LOGOUT_NUM];
15820c794b3Sgavinm 	uint32_t gcpu_mca_nextpoll_idx;	/* logout area for next poll */
15920c794b3Sgavinm 	struct gcpu_bios_cfg gcpu_mca_bioscfg;
1607aec1d6eScindi 	uint_t gcpu_mca_nbanks;
16120c794b3Sgavinm 	size_t gcpu_mca_lgsz;		/* size of gcpu_mca_logout structs */
16220c794b3Sgavinm 	uint_t gcpu_mca_flags;		/* GCPU_MCA_F_* */
16320c794b3Sgavinm 	hrtime_t gcpu_mca_lastpoll;
164e4b86885SCheng Sean Ye 	gcpu_poll_trace_ctl_t gcpu_polltrace;
165e3d60c9bSAdrian Frost 	uint32_t gcpu_mca_first_poll_cmci_enabled; /* cmci on in first poll */
166e3d60c9bSAdrian Frost 	gcpu_mca_cmci_t *gcpu_bank_cmci;
1677aec1d6eScindi } gcpu_mca_t;
168822fb41dStsien 
16920c794b3Sgavinm typedef struct gcpu_mce_status {
17020c794b3Sgavinm 	uint_t mce_nerr;	/* total errors found in logout of all banks */
17120c794b3Sgavinm 	uint64_t mce_disp;	/* Disposition information */
17220c794b3Sgavinm 	uint_t mce_npcc;	/* number of errors with PCC */
17320c794b3Sgavinm 	uint_t mce_npcc_ok;	/* PCC with CMS_ERRSCOPE_CURCONTEXT_OK */
17420c794b3Sgavinm 	uint_t mce_nuc;		/* number of errors with UC */
17520c794b3Sgavinm 	uint_t mce_nuc_ok;	/* UC with CMS_ERRSCOPE_CLEARED_UC */
17620c794b3Sgavinm 	uint_t mce_nuc_poisoned; /* UC with CMS_ERRSCOPE_POISONED */
17720c794b3Sgavinm 	uint_t mce_forcefatal;	/* CMS_ERRSCOPE_FORCE_FATAL */
17820c794b3Sgavinm 	uint_t mce_ignored;	/* CMS_ERRSCOPE_IGNORE_ERR */
17920c794b3Sgavinm } gcpu_mce_status_t;
18020c794b3Sgavinm 
/*
 * Flags for gcpu_mca_flags (gcpu_mca_t); each flag occupies a distinct bit.
 */
#define	GCPU_MCA_F_UNFAULTING		0x1	/* CPU exiting faulted state */
#define	GCPU_MCA_F_CMCI_CAPABLE		0x2	/* CPU supports CMCI */
#define	GCPU_MCA_F_CMCI_ENABLE		0x4	/* CPU CMCI enabled */
18720c794b3Sgavinm 
18820c794b3Sgavinm /*
18920c794b3Sgavinm  * State shared by all cpus on a chip
19020c794b3Sgavinm  */
19120c794b3Sgavinm struct gcpu_chipshared {
19220c794b3Sgavinm 	kmutex_t gcpus_cfglock;		/* serial MCA config from chip cores */
19320c794b3Sgavinm 	kmutex_t gcpus_poll_lock;	/* serialize pollers on the same chip */
19420c794b3Sgavinm 	uint32_t gcpus_actv_banks;	/* MCA bank numbers active on chip */
195a3114836SGerry Liu 	volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */
1962a613b59SRobert Mustacchi 	char *gcpus_ident;		/* ident string, if available */
19720c794b3Sgavinm };
19820c794b3Sgavinm 
19920c794b3Sgavinm struct gcpu_data {
20020c794b3Sgavinm 	gcpu_mca_t gcpu_mca;			/* MCA state for this CPU */
20120c794b3Sgavinm 	cmi_hdl_t gcpu_hdl;			/* associated handle */
20220c794b3Sgavinm 	struct gcpu_chipshared *gcpu_shared;	/* Shared state for the chip */
20320c794b3Sgavinm };
20420c794b3Sgavinm 
#ifdef _KERNEL

/* Only used by pointer in the prototypes below. */
struct regs;

/*
 * CMI implementation
 */
extern int gcpu_init(cmi_hdl_t, void **);
extern void gcpu_fini(cmi_hdl_t);
extern void gcpu_post_startup(cmi_hdl_t);
extern void gcpu_post_mpstartup(cmi_hdl_t);
extern void gcpu_faulted_enter(cmi_hdl_t);
extern void gcpu_faulted_exit(cmi_hdl_t);
extern void gcpu_mca_init(cmi_hdl_t);
extern void gcpu_mca_fini(cmi_hdl_t);
extern void gcpu_mca_cmci_enable(cmi_hdl_t);
extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int);
#ifndef __xpv
/* Trap and poke entry points; declared only when __xpv is not defined. */
extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *);
extern void gcpu_cmci_trap(cmi_hdl_t);
extern void gcpu_hdl_poke(cmi_hdl_t);
#else
/* Declared only when __xpv is defined. */
extern void gcpu_xpv_panic_callback(void);
#endif

/*
 * Local functions
 */
extern void gcpu_mca_poll_init(cmi_hdl_t);
extern void gcpu_mca_poll_fini(cmi_hdl_t);
extern void gcpu_mca_poll_start(cmi_hdl_t);
extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *);
extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t);
extern void gcpu_mca_logout(cmi_hdl_t, struct regs *, uint64_t,
    gcpu_mce_status_t *, boolean_t, int);
#ifdef __xpv
extern void gcpu_xpv_mca_init(int);
#endif /* __xpv */

#endif /* _KERNEL */
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate #ifdef __cplusplus
2477c478bd9Sstevel@tonic-gate }
2487c478bd9Sstevel@tonic-gate #endif
2497c478bd9Sstevel@tonic-gate 
2507aec1d6eScindi #endif /* _GCPU_H */
251