17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 520c794b3Sgavinm * Common Development and Distribution License (the "License"). 620c794b3Sgavinm * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217aec1d6eScindi 227c478bd9Sstevel@tonic-gate /* 23c84b7bbeSAdrian Frost * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24*918e0d92SRobert Mustacchi * Copyright (c) 2018, Joyent, Inc. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277aec1d6eScindi #ifndef _GCPU_H 287aec1d6eScindi #define _GCPU_H 297c478bd9Sstevel@tonic-gate 307aec1d6eScindi #include <sys/types.h> 3120c794b3Sgavinm #include <sys/cpu_module_impl.h> 3220c794b3Sgavinm #include <sys/cpu_module_ms.h> 3320c794b3Sgavinm #include <sys/ksynch.h> 3420c794b3Sgavinm #include <sys/systm.h> 3520c794b3Sgavinm #include <sys/fm/util.h> 367aec1d6eScindi 377aec1d6eScindi #ifdef __cplusplus 387c478bd9Sstevel@tonic-gate extern "C" { 397c478bd9Sstevel@tonic-gate #endif 407c478bd9Sstevel@tonic-gate 4120c794b3Sgavinm #define GCPU_MCA_ERRS_PERCPU 10 /* errorq slots per cpu */ 4220c794b3Sgavinm #define GCPU_MCA_MIN_ERRORS 30 /* minimum total errorq slots */ 4320c794b3Sgavinm #define GCPU_MCA_MAX_ERRORS 100 /* maximum total errorq slots */ 44822fb41dStsien 4520c794b3Sgavinm typedef struct gcpu_data gcpu_data_t; 4620c794b3Sgavinm 4720c794b3Sgavinm #define GCPU_ERRCODE_MASK_ALL 0xffff 4820c794b3Sgavinm 4920c794b3Sgavinm typedef struct gcpu_error_disp { 5020c794b3Sgavinm const char *ged_class_fmt; /* ereport class formatter (last bit) */ 5120c794b3Sgavinm const char *ged_compound_fmt; /* compound error formatter */ 5220c794b3Sgavinm uint64_t ged_ereport_members; /* ereport payload members */ 5320c794b3Sgavinm uint16_t ged_errcode_mask_on; /* errcode bits that must be set ... */ 5420c794b3Sgavinm uint16_t ged_errcode_mask_off; /* ... and must be clear for a match */ 5520c794b3Sgavinm } gcpu_error_disp_t; 5620c794b3Sgavinm 5720c794b3Sgavinm /* 5820c794b3Sgavinm * For errorq_dispatch we need to have a single contiguous structure 5920c794b3Sgavinm * capturing all our logout data. We do not know in advance how many 6020c794b3Sgavinm * error detector banks there are in this cpu model, so we'll manually 6120c794b3Sgavinm * allocate additional space for the gcl_banks array below. 6220c794b3Sgavinm */ 6320c794b3Sgavinm typedef struct gcpu_bank_logout { 6420c794b3Sgavinm uint64_t gbl_status; /* MCi_STATUS value */ 6520c794b3Sgavinm uint64_t gbl_addr; /* MCi_ADDR value */ 6620c794b3Sgavinm uint64_t gbl_misc; /* MCi_MISC value */ 6720c794b3Sgavinm uint64_t gbl_disp; /* Error disposition for this bank */ 6820c794b3Sgavinm uint32_t gbl_clrdefcnt; /* Count of deferred status clears */ 6920c794b3Sgavinm } gcpu_bank_logout_t; 7020c794b3Sgavinm 7120c794b3Sgavinm /* 7220c794b3Sgavinm * The data structure we "logout" all error telemetry from all banks of 7320c794b3Sgavinm * a cpu to. The gcl_data array declared with 1 member below will actually 7420c794b3Sgavinm * have gcl_nbanks members - variable with the actual cpu model present. 7520c794b3Sgavinm * After the gcl_data array there is a further model-specific array that 7620c794b3Sgavinm * may be allocated, and gcl_ms_logout will point to that if present. 7720c794b3Sgavinm * This cpu logout data must form one contiguous chunk of memory for 7820c794b3Sgavinm * dispatch with errorq_dispatch. 7920c794b3Sgavinm */ 8020c794b3Sgavinm typedef struct gcpu_logout { 8120c794b3Sgavinm gcpu_data_t *gcl_gcpu; /* pointer to per-cpu gcpu_data_t */ 8220c794b3Sgavinm uintptr_t gcl_ip; /* instruction pointer from #mc trap */ 8320c794b3Sgavinm uint64_t gcl_timestamp; /* gethrtime() at logout */ 8420c794b3Sgavinm uint64_t gcl_mcg_status; /* MCG_STATUS register value */ 8520c794b3Sgavinm uint64_t gcl_flags; /* Flags */ 8620c794b3Sgavinm pc_t gcl_stack[FM_STK_DEPTH]; /* saved stack trace, if any */ 8720c794b3Sgavinm int gcl_stackdepth; /* saved stack trace depth */ 88c84b7bbeSAdrian Frost int ismc; /* is a machine check flag */ 8920c794b3Sgavinm int gcl_nbanks; /* number of banks in array below */ 9020c794b3Sgavinm void *gcl_ms_logout; /* Model-specific area after gcl_data */ 9120c794b3Sgavinm gcpu_bank_logout_t gcl_data[1]; /* Bank logout areas - must be last */ 9220c794b3Sgavinm } gcpu_logout_t; 9320c794b3Sgavinm 9420c794b3Sgavinm /* 9520c794b3Sgavinm * gcl_flag values 9620c794b3Sgavinm */ 9720c794b3Sgavinm #define GCPU_GCL_F_PRIV 0x1 /* #MC during privileged code */ 9820c794b3Sgavinm #define GCPU_GCL_F_TES_P 0x2 /* MCG_CAP indicates TES_P */ 9920c794b3Sgavinm 10020c794b3Sgavinm struct gcpu_bios_bankcfg { 10120c794b3Sgavinm uint64_t bios_bank_ctl; 10220c794b3Sgavinm uint64_t bios_bank_status; 10320c794b3Sgavinm uint64_t bios_bank_addr; 10420c794b3Sgavinm uint64_t bios_bank_misc; 10520c794b3Sgavinm }; 10620c794b3Sgavinm 10720c794b3Sgavinm struct gcpu_bios_cfg { 10820c794b3Sgavinm uint64_t bios_mcg_cap; 10920c794b3Sgavinm uint64_t bios_mcg_ctl; 11020c794b3Sgavinm struct gcpu_bios_bankcfg *bios_bankcfg; 11120c794b3Sgavinm }; 11220c794b3Sgavinm 113e4b86885SCheng Sean Ye /* 114e4b86885SCheng Sean Ye * Events types in poll trace records. Keep these in sync with 115e4b86885SCheng Sean Ye * the generic cpu mdb module names for each (see gcpu_mpt_dump in mdb). 116e4b86885SCheng Sean Ye */ 11720c794b3Sgavinm #define GCPU_MPT_WHAT_CYC_ERR 0 /* cyclic-induced poll */ 11820c794b3Sgavinm #define GCPU_MPT_WHAT_POKE_ERR 1 /* manually-induced poll */ 11920c794b3Sgavinm #define GCPU_MPT_WHAT_UNFAULTING 2 /* discarded error state */ 120e3d60c9bSAdrian Frost #define GCPU_MPT_WHAT_MC_ERR 3 /* MC# */ 121e3d60c9bSAdrian Frost #define GCPU_MPT_WHAT_CMCI_ERR 4 /* CMCI interrupt */ 122e4b86885SCheng Sean Ye #define GCPU_MPT_WHAT_XPV_VIRQ 5 /* MCA_VIRQ in dom0 */ 123e4b86885SCheng Sean Ye #define GCPU_MPT_WHAT_XPV_VIRQ_LOGOUT 6 /* MCA_VIRQ logout complete */ 12420c794b3Sgavinm 125e4b86885SCheng Sean Ye typedef struct gcpu_poll_trace { 12620c794b3Sgavinm hrtime_t mpt_when; /* timestamp of event */ 12720c794b3Sgavinm uint8_t mpt_what; /* GCPU_MPT_WHAT_* (which event?) */ 12820c794b3Sgavinm uint8_t mpt_nerr; /* number of errors discovered */ 12920c794b3Sgavinm uint16_t mpt_pad1; 13020c794b3Sgavinm uint32_t mpt_pad2; 131e4b86885SCheng Sean Ye } gcpu_poll_trace_t; 13220c794b3Sgavinm 133e4b86885SCheng Sean Ye typedef struct gcpu_poll_trace_ctl { 134e4b86885SCheng Sean Ye gcpu_poll_trace_t *mptc_tbufs; /* trace buffers */ 13520c794b3Sgavinm uint_t mptc_curtrace; /* last buffer filled */ 136e4b86885SCheng Sean Ye } gcpu_poll_trace_ctl_t; 13720c794b3Sgavinm 138e3d60c9bSAdrian Frost 139e3d60c9bSAdrian Frost /* 140e3d60c9bSAdrian Frost * For counting some of the important number or time for runtime 141e3d60c9bSAdrian Frost * cmci enable/disable 142e3d60c9bSAdrian Frost */ 143e3d60c9bSAdrian Frost typedef struct gcpu_mca_cmci { 144e3d60c9bSAdrian Frost uint32_t cmci_cap; /* cmci capability for this bank */ 145e3d60c9bSAdrian Frost uint32_t ncmci; /* number of correctable errors between polls */ 146e3d60c9bSAdrian Frost uint32_t drtcmci; /* duration of no cmci when cmci is disabled */ 147e3d60c9bSAdrian Frost uint32_t cmci_enabled; /* cmci enable/disable status for this bank */ 148e3d60c9bSAdrian Frost } gcpu_mca_cmci_t; 149e3d60c9bSAdrian Frost 15020c794b3Sgavinm /* Index for gcpu_mca_logout array below */ 15120c794b3Sgavinm #define GCPU_MCA_LOGOUT_EXCEPTION 0 /* area for #MC */ 15220c794b3Sgavinm #define GCPU_MCA_LOGOUT_POLLER_1 1 /* next/prev poll area */ 15320c794b3Sgavinm #define GCPU_MCA_LOGOUT_POLLER_2 2 /* prev/next poll area */ 15420c794b3Sgavinm #define GCPU_MCA_LOGOUT_NUM 3 155822fb41dStsien 1567aec1d6eScindi typedef struct gcpu_mca { 15720c794b3Sgavinm gcpu_logout_t *gcpu_mca_logout[GCPU_MCA_LOGOUT_NUM]; 15820c794b3Sgavinm uint32_t gcpu_mca_nextpoll_idx; /* logout area for next poll */ 15920c794b3Sgavinm struct gcpu_bios_cfg gcpu_mca_bioscfg; 1607aec1d6eScindi uint_t gcpu_mca_nbanks; 16120c794b3Sgavinm size_t gcpu_mca_lgsz; /* size of gcpu_mca_logout structs */ 16220c794b3Sgavinm uint_t gcpu_mca_flags; /* GCPU_MCA_F_* */ 16320c794b3Sgavinm hrtime_t gcpu_mca_lastpoll; 164e4b86885SCheng Sean Ye gcpu_poll_trace_ctl_t gcpu_polltrace; 165e3d60c9bSAdrian Frost uint32_t gcpu_mca_first_poll_cmci_enabled; /* cmci on in first poll */ 166e3d60c9bSAdrian Frost gcpu_mca_cmci_t *gcpu_bank_cmci; 1677aec1d6eScindi } gcpu_mca_t; 168822fb41dStsien 16920c794b3Sgavinm typedef struct gcpu_mce_status { 17020c794b3Sgavinm uint_t mce_nerr; /* total errors found in logout of all banks */ 17120c794b3Sgavinm uint64_t mce_disp; /* Disposition information */ 17220c794b3Sgavinm uint_t mce_npcc; /* number of errors with PCC */ 17320c794b3Sgavinm uint_t mce_npcc_ok; /* PCC with CMS_ERRSCOPE_CURCONTEXT_OK */ 17420c794b3Sgavinm uint_t mce_nuc; /* number of errors with UC */ 17520c794b3Sgavinm uint_t mce_nuc_ok; /* UC with CMS_ERRSCOPE_CLEARED_UC */ 17620c794b3Sgavinm uint_t mce_nuc_poisoned; /* UC with CMS_ERRSCOPE_POISONED */ 17720c794b3Sgavinm uint_t mce_forcefatal; /* CMS_ERRSCOPE_FORCE_FATAL */ 17820c794b3Sgavinm uint_t mce_ignored; /* CMS_ERRSCOPE_IGNORE_ERR */ 17920c794b3Sgavinm } gcpu_mce_status_t; 18020c794b3Sgavinm 18120c794b3Sgavinm /* 18220c794b3Sgavinm * Flags for gcpu_mca_flags 18320c794b3Sgavinm */ 18420c794b3Sgavinm #define GCPU_MCA_F_UNFAULTING 0x1 /* CPU exiting faulted state */ 185*918e0d92SRobert Mustacchi #define GCPU_MCA_F_CMCI_CAPABLE 0x2 /* CPU supports CMCI */ 186*918e0d92SRobert Mustacchi #define GCPU_MCA_F_CMCI_ENABLE 0x4 /* CPU CMCI enabled */ 18720c794b3Sgavinm 18820c794b3Sgavinm /* 18920c794b3Sgavinm * State shared by all cpus on a chip 19020c794b3Sgavinm */ 19120c794b3Sgavinm struct gcpu_chipshared { 19220c794b3Sgavinm kmutex_t gcpus_cfglock; /* serial MCA config from chip cores */ 19320c794b3Sgavinm kmutex_t gcpus_poll_lock; /* serialize pollers on the same chip */ 19420c794b3Sgavinm uint32_t gcpus_actv_banks; /* MCA bank numbers active on chip */ 195a3114836SGerry Liu volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */ 1962a613b59SRobert Mustacchi char *gcpus_ident; /* ident string, if available */ 19720c794b3Sgavinm }; 19820c794b3Sgavinm 19920c794b3Sgavinm struct gcpu_data { 20020c794b3Sgavinm gcpu_mca_t gcpu_mca; /* MCA state for this CPU */ 20120c794b3Sgavinm cmi_hdl_t gcpu_hdl; /* associated handle */ 20220c794b3Sgavinm struct gcpu_chipshared *gcpu_shared; /* Shared state for the chip */ 20320c794b3Sgavinm }; 20420c794b3Sgavinm 20520c794b3Sgavinm #ifdef _KERNEL 206822fb41dStsien 2077aec1d6eScindi struct regs; 2087c478bd9Sstevel@tonic-gate 20920c794b3Sgavinm /* 21020c794b3Sgavinm * CMI implementation 21120c794b3Sgavinm */ 21220c794b3Sgavinm extern int gcpu_init(cmi_hdl_t, void **); 213a3114836SGerry Liu extern void gcpu_fini(cmi_hdl_t); 21420c794b3Sgavinm extern void gcpu_post_startup(cmi_hdl_t); 21520c794b3Sgavinm extern void gcpu_post_mpstartup(cmi_hdl_t); 21620c794b3Sgavinm extern void gcpu_faulted_enter(cmi_hdl_t); 21720c794b3Sgavinm extern void gcpu_faulted_exit(cmi_hdl_t); 21820c794b3Sgavinm extern void gcpu_mca_init(cmi_hdl_t); 219*918e0d92SRobert Mustacchi extern void gcpu_mca_fini(cmi_hdl_t); 220*918e0d92SRobert Mustacchi extern void gcpu_mca_cmci_enable(cmi_hdl_t); 22120c794b3Sgavinm extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int); 222e4b86885SCheng Sean Ye #ifndef __xpv 22320c794b3Sgavinm extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *); 224e3d60c9bSAdrian Frost extern void gcpu_cmci_trap(cmi_hdl_t); 22520c794b3Sgavinm extern void gcpu_hdl_poke(cmi_hdl_t); 226e4b86885SCheng Sean Ye #else 227e4b86885SCheng Sean Ye extern void gcpu_xpv_panic_callback(void); 228e4b86885SCheng Sean Ye #endif 22920c794b3Sgavinm 23020c794b3Sgavinm /* 23120c794b3Sgavinm * Local functions 23220c794b3Sgavinm */ 23320c794b3Sgavinm extern void gcpu_mca_poll_init(cmi_hdl_t); 234a3114836SGerry Liu extern void gcpu_mca_poll_fini(cmi_hdl_t); 23520c794b3Sgavinm extern void gcpu_mca_poll_start(cmi_hdl_t); 236e4b86885SCheng Sean Ye extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *); 237e4b86885SCheng Sean Ye extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t); 23820c794b3Sgavinm extern void gcpu_mca_logout(cmi_hdl_t, struct regs *, uint64_t, 239e3d60c9bSAdrian Frost gcpu_mce_status_t *, boolean_t, int); 240e4b86885SCheng Sean Ye #ifdef __xpv 241e4b86885SCheng Sean Ye extern void gcpu_xpv_mca_init(int); 242e4b86885SCheng Sean Ye #endif /* __xpv */ 24320c794b3Sgavinm 24420c794b3Sgavinm #endif /* _KERNEL */ 2457c478bd9Sstevel@tonic-gate 2467c478bd9Sstevel@tonic-gate #ifdef __cplusplus 2477c478bd9Sstevel@tonic-gate } 2487c478bd9Sstevel@tonic-gate #endif 2497c478bd9Sstevel@tonic-gate 2507aec1d6eScindi #endif /* _GCPU_H */ 251