/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#pragma dictionary "AMD"

/*
 * Eversholt rules for the AMD Opteron CPU/Memory
 */

fru dimm;
asru dimm;

fru chip;
asru chip/cpu;


/* #MEM#
 * GET_ADDR relies on the fact that variables have global scope across an FME.
 * Thus for each FME the assignment only occurs for the first invocation
 * but the comparison happens on each. Thus if the new address matches the
 * address of an existing open FME, then we return true running in the context
 * of that FME. If the new address doesn't match the address of any existing
 * open FME, then we return true in the context of a newly opened FME.
 */
#define GET_ADDR (defined($addr) ? ($addr == payloadprop("addr")) : \
	($addr = payloadprop("addr")))

/*
 * GET_OFFSET records the "offset" value found in the hc-specific data of
 * the first member of the payload "resource" array in $offset.
 */
#define GET_OFFSET ($offset = payloadprop("resource[0].hc-specific.offset"))

/*
 * SET_ADDR is used to set a payload value in the fault that we diagnose
 * for page faults, to record the physical address of the faulting page.
 * SET_OFFSET does the same for the offset captured by GET_OFFSET.
 */
#define SET_ADDR (setpayloadprop("asru-physaddr", $addr))

#define SET_OFFSET (setpayloadprop("asru-offset", $offset))

/*
 * RESOURCE_EXISTS is true if a pair with name "resource" exists in the
 * payload - regardless of type (e.g., nvlist or nvlist array) or value.
 */
#define RESOURCE_EXISTS (payloadprop_defined("resource"))

/*
 * CONTAINS_DIMM is true if the "resource" nvlist array (as used in memory
 * ereports) exists and one of its members matches the path for the
 * dimm node. Our memory propagations are of the form "foo@dimm -> blah@cpu"
 * since cpus detect memory errors; in eversholt such a propagation, where
 * the lhs path and rhs path do not match, expands to the cross-product of
 * all dimms and cpus in the system. We use CONTAINS_DIMM to constrain
 * the propagation such that it only happens if the payload resource
 * matches the dimm.
 */
#define CONTAINS_DIMM (payloadprop_contains("resource", asru(dimm)))

/*
 * The following will tell us whether a syndrome that is known to be
 * correctable (from a mem_ecc1) is single-bit or multi-bit. For a
 * correctable ChipKill syndrome the number of bits set in the lowest
 * nibble indicates how many bits were in error.
 */

#define CBITMASK(synd) ((synd) & 0xf)

#define CKSINGLE(synd) \
	((synd) == 0 || \
	(CBITMASK(synd) == 0x1 || CBITMASK(synd) == 0x2 || \
	CBITMASK(synd) == 0x4 || CBITMASK(synd) == 0x8))

#define SINGLE_BIT_CE \
	(payloadprop("syndrome-type") == "E" || \
	(payloadprop("syndrome-type") == "C" && \
	CKSINGLE(payloadprop("syndrome"))))

#define MULTI_BIT_CE \
	(payloadprop("syndrome-type") == "C" && \
	!CKSINGLE(payloadprop("syndrome")))

/*
 * A single bit fault in a memory dimm can cause:
 *
 * - mem_ce : reported by nb for an access from a remote cpu
 *
 * Single-bit errors are fed into a per-DIMM SERD engine; if a SERD engine
 * trips we diagnose a fault.memory.page so that the response agent can
 * retire the page that caused the trip. If the total number of pages
 * faulted in this way on a single DIMM exceeds a threshold we will
 * diagnose a fault.memory.dimm_sb against the DIMM.
 *
 * Multibit ChipKill-correctable errors produce an immediate page fault
 * and corresponding fault.memory.dimm_ck. This is achieved through
 * SERD engines using N=0 so the facility is there to be a little more
 * tolerant of these errors.
 *
 * Uncorrectable errors produce an immediate page fault and corresponding
 * fault.memory.dimm_ue.
 *
 * Page faults are essentially internal - action is only required when
 * they are accompanied by a dimm fault. As such we include message=0
 * on page faults.
 */

event ereport.cpu.amd.nb.mem_ce@cpu;

/*
 * If the address is not valid then no resource member will be included
 * in a nb.mem_ce or nb.mem_ue ereport. These cases should be rare.
 * We will discard such ereports. An alternative may be to SERD them
 * on a per MC basis and trip if we see too many such events.
 */

event upset.memory.discard@cpu;

/* #PAGE#
 * Page faults of all types diagnose to a single fault class and are
 * counted with a stat.
 *
 * Single-bit errors are diagnosed as upsets and feed into per-DIMM
 * SERD engines which diagnose fault.memory.page if they trip.
 */

#define PAGE_FIT 1
#define PAGE_SB_COUNT 2
#define PAGE_SB_TIME 72h
#define PAGE_CK_COUNT 0
#define PAGE_CK_TIME 1h

engine stat.page_fault@dimm;
event fault.memory.page@dimm, FITrate=PAGE_FIT,
    ASRU=dimm, message=0, count=stat.page_fault@dimm,
    action=confcall("rewrite-ASRU");
event error.memory.page_sb@dimm;
event error.memory.page_ck@dimm;
event error.memory.page_ue@dimm;

prop fault.memory.page@dimm (1)->
    error.memory.page_sb@dimm,
    error.memory.page_ck@dimm,
    error.memory.page_ue@dimm;

event ereport.memory.page_sb_trip@dimm;
engine serd.memory.page_sb@dimm, N=PAGE_SB_COUNT, T=PAGE_SB_TIME,
    method=persistent, trip=ereport.memory.page_sb_trip@dimm;
event upset.memory.page_sb@dimm, engine=serd.memory.page_sb@dimm;

event ereport.memory.page_ck_trip@dimm;
engine serd.memory.page_ck@dimm, N=PAGE_CK_COUNT, T=PAGE_CK_TIME,
    method=persistent, trip=ereport.memory.page_ck_trip@dimm;
event upset.memory.page_ck@dimm, engine=serd.memory.page_ck@dimm;

prop upset.memory.page_sb@dimm (0)->
    ereport.cpu.amd.nb.mem_ce@cpu { CONTAINS_DIMM && SINGLE_BIT_CE };

prop upset.memory.page_ck@dimm (0)->
    ereport.cpu.amd.nb.mem_ce@cpu { CONTAINS_DIMM && MULTI_BIT_CE };

prop error.memory.page_sb@dimm (1)->
    ereport.memory.page_sb_trip@dimm;

prop error.memory.page_ck@dimm (1)->
    ereport.memory.page_ck_trip@dimm;

prop fault.memory.page@dimm { SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.nb.mem_ce@cpu { CONTAINS_DIMM && GET_ADDR && GET_OFFSET };

/* CE ereports that carry no resource member are discarded. */
prop upset.memory.discard@cpu (1)->
    ereport.cpu.amd.nb.mem_ce@cpu { !RESOURCE_EXISTS };

/* #DIMM_SB#
 * Single-bit DIMM faults are diagnosed when the number of page faults
 * (of all types since they all are counted in a single per-DIMM stat engine)
 * reaches a threshold. Since our tolerance of ChipKill and UE faults
 * is much lower than that for single-bit errors the threshold will only be
 * reached for repeated single-bit page faults. We do not stop diagnosing
 * further single-bit page faults once we have declared a single-bit DIMM
 * fault - we continue diagnosing them and response agents can continue to
 * retire those pages up to the system-imposed retirement limit.
 *
 * We maintain a parallel SERD engine to the page_sb engine which trips
 * in unison, but on trip it generates a distinct ereport which we
 * diagnose to a dimm_sb fault if the threshold has been reached, or
 * to a throwaway upset if not.
 */

#define DIMM_SB_FIT 2000
#define DIMM_SB_THRESH 128

event fault.memory.dimm_sb@dimm, FITrate=DIMM_SB_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU");

event ereport.memory.dimm_sb_trip@dimm;
event upset.memory.discard@dimm;
engine serd.memory.dimm_sb@dimm, N=PAGE_SB_COUNT, T=PAGE_SB_TIME,
    method=persistent, trip=ereport.memory.dimm_sb_trip@dimm;
event upset.memory.dimm_sb@dimm, engine=serd.memory.dimm_sb@dimm;

prop upset.memory.dimm_sb@dimm (0)->
    ereport.cpu.amd.nb.mem_ce@cpu { CONTAINS_DIMM }; /* sb and ck */

prop upset.memory.discard@dimm (1)->
    ereport.memory.dimm_sb_trip@dimm;

prop fault.memory.dimm_sb@dimm (0)->
    ereport.memory.dimm_sb_trip@dimm {
	count(stat.page_fault@dimm) >= DIMM_SB_THRESH };

/* #DIMM_CK#
 * ChipKill-correctable multi-bit faults indicate a likely failing SDRAM
 * part. We will SERD them but with a very low/zero tolerance.
 */

#define DIMM_CK_FIT 4000
#define DIMM_CK_COUNT 0
#define DIMM_CK_TIME 1h

event fault.memory.dimm_ck@dimm, FITrate=DIMM_CK_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU");

event ereport.memory.dimm_ck_trip@dimm;
engine serd.memory.dimm_ck@dimm, N=DIMM_CK_COUNT, T=DIMM_CK_TIME,
    method=persistent, trip=ereport.memory.dimm_ck_trip@dimm;
event upset.memory.dimm_ck@dimm, engine=serd.memory.dimm_ck@dimm;

prop upset.memory.dimm_ck@dimm (0)->
    ereport.cpu.amd.nb.mem_ce@cpu { CONTAINS_DIMM && MULTI_BIT_CE };

prop fault.memory.dimm_ck@dimm (1)->
    ereport.memory.dimm_ck_trip@dimm;

prop fault.memory.page@dimm { SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.nb.mem_ce@cpu { CONTAINS_DIMM && MULTI_BIT_CE &&
	GET_ADDR && GET_OFFSET };

/* #DIMM_UE#
 * A multi-bit fault in a memory dimm can cause:
 *
 * - ue : reported by nb for an access from a remote cpu
 *
 * Note we use a SERD engine here simply as a way of ensuring that we get
 * both dimm and page faults reported
 */

#define DIMM_UE_FIT 6000

event ereport.cpu.amd.nb.mem_ue@cpu;
event ereport.memory.page_ue_trip@dimm;
event ereport.memory.dimm_ue_trip@dimm;
event fault.memory.dimm_ue@dimm, FITrate=DIMM_UE_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU");
event upset.memory.page_ue@dimm, engine=serd.memory.page_ue@dimm;
event upset.memory.dimm_ue@dimm, engine=serd.memory.dimm_ue@dimm;

engine serd.memory.dimm_ue@dimm, N=0, T=1h,
    method=persistent, trip=ereport.memory.dimm_ue_trip@dimm;

engine serd.memory.page_ue@dimm, N=0, T=1h,
    method=persistent, trip=ereport.memory.page_ue_trip@dimm;

prop upset.memory.page_ue@dimm (0)->
    ereport.cpu.amd.nb.mem_ue@cpu { CONTAINS_DIMM };

prop upset.memory.dimm_ue@dimm (0)->
    ereport.cpu.amd.nb.mem_ue@cpu { CONTAINS_DIMM };

prop error.memory.page_ue@dimm (1)->
    ereport.memory.page_ue_trip@dimm;

/*
 * The constraint uses logical && throughout, matching the mem_ce page
 * propagations. A bitwise & of the GET_ADDR and GET_OFFSET values could
 * evaluate to zero even when both succeed, suppressing the page fault.
 */
prop fault.memory.page@dimm { SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.nb.mem_ue@cpu { CONTAINS_DIMM && GET_ADDR && GET_OFFSET };

prop fault.memory.dimm_ue@dimm (1)->
    ereport.memory.dimm_ue_trip@dimm;

/*
 * UE ereports that carry no resource member are discarded, as is done
 * for mem_ce ereports in the page rules.
 */
prop upset.memory.discard@cpu (1)->
    ereport.cpu.amd.nb.mem_ue@cpu { !RESOURCE_EXISTS };

/* #L2D#
 * l2 cache data errors.
 */

#define L2CACHEDATA_FIT 1000
#define L2CACHEDATA_SB_COUNT 3
#define L2CACHEDATA_SB_TIME 12h

event fault.cpu.amd.l2cachedata@chip/cpu, FITrate=L2CACHEDATA_FIT,
    FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.l2cachedata_sb@chip/cpu;
event error.cpu.amd.l2cachedata_mb@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (1)->
    error.cpu.amd.l2cachedata_sb@chip/cpu,
    error.cpu.amd.l2cachedata_mb@chip/cpu;

/* #L2D_SINGLE#
 * A single bit data array fault in an l2 cache can cause:
 *
 * - inf_l2_ecc1 : reported by ic on this cpu
 * - inf_l2_ecc1 : reported by dc on this cpu
 * - l2d_ecc1 : reported by bu on copyback or on snoop from another cpu
 *
 * Single-bit errors are diagnosed to cache upsets. SERD engines are used
 * to count upsets resulting from CEs.
 */

event ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.l2d_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.l2d_sb_trip@chip/cpu;

engine serd.cpu.amd.l2d_sb@chip/cpu,
    N=L2CACHEDATA_SB_COUNT, T=L2CACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.l2d_sb_trip@chip/cpu;

event upset.cpu.amd.l2d_sb@chip/cpu,
    engine=serd.cpu.amd.l2d_sb@chip/cpu;

prop upset.cpu.amd.l2d_sb@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;

prop error.cpu.amd.l2cachedata_sb@chip/cpu (1)->
    ereport.cpu.amd.l2d_sb_trip@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;

/* #L2D_MULTI#
 * A multi-bit data array fault in an l2 cache can cause:
 *
 * - inf_l2_eccm : reported by ic on this cpu
 * - inf_l2_eccm : reported by dc on this cpu
 * - l2d_eccm : reported by bu on copyback or on snoop from another cpu
 */

event ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu;
event ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu;
event ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

prop error.cpu.amd.l2cachedata_mb@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

/* #L2T#
 * l2 cache main tag errors
 */

#define L2CACHETAG_FIT 1000
#define L2CACHETAG_SB_COUNT 3
#define L2CACHETAG_SB_TIME 12h

event fault.cpu.amd.l2cachetag@chip/cpu, FITrate=L2CACHETAG_FIT,
    FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.l2cachetag_sb@chip/cpu;
event error.cpu.amd.l2cachetag_mb@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (1)->
    error.cpu.amd.l2cachetag_sb@chip/cpu,
    error.cpu.amd.l2cachetag_mb@chip/cpu;

/* #L2T_SINGLE#
 * A single bit tag array fault in an l2 cache can cause:
 *
 * - l2t_ecc1 : reported by bu on this cpu when detected during snoop
 * - l2t_par : reported by bu on this cpu when detected other than during snoop
 *
 * Note that the bu.l2t_par ereport could be due to a single bit or multi bit
 * event. If the l2t_sb_trip has already triggered it will be treated as another
 * ce, otherwise it will be treated as a ue event.
 */

event ereport.cpu.amd.bu.l2t_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.l2t_par@chip/cpu;
event ereport.cpu.amd.l2t_sb_trip@chip/cpu;

engine serd.cpu.amd.l2t_sb@chip/cpu,
    N=L2CACHETAG_SB_COUNT, T=L2CACHETAG_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.l2t_sb_trip@chip/cpu;

event upset.cpu.amd.l2t_sb@chip/cpu,
    engine=serd.cpu.amd.l2t_sb@chip/cpu;

prop upset.cpu.amd.l2t_sb@chip/cpu (1)->
    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

prop error.cpu.amd.l2cachetag_sb@chip/cpu (1)->
    ereport.cpu.amd.l2t_sb_trip@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (0)->
    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

/* #L2T_MULTI#
 * A multi-bit tag array fault in an l2 cache can cause:
 *
 * - l2t_eccm : reported by bu on this cpu when detected during snoop
 * - l2t_par : reported by bu on this cpu when detected other than during snoop
 */

event ereport.cpu.amd.bu.l2t_eccm@chip/cpu;

prop error.cpu.amd.l2cachetag_mb@chip/cpu (1)->
    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (0)->
    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

/* #ICD_PAR#
 * A data array parity fault in an I cache can cause:
 *
 * - data_par : reported by ic on this cpu
 */

#define ICACHEDATA_FIT 1000
#define ICACHEDATA_SB_COUNT 2
#define ICACHEDATA_SB_TIME 168h

event ereport.cpu.amd.ic.data_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_dp_trip@chip/cpu;

event fault.cpu.amd.icachedata@chip/cpu, FITrate=ICACHEDATA_FIT,
    FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.icachedata@chip/cpu,
    N=ICACHEDATA_SB_COUNT, T=ICACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_dp_trip@chip/cpu;

event upset.cpu.amd.icachedata@chip/cpu,
    engine=serd.cpu.amd.icachedata@chip/cpu;

prop upset.cpu.amd.icachedata@chip/cpu (1)->
    ereport.cpu.amd.ic.data_par@chip/cpu;

prop fault.cpu.amd.icachedata@chip/cpu (1)->
    ereport.cpu.amd.ic_dp_trip@chip/cpu;

prop fault.cpu.amd.icachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.data_par@chip/cpu;

/* #ICT_PAR#
 * A tag array parity fault in an I cache can cause:
 *
 * - tag_par : reported by ic on this cpu
 */

#define ICACHETAG_FIT 1000
#define ICACHETAG_SB_COUNT 2
#define ICACHETAG_SB_TIME 168h

event ereport.cpu.amd.ic.tag_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_tp_trip@chip/cpu;

event fault.cpu.amd.icachetag@chip/cpu, FITrate=ICACHETAG_FIT,
    FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.icachetag@chip/cpu,
    N=ICACHETAG_SB_COUNT, T=ICACHETAG_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_tp_trip@chip/cpu;

event upset.cpu.amd.icachetag@chip/cpu,
    engine=serd.cpu.amd.icachetag@chip/cpu;

prop upset.cpu.amd.icachetag@chip/cpu (1)->
    ereport.cpu.amd.ic.tag_par@chip/cpu;

prop fault.cpu.amd.icachetag@chip/cpu (1)->
    ereport.cpu.amd.ic_tp_trip@chip/cpu;

prop fault.cpu.amd.icachetag@chip/cpu (0)->
    ereport.cpu.amd.ic.tag_par@chip/cpu;

/* #ICT_SNOOP#
 * A snoop tag array parity fault in an I cache can cause:
 *
 * - stag_par : reported by ic on this cpu
 */

#define ICACHESTAG_FIT 1000

event ereport.cpu.amd.ic.stag_par@chip/cpu{within(5s)};

event fault.cpu.amd.icachestag@chip/cpu, FITrate=ICACHESTAG_FIT,
    FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.icachestag@chip/cpu (1)->
    ereport.cpu.amd.ic.stag_par@chip/cpu;

/* #ICTLB_1#
 * An l1tlb parity fault in an I cache can cause:
 *
 * - l1tlb_par : reported by ic on this cpu
 */

#define ICACHEL1TLB_FIT 1000
#define ICACHEL1TLB_SB_COUNT 2
#define ICACHEL1TLB_SB_TIME 168h

event ereport.cpu.amd.ic.l1tlb_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

event fault.cpu.amd.l1itlb@chip/cpu, FITrate=ICACHEL1TLB_FIT,
    FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.l1itlb@chip/cpu,
    N=ICACHEL1TLB_SB_COUNT, T=ICACHEL1TLB_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

event upset.cpu.amd.l1itlb@chip/cpu,
    engine=serd.cpu.amd.l1itlb@chip/cpu;

prop upset.cpu.amd.l1itlb@chip/cpu (1)->
    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;

prop fault.cpu.amd.l1itlb@chip/cpu (1)->
    ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

prop fault.cpu.amd.l1itlb@chip/cpu (0)->
    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;

/* #ICTLB_2#
 * An l2tlb parity fault in an I cache can cause:
 *
 * - l2tlb_par : reported by ic on this cpu
 */

#define ICACHEL2TLB_FIT 1000
#define ICACHEL2TLB_SB_COUNT 2
#define ICACHEL2TLB_SB_TIME 168h

event ereport.cpu.amd.ic.l2tlb_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

event fault.cpu.amd.l2itlb@chip/cpu, FITrate=ICACHEL2TLB_FIT,
    FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.l2itlb@chip/cpu,
    N=ICACHEL2TLB_SB_COUNT, T=ICACHEL2TLB_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

event upset.cpu.amd.l2itlb@chip/cpu,
    engine=serd.cpu.amd.l2itlb@chip/cpu;

prop upset.cpu.amd.l2itlb@chip/cpu (1)->
    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;
prop fault.cpu.amd.l2itlb@chip/cpu (1)->
	ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

prop fault.cpu.amd.l2itlb@chip/cpu (0)->
	ereport.cpu.amd.ic.l2tlb_par@chip/cpu;

/* #DCD#
 * Data cache (D cache) data array errors, single-bit and multi-bit.
 */

#define DCACHEDATA_FIT 1000
#define DCACHEDATA_SB_COUNT 2
#define DCACHEDATA_SB_TIME 168h

event fault.cpu.amd.dcachedata@chip/cpu,
	FITrate=DCACHEDATA_FIT, FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.dcachedata_sb@chip/cpu;
event error.cpu.amd.dcachedata_mb@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (1)->
	error.cpu.amd.dcachedata_sb@chip/cpu,
	error.cpu.amd.dcachedata_mb@chip/cpu;

/* #DCD_SINGLE#
 * Ereports that a single bit data array fault in a D cache can produce:
 *
 * - data_ecc1    : reported by dc on this cpu by scrubber
 * - data_ecc1_uc : reported by dc on this cpu other than by scrubber
 *
 * data_ecc1_uc is counted by its own SERD engine with N=0 so that it
 * faults immediately, as it may have caused a panic.  Both engines
 * generate the same trip ereport.
 */

event ereport.cpu.amd.dc.data_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu{within(5s)};
event ereport.cpu.amd.dc_sb_trip@chip/cpu;

engine serd.cpu.amd.dc_sb@chip/cpu,
	N=DCACHEDATA_SB_COUNT,
	T=DCACHEDATA_SB_TIME,
	method=persistent,
	trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;

engine serd.cpu.amd.dc_sb_uc@chip/cpu,
	N=0,
	T=1hr,
	method=persistent,
	trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;

event upset.cpu.amd.dc_sb@chip/cpu, engine=serd.cpu.amd.dc_sb@chip/cpu;

event upset.cpu.amd.dc_sb_uc@chip/cpu, engine=serd.cpu.amd.dc_sb_uc@chip/cpu;

prop upset.cpu.amd.dc_sb@chip/cpu (1)->
	ereport.cpu.amd.dc.data_ecc1@chip/cpu;

prop upset.cpu.amd.dc_sb_uc@chip/cpu (1)->
	ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;

prop error.cpu.amd.dcachedata_sb@chip/cpu (1)->
	ereport.cpu.amd.dc_sb_trip@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (0)->
	ereport.cpu.amd.dc.data_ecc1@chip/cpu,
	ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;

/* #DCD_MULTI#
 * Ereports that a multi-bit data array fault in a D cache can produce:
 *
 * - data_eccm : reported by dc on this cpu
 */

event ereport.cpu.amd.dc.data_eccm@chip/cpu;

prop error.cpu.amd.dcachedata_mb@chip/cpu (1)->
	ereport.cpu.amd.dc.data_eccm@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (0)->
	ereport.cpu.amd.dc.data_eccm@chip/cpu;

/* #DCT_PAR#
 * Ereports that a tag array parity fault in a D cache can produce:
 *
 * - tag_par : reported by dc on this cpu
 */

#define DCACHETAG_FIT 1000

event ereport.cpu.amd.dc.tag_par@chip/cpu{within(5s)};

event fault.cpu.amd.dcachetag@chip/cpu,
	FITrate=DCACHETAG_FIT, FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.dcachetag@chip/cpu (1)->
	ereport.cpu.amd.dc.tag_par@chip/cpu;

/* #DCT_SNOOP#
 * Ereports that a snoop tag array parity fault in a D cache can produce:
 *
 * - stag_par : reported by dc on this cpu
 */

#define DCACHESTAG_FIT 1000

event ereport.cpu.amd.dc.stag_par@chip/cpu{within(5s)};

event fault.cpu.amd.dcachestag@chip/cpu,
	FITrate=DCACHESTAG_FIT, FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.dcachestag@chip/cpu (1)->
	ereport.cpu.amd.dc.stag_par@chip/cpu;

/* #DCTLB_1#
 * Ereports that an l1tlb parity fault in a D cache can produce:
 *
 * - l1tlb_par : reported by dc on this cpu
 */

#define L1DTLB_FIT 1000

event ereport.cpu.amd.dc.l1tlb_par@chip/cpu{within(5s)};

event fault.cpu.amd.l1dtlb@chip/cpu,
	FITrate=L1DTLB_FIT, FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.l1dtlb@chip/cpu (1)->
	ereport.cpu.amd.dc.l1tlb_par@chip/cpu;

/* #DCTLB_2#
 * Ereports that an l2tlb parity fault in a D cache can produce:
 *
 * - l2tlb_par : reported by dc on this cpu
 */

#define L2DTLB_FIT 1000

event ereport.cpu.amd.dc.l2tlb_par@chip/cpu{within(5s)};

event fault.cpu.amd.l2dtlb@chip/cpu,
	FITrate=L2DTLB_FIT, FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.l2dtlb@chip/cpu (1)->
	ereport.cpu.amd.dc.l2tlb_par@chip/cpu;

/* #DPATH_SB#
 * Datapath errors between NB/MC and core.
 */

#define CPU_DP_FIT 1000

event fault.cpu.amd.datapath@chip/cpu,
	FITrate=CPU_DP_FIT, FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.datapath_sb@chip/cpu;
event error.cpu.amd.datapath_mb@chip/cpu;

prop fault.cpu.amd.datapath@chip/cpu (1)->
	error.cpu.amd.datapath_sb@chip/cpu,
	error.cpu.amd.datapath_mb@chip/cpu;

/*
 * Ereports that a single bit fault in the datapath between the NB and
 * the requesting core can produce:
 *
 * - inf_sys_ecc1 : reported by ic on access from a local cpu
 * - inf_sys_ecc1 : reported by dc on access from a local cpu
 * - s_ecc1       : reported by bu on access from a local cpu (hw prefetch etc)
 */

#define CPU_DP_COUNT 3
#define CPU_DP_TIME 12h

event ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.s_ecc1@chip/cpu{within(5s)};
event upset.cpu.dp_sb@chip/cpu, engine=serd.cpu.dp_sb@chip/cpu;
event ereport.cpu.amd.dp_sb_trip@chip/cpu;

engine serd.cpu.dp_sb@chip/cpu,
	N=CPU_DP_COUNT,
	T=CPU_DP_TIME,
	method=persistent,
	trip=ereport.cpu.amd.dp_sb_trip@chip/cpu;

prop upset.cpu.dp_sb@chip/cpu (1)->
	ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu,
ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 7627aec1d6eScindi ereport.cpu.amd.bu.s_ecc1@chip/cpu; 7637aec1d6eScindi 7647aec1d6eScindiprop error.cpu.amd.datapath_sb@chip/cpu (1)-> 7657aec1d6eScindi ereport.cpu.amd.dp_sb_trip@chip/cpu; 7667aec1d6eScindi 7677aec1d6eScindiprop fault.cpu.amd.datapath@chip/cpu (0)-> 7687aec1d6eScindi ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu, 7697aec1d6eScindi ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu, 7707aec1d6eScindi ereport.cpu.amd.bu.s_ecc1@chip/cpu; 7717aec1d6eScindi 7727aec1d6eScindi/* #DPATH_MB# 7737aec1d6eScindi * A multi-bit fault in the datapath between the NB and requesting core 7747aec1d6eScindi * can cause: 7757aec1d6eScindi * 7767aec1d6eScindi * - inf_sys_eccm : reported by ic on access from a local cpu 7777aec1d6eScindi * - inf_sys_eccm : reported by dc on access from a local cpu 7787aec1d6eScindi * - s_eccm : reported by bu on access from a local cpu (hw prefetch etc) 7797aec1d6eScindi */ 7807aec1d6eScindi 7817aec1d6eScindievent ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu; 7827aec1d6eScindievent ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu; 7837aec1d6eScindievent ereport.cpu.amd.bu.s_eccm@chip/cpu; 7847aec1d6eScindi 7857aec1d6eScindiprop error.cpu.amd.datapath_mb@chip/cpu (1)-> 7867aec1d6eScindi ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu, 7877aec1d6eScindi ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu, 7887aec1d6eScindi ereport.cpu.amd.bu.s_eccm@chip/cpu; 7897aec1d6eScindi 7907aec1d6eScindiprop fault.cpu.amd.datapath@chip/cpu (0)-> 7917aec1d6eScindi ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu, 7927aec1d6eScindi ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu, 7937aec1d6eScindi ereport.cpu.amd.bu.s_eccm@chip/cpu; 7947aec1d6eScindi 7957aec1d6eScindi/* 7967aec1d6eScindi * Ereports that should not normally happen and which we will discard 7977aec1d6eScindi * without diagnosis if they do. 
These fall into a few categories: 7987aec1d6eScindi * 7997aec1d6eScindi * - the corresponding detector is not enabled, typically because 8007aec1d6eScindi * detection/handling of the event is taking place elsewhere 8017aec1d6eScindi * (nb.ma, nb.ta, ls.rde, ic.rdde, bu.s_rde, nb.gart_walk) 8027aec1d6eScindi * - the event is associated with a sync flood so even if the detector is 8037aec1d6eScindi * enabled we will never handle the event and generate an ereport *and* 8047aec1d6eScindi * even if the ereport did arrive we could perform no useful diagnosis 8057aec1d6eScindi * e.g., the NB can be configured for sync flood on nb.mem_eccm 8067aec1d6eScindi * but we don't choose to discard that ereport here since we could have 8077aec1d6eScindi * made a useful diagnosis from it had it been delivered 8087aec1d6eScindi * (nb.ht_sync, nb.ht_crc) 8097aec1d6eScindi * - events that will be accompanied by an immediate panic and 8107aec1d6eScindi * delivery of the ereport during subsequent reboot but from 8117aec1d6eScindi * which no useful diagnosis can be made. (nb.rmw, nb.wdog) 8127aec1d6eScindi * 8137aec1d6eScindi * Ereports for all of these can be generated by error simulation and 8147aec1d6eScindi * injection. We will perform a null diagnosos of all these ereports in order 8157aec1d6eScindi * to avoid "no subscription" complaints during test harness runs. 
8167aec1d6eScindi */ 8177aec1d6eScindi 8187aec1d6eScindievent ereport.cpu.amd.nb.ma@cpu; 8197aec1d6eScindievent ereport.cpu.amd.nb.ta@cpu; 8207aec1d6eScindievent ereport.cpu.amd.ls.s_rde@cpu; 8217aec1d6eScindievent ereport.cpu.amd.ic.rdde@cpu; 8227aec1d6eScindievent ereport.cpu.amd.bu.s_rde@cpu; 8237aec1d6eScindievent ereport.cpu.amd.nb.gart_walk@cpu; 8247aec1d6eScindievent ereport.cpu.amd.nb.ht_sync@cpu; 8257aec1d6eScindievent ereport.cpu.amd.nb.ht_crc@cpu; 8267aec1d6eScindievent ereport.cpu.amd.nb.rmw@cpu; 8277aec1d6eScindievent ereport.cpu.amd.nb.wdog@cpu; 8287aec1d6eScindievent ereport.cpu.amd.unknown@cpu; 8297aec1d6eScindi 8307aec1d6eScindievent upset.null_diag@cpu; 8317aec1d6eScindi 8327aec1d6eScindiprop upset.null_diag@cpu (1)-> 8337aec1d6eScindi ereport.cpu.amd.nb.ma@cpu, 8347aec1d6eScindi ereport.cpu.amd.nb.ta@cpu, 8357aec1d6eScindi ereport.cpu.amd.ls.s_rde@cpu, 8367aec1d6eScindi ereport.cpu.amd.ic.rdde@cpu, 8377aec1d6eScindi ereport.cpu.amd.bu.s_rde@cpu, 8387aec1d6eScindi ereport.cpu.amd.nb.gart_walk@cpu, 8397aec1d6eScindi ereport.cpu.amd.nb.ht_sync@cpu, 8407aec1d6eScindi ereport.cpu.amd.nb.ht_crc@cpu, 8417aec1d6eScindi ereport.cpu.amd.nb.rmw@cpu, 8427aec1d6eScindi ereport.cpu.amd.nb.wdog@cpu, 8437aec1d6eScindi ereport.cpu.amd.unknown@cpu; 844