17aec1d6eScindi/*
27aec1d6eScindi * CDDL HEADER START
37aec1d6eScindi *
47aec1d6eScindi * The contents of this file are subject to the terms of the
55f25dc2aSgavinm * Common Development and Distribution License (the "License").
65f25dc2aSgavinm * You may not use this file except in compliance with the License.
77aec1d6eScindi *
87aec1d6eScindi * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97aec1d6eScindi * or http://www.opensolaris.org/os/licensing.
107aec1d6eScindi * See the License for the specific language governing permissions
117aec1d6eScindi * and limitations under the License.
127aec1d6eScindi *
137aec1d6eScindi * When distributing Covered Code, include this CDDL HEADER in each
147aec1d6eScindi * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157aec1d6eScindi * If applicable, add the following below this CDDL HEADER, with the
167aec1d6eScindi * fields enclosed by brackets "[]" replaced with your own identifying
177aec1d6eScindi * information: Portions Copyright [yyyy] [name of copyright owner]
187aec1d6eScindi *
197aec1d6eScindi * CDDL HEADER END
207aec1d6eScindi */
217aec1d6eScindi
227aec1d6eScindi/*
23b5016cbbSstephh * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
247aec1d6eScindi * Use is subject to license terms.
257aec1d6eScindi */
267aec1d6eScindi
277aec1d6eScindi#pragma ident	"%Z%%M%	%I%	%E% SMI"
287aec1d6eScindi
297aec1d6eScindi#pragma dictionary "AMD"
307aec1d6eScindi
317aec1d6eScindi/*
327aec1d6eScindi * Eversholt rules for the AMD Opteron CPU/Memory
337aec1d6eScindi */
347aec1d6eScindi
358a40a695Sgavinmfru motherboard;
368a40a695Sgavinmfru chip;
377aec1d6eScindifru dimm;
387aec1d6eScindi
397aec1d6eScindiasru chip/cpu;
408a40a695Sgavinmasru dimm;
418a40a695Sgavinmasru dimm/rank;
428a40a695Sgavinmasru dram-channel;
438a40a695Sgavinmasru chip/memory-controller/chip-select;
447aec1d6eScindi
/*
 * Two-operand maximum and minimum.  These are textual macros, so each
 * argument expression appears twice in the expansion.
 */
458a40a695Sgavinm#define	MAX(x, y) ((x) >= (y) ? (x) : (y))
468a40a695Sgavinm#define	MIN(x, y) ((x) <= (y) ? (x) : (y))
478a40a695Sgavinm
487aec1d6eScindi/*
498a40a695Sgavinm * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
508a40a695Sgavinm * we diagnose for page faults, to record the physical address of the faulting
518a40a695Sgavinm * page.  The "asru-" prefix is hooked in the "rewrite-ASRU" confcalls made on
528a40a695Sgavinm * diagnosis of associated faults when the libtopo mem scheme rewrites the
538a40a695Sgavinm * asru in "mem" scheme.
547aec1d6eScindi */
55*20c794b3Sgavinm#define	SET_ADDR (setpayloadprop("asru-physaddr", payloadprop("IA32_MCi_ADDR")))
567aec1d6eScindi
57b5016cbbSstephh#define	SET_OFFSET (setpayloadprop("asru-offset", \
58b5016cbbSstephh	payloadprop("resource[0].hc-specific.offset")))
597aec1d6eScindi
607aec1d6eScindi/*
618a40a695Sgavinm * RESOURCE_EXISTS is true if a member with name "resource" exists in the
627aec1d6eScindi * payload - regardless of type (e.g., nvlist or nvlist array) or value.
637aec1d6eScindi */
647aec1d6eScindi#define	RESOURCE_EXISTS	(payloadprop_defined("resource"))
657aec1d6eScindi
667aec1d6eScindi/*
678a40a695Sgavinm * CONTAINS_RANK is true if the "resource" nvlist array (as used in memory
687aec1d6eScindi * ereports) exists and one of its members matches the path for the
698a40a695Sgavinm * rank node or its dimm node.  Our memory propagations are of the form
708a40a695Sgavinm *
718a40a695Sgavinm * "prop foo@chip/memory-controller/dimm/rank -> blah@chip/cpu"
728a40a695Sgavinm *
737aec1d6eScindi * since cpus detect memory errors;  in eversholt such a propagation, where
747aec1d6eScindi * the lhs path and rhs path do not match, expands to the cross-product of
758a40a695Sgavinm * all dimms, ranks and cpus on the same chip (since chip appears in the
768a40a695Sgavinm * path on both sides).  We use CONTAINS_RANK to constrain the propagation
778a40a695Sgavinm * such that it only happens if the payload resource matches the rank.
787aec1d6eScindi */
798a40a695Sgavinm#define	CONTAINS_RANK (payloadprop_contains("resource", \
80b5016cbbSstephh	asru(chip/memory-controller/dimm/rank)) \
81b5016cbbSstephh	|| payloadprop_contains("resource", \
82b5016cbbSstephh	asru(chip/memory-controller/dimm)))
837aec1d6eScindi
847aec1d6eScindi/*
857aec1d6eScindi * The following will tell us whether a syndrome that is known to be
868a40a695Sgavinm * correctable (from a mem_ce ereport) is single-bit or multi-bit.  For a
877aec1d6eScindi * correctable ChipKill syndrome the number of bits set in the lowest
888a40a695Sgavinm * nibble indicates how many bits were in error.
897aec1d6eScindi */
907aec1d6eScindi
/* The lowest nibble of a syndrome value. */
917aec1d6eScindi#define	CBITMASK(synd) ((synd) & 0xf)
927aec1d6eScindi
/*
 * True when the syndrome is zero, or when its lowest nibble has exactly
 * one bit set (0x1, 0x2, 0x4 or 0x8).
 */
937aec1d6eScindi#define	CKSINGLE(synd)							\
947aec1d6eScindi	((synd) == 0 ||							\
957aec1d6eScindi	(CBITMASK(synd) == 0x1 || CBITMASK(synd) == 0x2 ||		\
967aec1d6eScindi	CBITMASK(synd) == 0x4 || CBITMASK(synd) == 0x8))
977aec1d6eScindi
/*
 * True for any ereport whose "syndrome-type" payload is "E", and for one
 * whose "syndrome-type" is "C" provided its "syndrome" passes CKSINGLE.
 * NOTE(review): "E" and "C" presumably denote normal ECC and ChipKill
 * syndromes respectively - confirm against the ereport producer.
 */
987aec1d6eScindi#define	SINGLE_BIT_CE							\
997aec1d6eScindi	(payloadprop("syndrome-type") == "E" ||				\
1007aec1d6eScindi	(payloadprop("syndrome-type") == "C" &&				\
1017aec1d6eScindi	CKSINGLE(payloadprop("syndrome"))))
1027aec1d6eScindi
/*
 * True only for a "syndrome-type" of "C" whose "syndrome" fails CKSINGLE;
 * a "syndrome-type" of "E" never satisfies this.
 */
1037aec1d6eScindi#define	MULTI_BIT_CE							\
1047aec1d6eScindi	(payloadprop("syndrome-type") == "C" &&				\
1057aec1d6eScindi	!CKSINGLE(payloadprop("syndrome")))
1067aec1d6eScindi
1077aec1d6eScindi/*
1088a40a695Sgavinm * A single bit fault in a memory rank can cause:
1097aec1d6eScindi *
1108a40a695Sgavinm *  - mem_ce : reported by nb
1118a40a695Sgavinm *  - inf_sys_ecc1: reported by ic or dc; inf_sys_ecc1 errors detected at the
1128a40a695Sgavinm *    ic do not record a syndrome; these errors will not be triggered in
1138a40a695Sgavinm *    ChipKill ECC mode (the NB corrects all ECC errors in that mode)
1148a40a695Sgavinm *  - s_ecc1: reported by bu; this error will not be triggered in ChipKill
1158a40a695Sgavinm *    ECC mode (the NB corrects all ECC in that mode)
1167aec1d6eScindi *
1178a40a695Sgavinm * Single-bit errors are fed into a per-rank SERD engine; if a SERD engine
1187aec1d6eScindi * trips we diagnose a fault.memory.page so that the response agent can
1197aec1d6eScindi * retire the page that caused the trip.  If the total number of pages
1208a40a695Sgavinm * faulted in this way on a single rank exceeds a threshold we will
1218a40a695Sgavinm * diagnose a fault.memory.dimm_sb against the containing dimm.
1227aec1d6eScindi *
1238a40a695Sgavinm * Multibit ChipKill-correctable errors are treated identically to
1248a40a695Sgavinm * single-bit errors, but via separate serd engines to allow distinct
1258a40a695Sgavinm * parameters if desired.
1267aec1d6eScindi *
1277aec1d6eScindi * Uncorrectable errors produce an immediate page fault and corresponding
1287aec1d6eScindi * fault.memory.dimm_ue.
1297aec1d6eScindi *
1307aec1d6eScindi * Page faults are essentially internal - action is only required when
1317aec1d6eScindi * they are accompanied by a dimm fault.  As such we include message=0
1328a40a695Sgavinm * on page faults.
1337aec1d6eScindi */
1347aec1d6eScindi
1358a40a695Sgavinmevent ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu{within(5s)};
1368a40a695Sgavinmevent ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu{within(5s)};
1378a40a695Sgavinmevent ereport.cpu.amd.bu.s_ecc1@chip/cpu{within(5s)};
1388a40a695Sgavinmevent ereport.cpu.amd.nb.mem_ce@chip/cpu{within(5s)};
1397aec1d6eScindi
1407aec1d6eScindi/*
1417aec1d6eScindi * If the address is not valid then no resource member will be included
1427aec1d6eScindi * in a nb.mem_ce or nb.mem_ue ereport.  These cases should be rare.
1438a40a695Sgavinm * We will also discard all inf_sys_ecc1 events detected at the ic since they
1448a40a695Sgavinm * have no syndrome and therefore no resource information.
1457aec1d6eScindi * We will discard such ereports.  An alternative may be to SERD them
1467aec1d6eScindi * on a per MC basis and trip if we see too many such events.
1477aec1d6eScindi */
1487aec1d6eScindi
1498a40a695Sgavinmevent upset.memory.discard1@chip/cpu;
1507aec1d6eScindi
1517aec1d6eScindi/*								#PAGE#
1528a40a695Sgavinm * Single-bit correctable errors are diagnosed as upsets and feed into per-rank
1538a40a695Sgavinm * SERD engines which diagnose fault.memory.page_sb if they trip.
1548a40a695Sgavinm *
1558a40a695Sgavinm * Multi-bit correctable (via ChipKill) errors are diagnosed as upsets and feed
1568a40a695Sgavinm * into additional per-rank SERD engines which diagnose fault.memory.page_ck
1578a40a695Sgavinm * if they trip.
1587aec1d6eScindi *
1598a40a695Sgavinm * The number of fault.memory.page_sb and fault.memory.page_ck diagnosed is
1608a40a695Sgavinm * counted in stat engines for each type.  These are used in deciding
1618a40a695Sgavinm * whether to declare a dimm faulty after repeated page faults.
1627aec1d6eScindi */
1637aec1d6eScindi
1647aec1d6eScindi#define PAGE_FIT		1
1657aec1d6eScindi#define PAGE_SB_COUNT		2
1667aec1d6eScindi#define PAGE_SB_TIME		72h
1678a40a695Sgavinm#define	PAGE_CK_COUNT		2
1688a40a695Sgavinm#define	PAGE_CK_TIME		72h
1698a40a695Sgavinm
1708a40a695Sgavinm/*
1718a40a695Sgavinm * The fraction of pages on a single rank that must be diagnosed as faulty
1728a40a695Sgavinm * with single correctable unit faults before we will fault the rank.
1738a40a695Sgavinm * Once we have faulted the rank we will continue to diagnose any further page
1748a40a695Sgavinm * faults on the rank up to some maximum multiple of the threshold at which
1758a40a695Sgavinm * we faulted the dimm.  This allows us to potentially contain some fairly
1768a40a695Sgavinm * far-reaching but still limited-extent fault (such as a partial column
1778a40a695Sgavinm * failure) without getting carried away and allowing a single faulty rank to
1788a40a695Sgavinm * use up the entire system-imposed page retirement limit (which, once
1798a40a695Sgavinm * reached, causes retirement requests to have no effect other than to fill
1808a40a695Sgavinm * the fault manager cache and logs).
1818a40a695Sgavinm *
1828a40a695Sgavinm * This fraction is specified in basis points, where 100 basis points are
1838a40a695Sgavinm * equivalent to 1 percent.  It is applied on a per-rank basis.
1847aec1d6eScindi *
1858a40a695Sgavinm * The system imposes an absolute maximum on the number of pages it will
1868a40a695Sgavinm * retire;  the current value is 10 basis points, or 0.1% of 'physmem'.  Note
1878a40a695Sgavinm * that 'physmem' is reduced from installed memory pages by an amount
1888a40a695Sgavinm * reflecting permanent kernel memory allocations.  This system page retire
1898a40a695Sgavinm * limit bounds the maximum real response to page faults across all ranks
1908a40a695Sgavinm * that fault manager response agents can effect, but it should not be confused
1918a40a695Sgavinm * with any diagnosis threshold (i.e., the number of faulty pages we are
1928a40a695Sgavinm * prepared to tolerate from a single rank before faulting the rank is
1938a40a695Sgavinm * distinct from the total number of pages we are prepared to retire from use
1948a40a695Sgavinm * in response to that and other faults).  It is, however, desirable to
1958a40a695Sgavinm * arrange that the maximum number of pages we are prepared to fault from
1968a40a695Sgavinm * any one rank is less than the system-wide quota.
1977aec1d6eScindi */
1988a40a695Sgavinm#define	PAGE_RETIRE_LIMIT_BPS	5		/* or 0.05%; ~ 131 pages/GB */
1997aec1d6eScindi
2008a40a695Sgavinm/*
2018a40a695Sgavinm * A macro to manipulate the above fraction.  Given a size in bytes convert
2028a40a695Sgavinm * this to pages (4K pagesize) and calculate the number of those pages
2038a40a695Sgavinm * indicated by PAGE_RETIRE_LIMIT_BPS basis points.
2048a40a695Sgavinm */
2058a40a695Sgavinm#define	_BPS_PGCNT(totalbytes) \
2068a40a695Sgavinm	((((totalbytes) / 4096 ) * PAGE_RETIRE_LIMIT_BPS) / 10000)
2078a40a695Sgavinm
2088a40a695Sgavinm/*
2098a40a695Sgavinm * The single-correctable-unit threshold: the number of faulted pages on a
2108a40a695Sgavinm * rank at which we fault the rank.  We insist that this be at least 128 and
2118a40a695Sgavinm * never more than 512.
2128a40a695Sgavinm */
2138a40a695Sgavinm#define	RANK_THRESH MIN(512, MAX(128, \
2148a40a695Sgavinm	_BPS_PGCNT(confprop(asru(chip/memory-controller/dimm/rank), "size"))))
2158a40a695Sgavinm
2168a40a695Sgavinm/*
2178a40a695Sgavinm * The maximum number of single-correctable-unit page faults we will diagnose
2188a40a695Sgavinm * on a single rank (must be greater than RANK_THRESH).  We set
2198a40a695Sgavinm * this at twice the rank fault threshold.
2208a40a695Sgavinm */
2218a40a695Sgavinm#define	RANK_PGFLT_MAX (2 * RANK_THRESH)
2228a40a695Sgavinm
/*
 * Per-rank stat engines.  stat.sbpgflt is named as the count= of
 * fault.memory.page_sb and stat.ckpgflt as the count= of
 * fault.memory.page_ck below.
 */
2238a40a695Sgavinmengine stat.sbpgflt@chip/memory-controller/dimm/rank;
2248a40a695Sgavinmengine stat.ckpgflt@chip/memory-controller/dimm/rank;
2258a40a695Sgavinm
2268a40a695Sgavinmevent fault.memory.page_sb@chip/memory-controller/dimm/rank,
2278a40a695Sgavinm    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
2288a40a695Sgavinm    count=stat.sbpgflt@chip/memory-controller/dimm/rank,
2298a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */
2308a40a695Sgavinm
/* count() of the per-rank stat engine attached to fault.memory.page_sb. */
2318a40a695Sgavinm#define	SB_PGFLTS (count(stat.sbpgflt@chip/memory-controller/dimm/rank))
2328a40a695Sgavinm
2338a40a695Sgavinmevent fault.memory.page_ck@chip/memory-controller/dimm/rank,
2348a40a695Sgavinm    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
2358a40a695Sgavinm    count=stat.ckpgflt@chip/memory-controller/dimm/rank,
2368a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */
2378a40a695Sgavinm
/* count() of the per-rank stat engine attached to fault.memory.page_ck. */
2388a40a695Sgavinm#define	CK_PGFLTS (count(stat.ckpgflt@chip/memory-controller/dimm/rank))
2398a40a695Sgavinm
/*
 * True once the combined page_sb and page_ck counts for the rank are
 * strictly greater than RANK_PGFLT_MAX.
 */
2408a40a695Sgavinm#define	RANK_PGFLT_LIMIT_REACHED \
2418a40a695Sgavinm    (SB_PGFLTS + CK_PGFLTS > RANK_PGFLT_MAX)
2428a40a695Sgavinm
/*
 * Per-rank SERD engine for single-bit page upsets, parameterized by
 * PAGE_SB_COUNT and PAGE_SB_TIME.  Its trip= event is
 * ereport.memory.page_sb_trip, and upset.memory.page_sb is the event
 * that names it as its engine.
 */
243b5016cbbSstephhevent ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank{within(5s)};
2448a40a695Sgavinmengine serd.memory.page_sb@chip/memory-controller/dimm/rank,
2458a40a695Sgavinm    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
2468a40a695Sgavinm    trip=ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
2478a40a695Sgavinmevent upset.memory.page_sb@chip/memory-controller/dimm/rank,
2488a40a695Sgavinm    engine=serd.memory.page_sb@chip/memory-controller/dimm/rank;
2498a40a695Sgavinm
/*
 * The same arrangement for multi-bit (ChipKill-correctable) page upsets,
 * parameterized separately by PAGE_CK_COUNT and PAGE_CK_TIME.
 */
250b5016cbbSstephhevent ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank{within(5s)};
2518a40a695Sgavinmengine serd.memory.page_ck@chip/memory-controller/dimm/rank,
2528a40a695Sgavinm    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
2538a40a695Sgavinm    trip=ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
2548a40a695Sgavinmevent upset.memory.page_ck@chip/memory-controller/dimm/rank,
2558a40a695Sgavinm    engine=serd.memory.page_ck@chip/memory-controller/dimm/rank;
2568a40a695Sgavinm
/* Declared with no engine= property. */
2578a40a695Sgavinmevent upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank;
2588a40a695Sgavinm
2598a40a695Sgavinm/*
2608a40a695Sgavinm * If we have not reached the per-rank limit on faulted pages then
2618a40a695Sgavinm * continue to explain ereport observations as upsets which can lead
2628a40a695Sgavinm * to page fault diagnoses if the serd engine trips.
2638a40a695Sgavinm */
264b5016cbbSstephhprop upset.memory.page_sb@chip/memory-controller/dimm/rank
265b5016cbbSstephh    { CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)->
266b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
267b5016cbbSstephh    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
268b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
269b5016cbbSstephh
270b5016cbbSstephhprop upset.memory.page_ck@chip/memory-controller/dimm/rank
271b5016cbbSstephh    { CONTAINS_RANK && MULTI_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)->
2728a40a695Sgavinm    /* no dc.inf_sys_ecc1 or bu.s_ecc1 in ChipKill mode */
273b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
2748a40a695Sgavinm
2758a40a695Sgavinm/*
2768a40a695Sgavinm * If we have reached the per-rank limit on faulted pages then diagnose
2778a40a695Sgavinm * further observations on the rank to an engine-less upset (i.e., discard
2788a40a695Sgavinm * them).
2798a40a695Sgavinm */
280b5016cbbSstephhprop upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank
281b5016cbbSstephh    { CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED } (1)->
282b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
283b5016cbbSstephh    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
284b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
2858a40a695Sgavinm
/*
 * A page_sb or page_ck SERD trip ereport on a rank is explained by the
 * corresponding page fault on that same rank.
 */
2868a40a695Sgavinmprop fault.memory.page_sb@chip/memory-controller/dimm/rank (1)->
2878a40a695Sgavinm    ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
2888a40a695Sgavinm
2898a40a695Sgavinmprop fault.memory.page_ck@chip/memory-controller/dimm/rank (1)->
2908a40a695Sgavinm    ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
2918a40a695Sgavinm
/*
 * Also relate each page fault to the cpu ereports whose "resource"
 * payload satisfies CONTAINS_RANK.  The constraint evaluates SET_ADDR and
 * SET_OFFSET, which set the "asru-physaddr" and "asru-offset" payload
 * properties from the ereport payload.
 */
2928a40a695Sgavinmprop fault.memory.page_sb@chip/memory-controller/dimm/rank
293b5016cbbSstephh    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
294b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
295b5016cbbSstephh    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
296b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
2978a40a695Sgavinm
/*
 * NOTE(review): this lists dc.inf_sys_ecc1 and bu.s_ecc1 for page_ck,
 * whereas the upset.memory.page_ck propagation in this file lists only
 * nb.mem_ce - confirm whether the extra ereports are intended here.
 */
2988a40a695Sgavinmprop fault.memory.page_ck@chip/memory-controller/dimm/rank
299b5016cbbSstephh    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
300b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
301b5016cbbSstephh    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
302b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
3038a40a695Sgavinm
3044156fc34Sgavinm/*
3054156fc34Sgavinm * Discard memory ereports that do not indicate a resource.
3064156fc34Sgavinm */
307b5016cbbSstephhprop upset.memory.discard1@chip/cpu
308b5016cbbSstephh    { !RESOURCE_EXISTS } (1)->
309b5016cbbSstephh    ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu,
310b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
311b5016cbbSstephh    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
312b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
3138a40a695Sgavinm
3148a40a695Sgavinm/*								#DIMM_SCU#
3158a40a695Sgavinm * "Single-correctable-unit" DIMM faults are diagnosed when the total number of
3168a40a695Sgavinm * page faults (diagnosed from repeated single-bit or multibit-chipkills)
3178a40a695Sgavinm * from any one rank on that DIMM reaches a threshold.  A "correctable unit"
3188a40a695Sgavinm * is a single bit in normal 64/8 ECC mode, or a single symbol in ChipKill
3198a40a695Sgavinm * 128/16 mode (i.e., nibble-aligned nibble for the code used on Opteron).
3208a40a695Sgavinm *
3218a40a695Sgavinm * We do not stop diagnosing further single-bit page faults once we have
3228a40a695Sgavinm * declared a single-bit DIMM fault - we continue diagnosing them and
3238a40a695Sgavinm * response agents can continue to retire those pages up to the system-imposed
3248a40a695Sgavinm * retirement limit.
3258a40a695Sgavinm *
3268a40a695Sgavinm * Two distinct fault types may be diagnosed - fault.memory.dimm_sb and
3278a40a695Sgavinm * fault.memory.dimm_ck.  Which one is diagnosed depends on whether we
3288a40a695Sgavinm * have reached the threshold for a majority of single-bit page faults or
3298a40a695Sgavinm * multibit page faults.
3308a40a695Sgavinm *
3318a40a695Sgavinm * Implementation: we maintain parallel SERD engines to the page_sb and
3328a40a695Sgavinm * page_ck engines, which trip in unison.  On trip it generates a distinct
3338a40a695Sgavinm * ereport which we diagnose to a fault if the threshold has been
3348a40a695Sgavinm * reached, or to a throwaway upset if not.
3358a40a695Sgavinm *
3367aec1d6eScindi */
3377aec1d6eScindi
3388a40a695Sgavinm#define DIMM_SB_FIT		2000
3397aec1d6eScindi#define DIMM_CK_FIT		4000
3407aec1d6eScindi
/*
 * The dimm_sb and dimm_ck faults are declared on the rank path but name
 * the dimm as both FRU and ASRU.
 */
3418a40a695Sgavinmevent fault.memory.dimm_sb@chip/memory-controller/dimm/rank,
3428a40a695Sgavinm    FITrate=DIMM_SB_FIT, FRU=dimm, ASRU=dimm,
3438a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
3447aec1d6eScindi
3458a40a695Sgavinmevent fault.memory.dimm_ck@chip/memory-controller/dimm/rank,
3468a40a695Sgavinm    FITrate=DIMM_CK_FIT, FRU=dimm, ASRU=dimm,
3478a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
3487aec1d6eScindi
/*
 * These engines use the same N and T parameters as the page_sb and
 * page_ck engines (PAGE_SB_COUNT/PAGE_SB_TIME and
 * PAGE_CK_COUNT/PAGE_CK_TIME) but have their own trip ereports.
 */
3498a40a695Sgavinmevent ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
3508a40a695Sgavinm	{ within(5s) };
3518a40a695Sgavinmengine serd.memory.dimm_sb@chip/memory-controller/dimm/rank,
3528a40a695Sgavinm    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
3538a40a695Sgavinm    trip=ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank;
3548a40a695Sgavinmevent upset.memory.dimm_sb@chip/memory-controller/dimm/rank,
3558a40a695Sgavinm    engine=serd.memory.dimm_sb@chip/memory-controller/dimm/rank;
3567aec1d6eScindi
3578a40a695Sgavinmevent ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
3588a40a695Sgavinm	{ within(5s) };
3598a40a695Sgavinmengine serd.memory.dimm_ck@chip/memory-controller/dimm/rank,
3608a40a695Sgavinm    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
3618a40a695Sgavinm    trip=ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;
3628a40a695Sgavinmevent upset.memory.dimm_ck@chip/memory-controller/dimm/rank,
3638a40a695Sgavinm    engine=serd.memory.dimm_ck@chip/memory-controller/dimm/rank;
3647aec1d6eScindi
/* Declared with no engine= property. */
3658a40a695Sgavinmevent upset.memory.discard2@chip/memory-controller/dimm/rank;
3668a40a695Sgavinm
/*
 * Feed the dimm_sb and dimm_ck upsets from the same ereports as the page
 * upsets.  Unlike the page upset propagations, these constraints do not
 * test RANK_PGFLT_LIMIT_REACHED.
 */
367b5016cbbSstephhprop upset.memory.dimm_sb@chip/memory-controller/dimm/rank
368b5016cbbSstephh    { CONTAINS_RANK && SINGLE_BIT_CE } (0)->
369b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
370b5016cbbSstephh    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
371b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
3728a40a695Sgavinm
373b5016cbbSstephhprop upset.memory.dimm_ck@chip/memory-controller/dimm/rank
374b5016cbbSstephh    { CONTAINS_RANK && MULTI_BIT_CE } (0)->
375b5016cbbSstephh    ereport.cpu.amd.nb.mem_ce@chip/cpu;
3768a40a695Sgavinm
3778a40a695Sgavinm/*
3788a40a695Sgavinm * The following two propagations diagnose a fault.memory.dimm_sb when
3798a40a695Sgavinm * either the dimm_sb or dimm_ck engine trips (for a new page fault)
3808a40a695Sgavinm * and the total number of page faults (sb and ck) exceeds the threshold
3818a40a695Sgavinm * value with the majority being from sb page faults.
3828a40a695Sgavinm */
3838a40a695Sgavinmprop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
3848a40a695Sgavinm    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
3858a40a695Sgavinm    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };
3868a40a695Sgavinm
3878a40a695Sgavinmprop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
3888a40a695Sgavinm    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
3898a40a695Sgavinm    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };
3908a40a695Sgavinm
3918a40a695Sgavinm/*
3928a40a695Sgavinm * The following two propagations diagnose a fault.memory.dimm_ck when
3938a40a695Sgavinm * either the dimm_sb or dimm_ck engine trips (for a new page fault)
3948a40a695Sgavinm * and the total number of page faults (sb and ck) exceeds the threshold
3958a40a695Sgavinm * value with the majority being from ck page faults.
3968a40a695Sgavinm */
3978a40a695Sgavinmprop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
3988a40a695Sgavinm    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
3998a40a695Sgavinm    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };
4008a40a695Sgavinm
4018a40a695Sgavinmprop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
4028a40a695Sgavinm    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
4038a40a695Sgavinm    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };
4048a40a695Sgavinm
/*
 * Unconstrained alternative explanation for the dimm_sb_trip and
 * dimm_ck_trip ereports: an upset that is declared without an engine.
 * The dimm fault propagations for the same trip ereports carry
 * threshold constraints; this one carries none.
 */
4058a40a695Sgavinmprop upset.memory.discard2@chip/memory-controller/dimm/rank (1)->
4068a40a695Sgavinm    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank,
4078a40a695Sgavinm    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;
4087aec1d6eScindi
4097aec1d6eScindi/* 								#DIMM_UE#
4108a40a695Sgavinm *								#PAGE_UE#
4118a40a695Sgavinm * An uncorrectable multi-bit fault in a memory dimm can cause:
4127aec1d6eScindi *
4138a40a695Sgavinm *  - mem_ue    	   : reported by nb for an access from a remote cpu
4148a40a695Sgavinm *  - inf_sys_eccm : reported by ic or dc; the ic does not report a syndrome
4158a40a695Sgavinm *  - s_eccm	   : reported by bu
4167aec1d6eScindi *
4177aec1d6eScindi * Note we use a SERD engine here simply as a way of ensuring that we get
4188a40a695Sgavinm * both dimm and page faults reported.
4198a40a695Sgavinm *
4208a40a695Sgavinm * Since on production systems we force HT Sync Flood on uncorrectable
4218a40a695Sgavinm * memory errors (if not already set as such by the BIOS, as it should be)
4228a40a695Sgavinm * we won't actually receive these ereports since the system will be reset.
4237aec1d6eScindi */
4247aec1d6eScindi
4257aec1d6eScindi#define DIMM_UE_FIT		6000
4267aec1d6eScindi
4278a40a695Sgavinmevent ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu{within(5s)};
4288a40a695Sgavinmevent ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu{within(5s)};
4298a40a695Sgavinmevent ereport.cpu.amd.bu.s_eccm@chip/cpu{within(5s)};
4308a40a695Sgavinmevent ereport.cpu.amd.nb.mem_ue@chip/cpu{within(5s)};
4318a40a695Sgavinm
4328a40a695Sgavinmevent fault.memory.dimm_ue@chip/memory-controller/dimm/rank,
4338a40a695Sgavinm    FITrate=DIMM_UE_FIT, FRU=dimm, ASRU=dimm,
4348a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
4358a40a695Sgavinm
4368a40a695Sgavinmevent fault.memory.page_ue@chip/memory-controller/dimm/rank,
4378a40a695Sgavinm    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
4388a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */
4398a40a695Sgavinm
/*
 * Per-rank SERD engine for uncorrectable-error dimm upsets; its trip=
 * event is ereport.memory.dimm_ue_trip.
 * NOTE(review): N=0 presumably makes the engine trip on the first upset,
 * matching the "immediate" diagnosis described for uncorrectable errors
 * earlier in this file - confirm.
 */
440b5016cbbSstephhevent ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank{within(5s)};
4418a40a695Sgavinmengine serd.memory.dimm_ue@chip/memory-controller/dimm/rank,
4428a40a695Sgavinm    N=0, T=1h, method=persistent,
4438a40a695Sgavinm    trip=ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
4448a40a695Sgavinmevent upset.memory.dimm_ue@chip/memory-controller/dimm/rank,
4458a40a695Sgavinm    engine=serd.memory.dimm_ue@chip/memory-controller/dimm/rank;
4468a40a695Sgavinm
/* The same arrangement, with identical N and T, for page_ue upsets. */
447b5016cbbSstephhevent ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank{within(5s)};
4488a40a695Sgavinmengine serd.memory.page_ue@chip/memory-controller/dimm/rank,
4498a40a695Sgavinm    N=0, T=1h, method=persistent,
4508a40a695Sgavinm    trip=ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
4518a40a695Sgavinmevent upset.memory.page_ue@chip/memory-controller/dimm/rank,
4528a40a695Sgavinm    engine=serd.memory.page_ue@chip/memory-controller/dimm/rank;
4538a40a695Sgavinm
/* Declared with no engine= property. */
4548a40a695Sgavinmevent upset.memory.discard3@chip/cpu;
4558a40a695Sgavinm
/*
 * Both the page_ue and dimm_ue upsets are related to the same four
 * uncorrectable-error ereports, constrained only by CONTAINS_RANK.
 */
456b5016cbbSstephhprop upset.memory.page_ue@chip/memory-controller/dimm/rank
457b5016cbbSstephh    { CONTAINS_RANK } (0)->
458b5016cbbSstephh    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
459b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
460b5016cbbSstephh    ereport.cpu.amd.bu.s_eccm@chip/cpu,
461b5016cbbSstephh    ereport.cpu.amd.nb.mem_ue@chip/cpu;
4628a40a695Sgavinm
463b5016cbbSstephhprop upset.memory.dimm_ue@chip/memory-controller/dimm/rank
464b5016cbbSstephh    { CONTAINS_RANK } (0)->
465b5016cbbSstephh    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
466b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
467b5016cbbSstephh    ereport.cpu.amd.bu.s_eccm@chip/cpu,
468b5016cbbSstephh    ereport.cpu.amd.nb.mem_ue@chip/cpu;
4698a40a695Sgavinm
/* The page_ue trip ereport is explained by fault.memory.page_ue. */
4708a40a695Sgavinmprop fault.memory.page_ue@chip/memory-controller/dimm/rank (1)->
4718a40a695Sgavinm    ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
4728a40a695Sgavinm
/*
 * Relate the page_ue fault to the originating ereports as well; the
 * constraint evaluates SET_ADDR and SET_OFFSET, which set the
 * "asru-physaddr" and "asru-offset" payload properties.
 */
4738a40a695Sgavinmprop fault.memory.page_ue@chip/memory-controller/dimm/rank
474b5016cbbSstephh    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
475b5016cbbSstephh    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
476b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
477b5016cbbSstephh    ereport.cpu.amd.bu.s_eccm@chip/cpu,
478b5016cbbSstephh    ereport.cpu.amd.nb.mem_ue@chip/cpu;
4798a40a695Sgavinm
/* The dimm_ue trip ereport is explained by fault.memory.dimm_ue. */
4808a40a695Sgavinmprop fault.memory.dimm_ue@chip/memory-controller/dimm/rank (1)->
4818a40a695Sgavinm    ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
4828a40a695Sgavinm
/*
 * Uncorrectable-error ereports with no "resource" payload member are
 * explained by the engine-less upset.memory.discard3.
 */
483b5016cbbSstephhprop upset.memory.discard3@chip/cpu
484b5016cbbSstephh    { !RESOURCE_EXISTS } (1)->
485b5016cbbSstephh    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
486b5016cbbSstephh    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
487b5016cbbSstephh    ereport.cpu.amd.bu.s_eccm@chip/cpu,
488e5ba14ffSstephh    ereport.cpu.amd.nb.mem_ue@chip/cpu;
4898a40a695Sgavinm
4908a40a695Sgavinm/*								#CSTESTFAIL#
4918a40a695Sgavinm * If the BIOS fails a chip-select during POST, or perhaps after a
4928a40a695Sgavinm * sync flood from an uncorrectable error, then on revision F and G it
4938a40a695Sgavinm * should mark that chip-select as TestFail in the CS Base register.
4948a40a695Sgavinm * When the memory-controller driver discovers all the MC configuration
4958a40a695Sgavinm * it notes such failed chip-selects and creates topology nodes for the
4968a40a695Sgavinm * chip-select and associated dimms and ranks, and produces an ereport for each
4978a40a695Sgavinm * failed chip-select with detector set to the memory-controller node
4988a40a695Sgavinm * and resource indicating the failed chip-select.
4998a40a695Sgavinm */
5008a40a695Sgavinm
501b5016cbbSstephhevent ereport.cpu.amd.mc.cs_testfail@chip/memory-controller{within(5s)};
5027aec1d6eScindi
5038a40a695Sgavinmevent fault.memory.dimm_testfail@chip/memory-controller/dimm/rank,
5048a40a695Sgavinm    FITrate=1000, ASRU=dimm, FRU=dimm,
5058a40a695Sgavinm    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */
5067aec1d6eScindi
5078a40a695Sgavinmevent error.memory.cs_testfail@chip/memory-controller/chip-select;
5087aec1d6eScindi
/*
 * True if the ereport's "resource" payload contains the ASRU of the
 * chip-select in the propagation path.
 */
5098a40a695Sgavinm#define	CONTAINS_CS (payloadprop_contains("resource", \
5108a40a695Sgavinm	asru(chip/memory-controller/chip-select)))
5117aec1d6eScindi
/*
 * A cs_testfail ereport detected at the memory-controller is explained
 * by a cs_testfail error on the chip-select named in its resource.
 */
512b5016cbbSstephhprop error.memory.cs_testfail@chip/memory-controller/chip-select (1)->
5138a40a695Sgavinm    ereport.cpu.amd.mc.cs_testfail@chip/memory-controller
5148a40a695Sgavinm    { CONTAINS_CS };
5157aec1d6eScindi
/*
 * True if the chip-select has a configuration property named s and that
 * property's value equals the "csname" configuration property of the
 * rank.
 */
5168a40a695Sgavinm#define CSMATCH(s) \
5178a40a695Sgavinm	(confprop_defined(asru(chip/memory-controller/chip-select), s) && \
5188a40a695Sgavinm	confprop(asru(chip/memory-controller/chip-select), s) == \
5198a40a695Sgavinm	confprop(asru(chip/memory-controller/dimm/rank), "csname"))
5207aec1d6eScindi
/*
 * Diagnose dimm_testfail on each rank whose "csname" matches either the
 * "dimm1-csname" or the "dimm2-csname" property of the failed
 * chip-select.
 */
521b5016cbbSstephhprop fault.memory.dimm_testfail@chip/memory-controller/dimm/rank (1)->
5228a40a695Sgavinm    error.memory.cs_testfail@chip/memory-controller/chip-select
5238a40a695Sgavinm    { CSMATCH("dimm1-csname") || CSMATCH("dimm2-csname")};
5247aec1d6eScindi
5258a40a695Sgavinm/*								#ADDRPAR#
5268a40a695Sgavinm * DRAM Command/Address Parity Errors.
5278a40a695Sgavinm *
5288a40a695Sgavinm *  - dramaddr_par : reported by the nb; the NB status register includes
5298a40a695Sgavinm *    a bit indicating which dram controller channel (A or B) experienced
5308a40a695Sgavinm *    the error.
5318a40a695Sgavinm */
5328a40a695Sgavinm
533b5016cbbSstephhevent ereport.cpu.amd.nb.dramaddr_par@chip/cpu{within(5s)};
5348a40a695Sgavinm
5358a40a695Sgavinmevent fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel,
5368a40a695Sgavinm    FITrate=1000, ASRU=dram-channel;
5378a40a695Sgavinm
/*
 * Assign $chan from the "IA32_MCi_STATUS" payload value: the value is
 * shifted right by 32 and masked with 0x200, and $chan becomes 1 if the
 * result is non-zero, otherwise 0.
 * NOTE(review): this reading assumes C-like precedence (>> before &, and
 * ?: before =) - confirm against the eversholt grammar.
 */
538*20c794b3Sgavinm#define GET_CHANNEL ($chan = (payloadprop("IA32_MCi_STATUS") >> 32 & 0x200) ? \
5398a40a695Sgavinm    1 : 0)
5408a40a695Sgavinm
/*
 * Blame the dram-channel whose instance number y equals $chan.
 * NOTE(review): the constraint also requires GET_CHANNEL itself to be
 * true; confirm that an assignment of 0 does not make the constraint
 * false for channel 0.
 */
5418a40a695Sgavinmprop fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel[y] (0)->
5428a40a695Sgavinm    ereport.cpu.amd.nb.dramaddr_par@chip/cpu { GET_CHANNEL && $chan == y };
5437aec1d6eScindi
5448a40a695Sgavinm/*
5457aec1d6eScindi * l2 cache data errors.
5467aec1d6eScindi */
5477aec1d6eScindi
/*
 * FIT rate for the fault, and the N (count) and T (time window) parameters
 * used by the single-bit SERD engine serd.cpu.amd.l2d_sb.
 */
5487aec1d6eScindi#define L2CACHEDATA_FIT		1000
5497aec1d6eScindi#define L2CACHEDATA_SB_COUNT	3
5507aec1d6eScindi#define L2CACHEDATA_SB_TIME	12h
5517aec1d6eScindi
/*
 * One fault covers both the single-bit (_sb) and multi-bit (_mb) cases; the
 * two error events are the intermediate steps between fault and ereports.
 */
5527aec1d6eScindievent fault.cpu.amd.l2cachedata@chip/cpu, FITrate=L2CACHEDATA_FIT,
5537aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
5547aec1d6eScindievent error.cpu.amd.l2cachedata_sb@chip/cpu;
5557aec1d6eScindievent error.cpu.amd.l2cachedata_mb@chip/cpu;
5567aec1d6eScindi
/* The fault must produce at least one of the two intermediate errors. */
5577aec1d6eScindiprop fault.cpu.amd.l2cachedata@chip/cpu (1)->
5587aec1d6eScindi    error.cpu.amd.l2cachedata_sb@chip/cpu,
5597aec1d6eScindi    error.cpu.amd.l2cachedata_mb@chip/cpu;
5607aec1d6eScindi
5617aec1d6eScindi/* 								#L2D_SINGLE#
5627aec1d6eScindi * A single bit data array fault in an l2 cache can cause:
5637aec1d6eScindi *
5647aec1d6eScindi *  - inf_l2_ecc1 : reported by ic on this cpu
5657aec1d6eScindi *  - inf_l2_ecc1 : reported by dc on this cpu
5667aec1d6eScindi *  - l2d_ecc1 : reported by bu on copyback or on snoop from another cpu
5677aec1d6eScindi *
5687aec1d6eScindi * Single-bit errors are diagnosed to cache upsets.  SERD engines are used
5697aec1d6eScindi * to count upsets resulting from CEs.
5707aec1d6eScindi */
5717aec1d6eScindi
/*
 * Three raw single-bit ereports, plus l2d_sb_trip, which the SERD engine
 * below names as its trip event.
 */
5727aec1d6eScindievent ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu{within(5s)};
5737aec1d6eScindievent ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu{within(5s)};
5747aec1d6eScindievent ereport.cpu.amd.bu.l2d_ecc1@chip/cpu{within(5s)};
575b5016cbbSstephhevent ereport.cpu.amd.l2d_sb_trip@chip/cpu{within(5s)};
5767aec1d6eScindi
/* SERD engine parameterised by L2CACHEDATA_SB_COUNT / L2CACHEDATA_SB_TIME. */
5777aec1d6eScindiengine serd.cpu.amd.l2d_sb@chip/cpu,
5787aec1d6eScindi    N=L2CACHEDATA_SB_COUNT, T=L2CACHEDATA_SB_TIME, method=persistent,
5797aec1d6eScindi    trip=ereport.cpu.amd.l2d_sb_trip@chip/cpu;
5807aec1d6eScindi
/* The upset is bound to the SERD engine above. */
5817aec1d6eScindievent upset.cpu.amd.l2d_sb@chip/cpu,
5827aec1d6eScindi	engine=serd.cpu.amd.l2d_sb@chip/cpu;
5837aec1d6eScindi
/* Any of the three raw ereports is explained by the upset. */
5847aec1d6eScindiprop upset.cpu.amd.l2d_sb@chip/cpu (1)->
5857aec1d6eScindi    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
5867aec1d6eScindi    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
5877aec1d6eScindi    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;
5887aec1d6eScindi
/* The trip ereport is explained by the single-bit error. */
5897aec1d6eScindiprop error.cpu.amd.l2cachedata_sb@chip/cpu (1)->
5907aec1d6eScindi    ereport.cpu.amd.l2d_sb_trip@chip/cpu;
5917aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereports. */
5927aec1d6eScindiprop fault.cpu.amd.l2cachedata@chip/cpu (0)->
5937aec1d6eScindi    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
5947aec1d6eScindi    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
5957aec1d6eScindi    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;
5967aec1d6eScindi
5977aec1d6eScindi/* 								#L2D_MULTI#
5987aec1d6eScindi * A multi-bit data array fault in an l2 cache can cause:
5997aec1d6eScindi *
6007aec1d6eScindi *  - inf_l2_eccm : reported by ic on this cpu
6017aec1d6eScindi *  - inf_l2_eccm : reported by dc on this cpu
6027aec1d6eScindi *  - l2d_eccm : reported by bu on copyback or on snoop from another cpu
6037aec1d6eScindi */
6047aec1d6eScindi
/* Raw multi-bit L2 data ereports; no SERD engine is involved here. */
605b5016cbbSstephhevent ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu{within(5s)};
606b5016cbbSstephhevent ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu{within(5s)};
607b5016cbbSstephhevent ereport.cpu.amd.bu.l2d_eccm@chip/cpu{within(5s)};
6087aec1d6eScindi
/* The multi-bit error must produce at least one of the raw ereports. */
6097aec1d6eScindiprop error.cpu.amd.l2cachedata_mb@chip/cpu (1)->
6107aec1d6eScindi    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
6117aec1d6eScindi    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
6127aec1d6eScindi    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;
6137aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the same ereports. */
6147aec1d6eScindiprop fault.cpu.amd.l2cachedata@chip/cpu (0)->
6157aec1d6eScindi    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
6167aec1d6eScindi    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
6177aec1d6eScindi    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;
6187aec1d6eScindi
6198a40a695Sgavinm/*
6207aec1d6eScindi * l2 cache main tag errors
6217aec1d6eScindi */
6227aec1d6eScindi
/*
 * FIT rate for the fault, and the N (count) and T (time window) parameters
 * used by the single-bit SERD engine serd.cpu.amd.l2t_sb.
 */
6237aec1d6eScindi#define L2CACHETAG_FIT		1000
6247aec1d6eScindi#define L2CACHETAG_SB_COUNT	3
6257aec1d6eScindi#define L2CACHETAG_SB_TIME	12h
6267aec1d6eScindi
/*
 * One fault covers both the single-bit (_sb) and multi-bit (_mb) cases; the
 * two error events are the intermediate steps between fault and ereports.
 */
6277aec1d6eScindievent fault.cpu.amd.l2cachetag@chip/cpu, FITrate=L2CACHETAG_FIT,
6287aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
6297aec1d6eScindievent error.cpu.amd.l2cachetag_sb@chip/cpu;
6307aec1d6eScindievent error.cpu.amd.l2cachetag_mb@chip/cpu;
6317aec1d6eScindi
/* The fault must produce at least one of the two intermediate errors. */
6327aec1d6eScindiprop fault.cpu.amd.l2cachetag@chip/cpu (1)->
6337aec1d6eScindi    error.cpu.amd.l2cachetag_sb@chip/cpu,
6347aec1d6eScindi    error.cpu.amd.l2cachetag_mb@chip/cpu;
6357aec1d6eScindi
6367aec1d6eScindi/* 								#L2T_SINGLE#
6377aec1d6eScindi * A single bit tag array fault in an l2 cache can cause:
6387aec1d6eScindi *
6397aec1d6eScindi *  - l2t_ecc1 : reported by bu on this cpu when detected during snoop
6407aec1d6eScindi *  - l2t_par : reported by bu on this cpu when detected other than during snoop
6417aec1d6eScindi *
6427aec1d6eScindi * Note that the bu.l2t_par ereport could be due to a single bit or multi bit
6437aec1d6eScindi * event. If the l2t_sb_trip has already triggered it will be treated as another
6447aec1d6eScindi * ce, otherwise it will be treated as a ue event.
6457aec1d6eScindi */
6467aec1d6eScindi
/*
 * Raw tag ereports, plus l2t_sb_trip, which the SERD engine below names as
 * its trip event.  bu.l2t_par is also listed under the multi-bit error.
 */
6477aec1d6eScindievent ereport.cpu.amd.bu.l2t_ecc1@chip/cpu{within(5s)};
648b5016cbbSstephhevent ereport.cpu.amd.bu.l2t_par@chip/cpu{within(5s)};
649b5016cbbSstephhevent ereport.cpu.amd.l2t_sb_trip@chip/cpu{within(5s)};
6507aec1d6eScindi
/* SERD engine parameterised by L2CACHETAG_SB_COUNT / L2CACHETAG_SB_TIME. */
6517aec1d6eScindiengine serd.cpu.amd.l2t_sb@chip/cpu,
6527aec1d6eScindi    N=L2CACHETAG_SB_COUNT, T=L2CACHETAG_SB_TIME, method=persistent,
6537aec1d6eScindi    trip=ereport.cpu.amd.l2t_sb_trip@chip/cpu;
6547aec1d6eScindi
/* The upset is bound to the SERD engine above. */
6557aec1d6eScindievent upset.cpu.amd.l2t_sb@chip/cpu,
6567aec1d6eScindi	engine=serd.cpu.amd.l2t_sb@chip/cpu;
6577aec1d6eScindi
/* Either raw ereport is explained by the upset. */
6587aec1d6eScindiprop upset.cpu.amd.l2t_sb@chip/cpu (1)->
6597aec1d6eScindi    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
6607aec1d6eScindi    ereport.cpu.amd.bu.l2t_par@chip/cpu;
6617aec1d6eScindi
/* The trip ereport is explained by the single-bit error. */
6627aec1d6eScindiprop error.cpu.amd.l2cachetag_sb@chip/cpu (1)->
6637aec1d6eScindi    ereport.cpu.amd.l2t_sb_trip@chip/cpu;
6647aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereports. */
6657aec1d6eScindiprop fault.cpu.amd.l2cachetag@chip/cpu (0)->
6667aec1d6eScindi    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
6677aec1d6eScindi    ereport.cpu.amd.bu.l2t_par@chip/cpu;
6687aec1d6eScindi
6697aec1d6eScindi/* 								#L2T_MULTI#
6707aec1d6eScindi * A multi-bit tag array fault in an l2 cache can cause:
6717aec1d6eScindi *
6727aec1d6eScindi *  - l2t_eccm : reported by bu on this cpu when detected during snoop
6737aec1d6eScindi *  - l2t_par : reported by bu on this cpu when detected other than during snoop
6747aec1d6eScindi */
6757aec1d6eScindi
/* Raw multi-bit tag ereport; bu.l2t_par is declared in the single-bit rules. */
676b5016cbbSstephhevent ereport.cpu.amd.bu.l2t_eccm@chip/cpu{within(5s)};
6777aec1d6eScindi
/* The multi-bit error must produce l2t_eccm or l2t_par. */
6787aec1d6eScindiprop error.cpu.amd.l2cachetag_mb@chip/cpu (1)->
6797aec1d6eScindi    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
6807aec1d6eScindi    ereport.cpu.amd.bu.l2t_par@chip/cpu;
6817aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the same ereports. */
6827aec1d6eScindiprop fault.cpu.amd.l2cachetag@chip/cpu (0)->
6837aec1d6eScindi    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
6847aec1d6eScindi    ereport.cpu.amd.bu.l2t_par@chip/cpu;
6857aec1d6eScindi
6867aec1d6eScindi/* 								#ICD_PAR#
6877aec1d6eScindi * A data array parity fault in an I cache can cause:
6887aec1d6eScindi *
6897aec1d6eScindi *  - data_par : reported by ic on this cpu
6907aec1d6eScindi */
6917aec1d6eScindi
/* FIT rate for the fault, and N / T for serd.cpu.amd.icachedata. */
6927aec1d6eScindi#define ICACHEDATA_FIT		1000
6937aec1d6eScindi#define ICACHEDATA_SB_COUNT	2
6947aec1d6eScindi#define ICACHEDATA_SB_TIME	168h
6957aec1d6eScindi
/* Raw ereport, plus ic_dp_trip, which the SERD engine names as its trip. */
6967aec1d6eScindievent ereport.cpu.amd.ic.data_par@chip/cpu{within(5s)};
697b5016cbbSstephhevent ereport.cpu.amd.ic_dp_trip@chip/cpu{within(5s)};
6987aec1d6eScindi
6997aec1d6eScindievent fault.cpu.amd.icachedata@chip/cpu, FITrate=ICACHEDATA_FIT,
7007aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
7017aec1d6eScindi
7027aec1d6eScindiengine serd.cpu.amd.icachedata@chip/cpu,
7037aec1d6eScindi    N=ICACHEDATA_SB_COUNT, T=ICACHEDATA_SB_TIME, method=persistent,
7047aec1d6eScindi    trip=ereport.cpu.amd.ic_dp_trip@chip/cpu;
7057aec1d6eScindi
/* The upset is bound to the SERD engine above. */
7067aec1d6eScindievent upset.cpu.amd.icachedata@chip/cpu,
7077aec1d6eScindi	engine=serd.cpu.amd.icachedata@chip/cpu;
7087aec1d6eScindi
/* The raw ereport is explained by the upset. */
7097aec1d6eScindiprop upset.cpu.amd.icachedata@chip/cpu (1)->
7107aec1d6eScindi    ereport.cpu.amd.ic.data_par@chip/cpu;
7117aec1d6eScindi
/* The trip ereport is explained directly by the fault (no error event). */
7127aec1d6eScindiprop fault.cpu.amd.icachedata@chip/cpu (1)->
7137aec1d6eScindi    ereport.cpu.amd.ic_dp_trip@chip/cpu;
7147aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereport. */
7157aec1d6eScindiprop fault.cpu.amd.icachedata@chip/cpu (0)->
7167aec1d6eScindi    ereport.cpu.amd.ic.data_par@chip/cpu;
7177aec1d6eScindi
7187aec1d6eScindi/* 								#ICT_PAR#
7197aec1d6eScindi * A tag array parity fault in an I cache can cause:
7207aec1d6eScindi *
7217aec1d6eScindi *  - tag_par : reported by ic on this cpu
7227aec1d6eScindi */
7237aec1d6eScindi
/* FIT rate for the fault, and N / T for serd.cpu.amd.icachetag. */
7247aec1d6eScindi#define ICACHETAG_FIT		1000
7257aec1d6eScindi#define ICACHETAG_SB_COUNT	2
7267aec1d6eScindi#define ICACHETAG_SB_TIME	168h
7277aec1d6eScindi
/* Raw ereport, plus ic_tp_trip, which the SERD engine names as its trip. */
7287aec1d6eScindievent ereport.cpu.amd.ic.tag_par@chip/cpu{within(5s)};
729b5016cbbSstephhevent ereport.cpu.amd.ic_tp_trip@chip/cpu{within(5s)};
7307aec1d6eScindi
7317aec1d6eScindievent fault.cpu.amd.icachetag@chip/cpu, FITrate=ICACHETAG_FIT,
7327aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
7337aec1d6eScindi
7347aec1d6eScindiengine serd.cpu.amd.icachetag@chip/cpu,
7357aec1d6eScindi    N=ICACHETAG_SB_COUNT, T=ICACHETAG_SB_TIME, method=persistent,
7367aec1d6eScindi    trip=ereport.cpu.amd.ic_tp_trip@chip/cpu;
7377aec1d6eScindi
/* The upset is bound to the SERD engine above. */
7387aec1d6eScindievent upset.cpu.amd.icachetag@chip/cpu,
7397aec1d6eScindi	engine=serd.cpu.amd.icachetag@chip/cpu;
7407aec1d6eScindi
/* The raw ereport is explained by the upset. */
7417aec1d6eScindiprop upset.cpu.amd.icachetag@chip/cpu (1)->
7427aec1d6eScindi    ereport.cpu.amd.ic.tag_par@chip/cpu;
7437aec1d6eScindi
/* The trip ereport is explained directly by the fault (no error event). */
7447aec1d6eScindiprop fault.cpu.amd.icachetag@chip/cpu (1)->
7457aec1d6eScindi    ereport.cpu.amd.ic_tp_trip@chip/cpu;
7467aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereport. */
7477aec1d6eScindiprop fault.cpu.amd.icachetag@chip/cpu (0)->
7487aec1d6eScindi    ereport.cpu.amd.ic.tag_par@chip/cpu;
7497aec1d6eScindi
7507aec1d6eScindi/* 								#ICT_SNOOP#
7517aec1d6eScindi * A snoop tag array parity fault in an I cache can cause:
7527aec1d6eScindi *
7537aec1d6eScindi *  - stag_par : reported by ic on this cpu
7547aec1d6eScindi */
7557aec1d6eScindi
/* FIT rate for the fault; no SERD engine is declared for this case. */
7567aec1d6eScindi#define ICACHESTAG_FIT		1000
7577aec1d6eScindi
7587aec1d6eScindievent ereport.cpu.amd.ic.stag_par@chip/cpu{within(5s)};
7597aec1d6eScindi
7607aec1d6eScindievent fault.cpu.amd.icachestag@chip/cpu, FITrate=ICACHESTAG_FIT,
7617aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
7627aec1d6eScindi
/* The ereport is tied directly to the fault, with no upset in between. */
7637aec1d6eScindiprop fault.cpu.amd.icachestag@chip/cpu (1)->
7647aec1d6eScindi    ereport.cpu.amd.ic.stag_par@chip/cpu;
7657aec1d6eScindi
7667aec1d6eScindi/* 								#ICTLB_1#
7677aec1d6eScindi * An l1tlb parity fault in an I cache can cause:
7687aec1d6eScindi *
7697aec1d6eScindi *  - l1tlb_par : reported by ic on this cpu
7707aec1d6eScindi */
7717aec1d6eScindi
/* FIT rate for the fault, and N / T for serd.cpu.amd.l1itlb. */
7727aec1d6eScindi#define ICACHEL1TLB_FIT		1000
7737aec1d6eScindi#define ICACHEL1TLB_SB_COUNT	2
7747aec1d6eScindi#define ICACHEL1TLB_SB_TIME	168h
7757aec1d6eScindi
/* Raw ereport, plus ic_l1tlb_trip, which the SERD engine names as its trip. */
7767aec1d6eScindievent ereport.cpu.amd.ic.l1tlb_par@chip/cpu{within(5s)};
777b5016cbbSstephhevent ereport.cpu.amd.ic_l1tlb_trip@chip/cpu{within(5s)};
7787aec1d6eScindi
7797aec1d6eScindievent fault.cpu.amd.l1itlb@chip/cpu, FITrate=ICACHEL1TLB_FIT,
7807aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
7817aec1d6eScindi
7827aec1d6eScindiengine serd.cpu.amd.l1itlb@chip/cpu,
7837aec1d6eScindi    N=ICACHEL1TLB_SB_COUNT, T=ICACHEL1TLB_SB_TIME, method=persistent,
7847aec1d6eScindi    trip=ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;
7857aec1d6eScindi
/* The upset is bound to the SERD engine above. */
7867aec1d6eScindievent upset.cpu.amd.l1itlb@chip/cpu,
7877aec1d6eScindi	engine=serd.cpu.amd.l1itlb@chip/cpu;
7887aec1d6eScindi
/* The raw ereport is explained by the upset. */
7897aec1d6eScindiprop upset.cpu.amd.l1itlb@chip/cpu (1)->
7907aec1d6eScindi    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;
7917aec1d6eScindi
/* The trip ereport is explained directly by the fault (no error event). */
7927aec1d6eScindiprop fault.cpu.amd.l1itlb@chip/cpu (1)->
7937aec1d6eScindi    ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;
7947aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereport. */
7957aec1d6eScindiprop fault.cpu.amd.l1itlb@chip/cpu (0)->
7967aec1d6eScindi    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;
7977aec1d6eScindi
7987aec1d6eScindi/* 								#ICTLB_2#
7997aec1d6eScindi * An l2tlb parity fault in an I cache can cause:
8007aec1d6eScindi *
8017aec1d6eScindi *  - l2tlb_par : reported by ic on this cpu
8027aec1d6eScindi */
8037aec1d6eScindi
/* FIT rate for the fault, and N / T for serd.cpu.amd.l2itlb. */
8047aec1d6eScindi#define ICACHEL2TLB_FIT		1000
8057aec1d6eScindi#define ICACHEL2TLB_SB_COUNT	2
8067aec1d6eScindi#define ICACHEL2TLB_SB_TIME	168h
8077aec1d6eScindi
/* Raw ereport, plus ic_l2tlb_trip, which the SERD engine names as its trip. */
8087aec1d6eScindievent ereport.cpu.amd.ic.l2tlb_par@chip/cpu{within(5s)};
809b5016cbbSstephhevent ereport.cpu.amd.ic_l2tlb_trip@chip/cpu{within(5s)};
8107aec1d6eScindi
8117aec1d6eScindievent fault.cpu.amd.l2itlb@chip/cpu, FITrate=ICACHEL2TLB_FIT,
8127aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
8137aec1d6eScindi
8147aec1d6eScindiengine serd.cpu.amd.l2itlb@chip/cpu,
8157aec1d6eScindi    N=ICACHEL2TLB_SB_COUNT, T=ICACHEL2TLB_SB_TIME, method=persistent,
8167aec1d6eScindi    trip=ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;
8177aec1d6eScindi
/* The upset is bound to the SERD engine above. */
8187aec1d6eScindievent upset.cpu.amd.l2itlb@chip/cpu,
8197aec1d6eScindi	engine=serd.cpu.amd.l2itlb@chip/cpu;
8207aec1d6eScindi
/* The raw ereport is explained by the upset. */
8217aec1d6eScindiprop upset.cpu.amd.l2itlb@chip/cpu (1)->
8227aec1d6eScindi    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;
8237aec1d6eScindi
/* The trip ereport is explained directly by the fault (no error event). */
8247aec1d6eScindiprop fault.cpu.amd.l2itlb@chip/cpu (1)->
8257aec1d6eScindi    ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;
8267aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereport. */
8277aec1d6eScindiprop fault.cpu.amd.l2itlb@chip/cpu (0)->
8287aec1d6eScindi    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;
8297aec1d6eScindi
8308a40a695Sgavinm/*
8317aec1d6eScindi * dcache data errors
8327aec1d6eScindi */
8337aec1d6eScindi
/*
 * FIT rate for the fault, and the N (count) and T (time window) parameters
 * used by the single-bit SERD engine serd.cpu.amd.dc_sb.
 */
8347aec1d6eScindi#define DCACHEDATA_FIT		1000
8357aec1d6eScindi#define DCACHEDATA_SB_COUNT	2
8367aec1d6eScindi#define DCACHEDATA_SB_TIME	168h
8377aec1d6eScindi
/*
 * One fault covers both the single-bit (_sb) and multi-bit (_mb) cases; the
 * two error events are the intermediate steps between fault and ereports.
 */
8387aec1d6eScindievent fault.cpu.amd.dcachedata@chip/cpu, FITrate=DCACHEDATA_FIT,
8397aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
8407aec1d6eScindievent error.cpu.amd.dcachedata_sb@chip/cpu;
8417aec1d6eScindievent error.cpu.amd.dcachedata_mb@chip/cpu;
8427aec1d6eScindi
/* The fault must produce at least one of the two intermediate errors. */
8437aec1d6eScindiprop fault.cpu.amd.dcachedata@chip/cpu (1)->
8447aec1d6eScindi    error.cpu.amd.dcachedata_sb@chip/cpu,
8457aec1d6eScindi    error.cpu.amd.dcachedata_mb@chip/cpu;
8467aec1d6eScindi
8477aec1d6eScindi/* 								#DCD_SINGLE#
8487aec1d6eScindi * A single bit data array fault in a D cache can cause:
8497aec1d6eScindi *
8507aec1d6eScindi *  - data_ecc1 : reported by dc on this cpu by scrubber
8517aec1d6eScindi *  - data_ecc1_uc : reported by dc on this cpu other than by scrubber
8527aec1d6eScindi *
8537aec1d6eScindi * Make data_ecc1_uc fault immediately as it may have caused a panic
8547aec1d6eScindi */
8557aec1d6eScindi
/*
 * Two raw single-bit ereports, plus dc_sb_trip, which both SERD engines
 * below name as their trip event.
 */
8567aec1d6eScindievent ereport.cpu.amd.dc.data_ecc1@chip/cpu{within(5s)};
8577aec1d6eScindievent ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu{within(5s)};
858b5016cbbSstephhevent ereport.cpu.amd.dc_sb_trip@chip/cpu{within(5s)};
8597aec1d6eScindi
/* Engine for data_ecc1, using DCACHEDATA_SB_COUNT / DCACHEDATA_SB_TIME. */
8607aec1d6eScindiengine serd.cpu.amd.dc_sb@chip/cpu,
8617aec1d6eScindi    N=DCACHEDATA_SB_COUNT, T=DCACHEDATA_SB_TIME, method=persistent,
8627aec1d6eScindi    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;
8637aec1d6eScindi
/*
 * Engine for data_ecc1_uc.  N=0 is presumably what makes a single
 * data_ecc1_uc trip the engine at once, matching the "fault immediately"
 * intent stated for this section - TODO confirm the N=0 semantics.
 */
8647aec1d6eScindiengine serd.cpu.amd.dc_sb_uc@chip/cpu,
8657aec1d6eScindi    N=0, T=1hr, method=persistent,
8667aec1d6eScindi    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;
8677aec1d6eScindi
/* Each upset is bound to its own SERD engine. */
8687aec1d6eScindievent upset.cpu.amd.dc_sb@chip/cpu,
8697aec1d6eScindi	engine=serd.cpu.amd.dc_sb@chip/cpu;
8707aec1d6eScindi
8717aec1d6eScindievent upset.cpu.amd.dc_sb_uc@chip/cpu,
8727aec1d6eScindi	engine=serd.cpu.amd.dc_sb_uc@chip/cpu;
8737aec1d6eScindi
/* data_ecc1 is explained by the dc_sb upset. */
8747aec1d6eScindiprop upset.cpu.amd.dc_sb@chip/cpu (1)->
8757aec1d6eScindi    ereport.cpu.amd.dc.data_ecc1@chip/cpu;
8767aec1d6eScindi
/* data_ecc1_uc is explained by the dc_sb_uc upset. */
8777aec1d6eScindiprop upset.cpu.amd.dc_sb_uc@chip/cpu (1)->
8787aec1d6eScindi    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;
8797aec1d6eScindi
/* The shared trip ereport is explained by the single-bit error. */
8807aec1d6eScindiprop error.cpu.amd.dcachedata_sb@chip/cpu (1)->
8817aec1d6eScindi    ereport.cpu.amd.dc_sb_trip@chip/cpu;
8827aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the raw ereports. */
8837aec1d6eScindiprop fault.cpu.amd.dcachedata@chip/cpu (0)->
8847aec1d6eScindi    ereport.cpu.amd.dc.data_ecc1@chip/cpu,
8857aec1d6eScindi    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;
8867aec1d6eScindi
8877aec1d6eScindi/* 								#DCD_MULTI#
8887aec1d6eScindi * A multi-bit data array fault in a D cache can cause:
8897aec1d6eScindi *
8907aec1d6eScindi *  - data_eccm : reported by dc on this cpu
8917aec1d6eScindi */
8927aec1d6eScindi
/* Raw multi-bit D cache data ereport; no SERD engine is involved here. */
893b5016cbbSstephhevent ereport.cpu.amd.dc.data_eccm@chip/cpu{within(5s)};
8947aec1d6eScindi
/* The multi-bit error must produce the raw ereport. */
8957aec1d6eScindiprop error.cpu.amd.dcachedata_mb@chip/cpu (1)->
8967aec1d6eScindi    ereport.cpu.amd.dc.data_eccm@chip/cpu;
8977aec1d6eScindi
/* (0)->: the fault is also listed as an optional cause of the same ereport. */
8987aec1d6eScindiprop fault.cpu.amd.dcachedata@chip/cpu (0)->
8997aec1d6eScindi    ereport.cpu.amd.dc.data_eccm@chip/cpu;
9007aec1d6eScindi
9017aec1d6eScindi/* 								#DCT_PAR#
9027aec1d6eScindi * A tag array parity fault in a D cache can cause:
9037aec1d6eScindi *
9047aec1d6eScindi *  - tag_par : reported by dc on this cpu
9057aec1d6eScindi */
9067aec1d6eScindi
/* FIT rate for the fault; no SERD engine is declared for this case. */
9077aec1d6eScindi#define DCACHETAG_FIT		1000
9087aec1d6eScindi
9097aec1d6eScindievent ereport.cpu.amd.dc.tag_par@chip/cpu{within(5s)};
9107aec1d6eScindi
9117aec1d6eScindievent fault.cpu.amd.dcachetag@chip/cpu, FITrate=DCACHETAG_FIT,
9127aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
9137aec1d6eScindi
/* The ereport is tied directly to the fault, with no upset in between. */
9147aec1d6eScindiprop fault.cpu.amd.dcachetag@chip/cpu (1)->
9157aec1d6eScindi    ereport.cpu.amd.dc.tag_par@chip/cpu;
9167aec1d6eScindi
9177aec1d6eScindi/* 								#DCT_SNOOP#
9187aec1d6eScindi * A snoop tag array parity fault in a D cache can cause:
9197aec1d6eScindi *
9207aec1d6eScindi *  - stag_par : reported by dc on this cpu
9217aec1d6eScindi */
9227aec1d6eScindi
/* FIT rate for the fault; no SERD engine is declared for this case. */
9237aec1d6eScindi#define DCACHESTAG_FIT		1000
9247aec1d6eScindi
9257aec1d6eScindievent ereport.cpu.amd.dc.stag_par@chip/cpu{within(5s)};
9267aec1d6eScindi
9277aec1d6eScindievent fault.cpu.amd.dcachestag@chip/cpu, FITrate=DCACHESTAG_FIT,
9287aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
9297aec1d6eScindi
/* The ereport is tied directly to the fault, with no upset in between. */
9307aec1d6eScindiprop fault.cpu.amd.dcachestag@chip/cpu (1)->
9317aec1d6eScindi    ereport.cpu.amd.dc.stag_par@chip/cpu;
9327aec1d6eScindi
9337aec1d6eScindi/* 								#DCTLB_1#
9347aec1d6eScindi * An l1tlb parity fault in a D cache can cause:
9357aec1d6eScindi *
9367aec1d6eScindi *  - l1tlb_par : reported by dc on this cpu
9377aec1d6eScindi */
9387aec1d6eScindi
/* FIT rate for the fault; no SERD engine is declared for this case. */
9397aec1d6eScindi#define L1DTLB_FIT		1000
9407aec1d6eScindi
9417aec1d6eScindievent ereport.cpu.amd.dc.l1tlb_par@chip/cpu{within(5s)};
9427aec1d6eScindi
9437aec1d6eScindievent fault.cpu.amd.l1dtlb@chip/cpu, FITrate=L1DTLB_FIT,
9447aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
9457aec1d6eScindi
/* The ereport is tied directly to the fault, with no upset in between. */
9467aec1d6eScindiprop fault.cpu.amd.l1dtlb@chip/cpu (1)->
9477aec1d6eScindi    ereport.cpu.amd.dc.l1tlb_par@chip/cpu;
9487aec1d6eScindi
9497aec1d6eScindi/* 								#DCTLB_2#
9507aec1d6eScindi * An l2tlb parity fault in a D cache can cause:
9517aec1d6eScindi *
9527aec1d6eScindi *  - l2tlb_par : reported by dc on this cpu
9537aec1d6eScindi */
9547aec1d6eScindi
/* FIT rate for the fault; no SERD engine is declared for this case. */
9557aec1d6eScindi#define L2DTLB_FIT		1000
9567aec1d6eScindi
9577aec1d6eScindievent ereport.cpu.amd.dc.l2tlb_par@chip/cpu{within(5s)};
9587aec1d6eScindi
9597aec1d6eScindievent fault.cpu.amd.l2dtlb@chip/cpu, FITrate=L2DTLB_FIT,
9607aec1d6eScindi	FRU=chip, ASRU=chip/cpu;
9617aec1d6eScindi
/* The ereport is tied directly to the fault, with no upset in between. */
9627aec1d6eScindiprop fault.cpu.amd.l2dtlb@chip/cpu (1)->
9637aec1d6eScindi    ereport.cpu.amd.dc.l2tlb_par@chip/cpu;
9647aec1d6eScindi
9658a40a695Sgavinm/*								#MISC#
9667aec1d6eScindi * Ereports that should not normally happen and which we will discard
9677aec1d6eScindi * without diagnosis if they do.  These fall into a few categories:
9687aec1d6eScindi *
9697aec1d6eScindi *	- the corresponding detector is not enabled, typically because
9707aec1d6eScindi *	  detection/handling of the event is taking place elsewhere
9717aec1d6eScindi *	  (nb.ma, nb.ta, ls.s_rde, ic.rdde, bu.s_rde, nb.gart_walk)
9727aec1d6eScindi *	- the event is associated with a sync flood so even if the detector is
9737aec1d6eScindi *	  enabled we will never handle the event and generate an ereport *and*
9747aec1d6eScindi *	  even if the ereport did arrive we could perform no useful diagnosis
9757aec1d6eScindi *	  e.g., the NB can be configured for sync flood on nb.mem_eccm
9767aec1d6eScindi *	  but we don't choose to discard that ereport here since we could have
9777aec1d6eScindi *	  made a useful diagnosis from it had it been delivered
9787aec1d6eScindi *	  (nb.ht_sync, nb.ht_crc)
9797aec1d6eScindi *	- events that will be accompanied by an immediate panic and
9807aec1d6eScindi *	  delivery of the ereport during subsequent reboot but from
9817aec1d6eScindi *	  which no useful diagnosis can be made. (nb.rmw, nb.wdog)
9827aec1d6eScindi *
9837aec1d6eScindi * Ereports for all of these can be generated by error simulation and
9847aec1d6eScindi * injection.  We will perform a null diagnosis of all these ereports in order
9857aec1d6eScindi * to avoid "no subscription" complaints during test harness runs.
9867aec1d6eScindi */
9877aec1d6eScindi
/*
 * Ereports that receive a null diagnosis.  Unlike the rules above, these
 * are declared against a bare cpu path rather than chip/cpu.
 */
988b5016cbbSstephhevent ereport.cpu.amd.nb.ma@cpu{within(5s)};
989b5016cbbSstephhevent ereport.cpu.amd.nb.ta@cpu{within(5s)};
990b5016cbbSstephhevent ereport.cpu.amd.ls.s_rde@cpu{within(5s)};
991b5016cbbSstephhevent ereport.cpu.amd.ic.rdde@cpu{within(5s)};
992b5016cbbSstephhevent ereport.cpu.amd.bu.s_rde@cpu{within(5s)};
993b5016cbbSstephhevent ereport.cpu.amd.nb.gart_walk@cpu{within(5s)};
994b5016cbbSstephhevent ereport.cpu.amd.nb.ht_sync@cpu{within(5s)};
995b5016cbbSstephhevent ereport.cpu.amd.nb.ht_crc@cpu{within(5s)};
996b5016cbbSstephhevent ereport.cpu.amd.nb.rmw@cpu{within(5s)};
997b5016cbbSstephhevent ereport.cpu.amd.nb.wdog@cpu{within(5s)};
998b5016cbbSstephhevent ereport.cpu.amd.unknown@cpu{within(5s)};
9997aec1d6eScindi
/* The upset is declared without a SERD engine. */
10007aec1d6eScindievent upset.null_diag@cpu;
10017aec1d6eScindi
/* Every ereport declared above is explained by the null upset. */
10027aec1d6eScindiprop upset.null_diag@cpu (1)->
10037aec1d6eScindi    ereport.cpu.amd.nb.ma@cpu,
10047aec1d6eScindi    ereport.cpu.amd.nb.ta@cpu,
10057aec1d6eScindi    ereport.cpu.amd.ls.s_rde@cpu,
10067aec1d6eScindi    ereport.cpu.amd.ic.rdde@cpu,
10077aec1d6eScindi    ereport.cpu.amd.bu.s_rde@cpu,
10087aec1d6eScindi    ereport.cpu.amd.nb.gart_walk@cpu,
10097aec1d6eScindi    ereport.cpu.amd.nb.ht_sync@cpu,
10107aec1d6eScindi    ereport.cpu.amd.nb.ht_crc@cpu,
10117aec1d6eScindi    ereport.cpu.amd.nb.rmw@cpu,
10127aec1d6eScindi    ereport.cpu.amd.nb.wdog@cpu,
10137aec1d6eScindi    ereport.cpu.amd.unknown@cpu;
1014