/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#pragma dictionary "AMD"

/*
 * Eversholt rules for the AMD Opteron CPU/Memory
 */

fru motherboard;
fru chip;
fru dimm;

asru chip/cpu;
asru dimm;
asru dimm/rank;
asru dram-channel;
asru chip/memory-controller/chip-select;

/*
 * Two-argument maximum and minimum; used below to clamp RANK_THRESH.
 */
#define	MAX(x, y) ((x) >= (y) ? (x) : (y))
#define	MIN(x, y) ((x) <= (y) ? (x) : (y))

/*
 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
 * we diagnose for page faults, to record the physical address of the faulting
 * page.  The "asru-" prefix is hooked in the "rewrite-ASRU" confcalls made on
 * diagnosis of associated faults when the libtopo mem scheme rewrites the
 * asru in "mem" scheme.
 */
#define	SET_ADDR (setpayloadprop("asru-physaddr", payloadprop("IA32_MCi_ADDR")))

#define	SET_OFFSET (setpayloadprop("asru-offset", \
    payloadprop("resource[0].hc-specific.offset")))

/*
 * RESOURCE_EXISTS is true if a member with name "resource" exists in the
 * payload - regardless of type (e.g., nvlist or nvlist array) or value.
 */
#define	RESOURCE_EXISTS (payloadprop_defined("resource"))

/*
 * CONTAINS_RANK is true if the "resource" nvlist array (as used in memory
 * ereports) exists and one of its members matches the path for the
 * rank node (a member matching the containing dimm node is also accepted).
 * Our memory propagations are of the form
 *
 * "prop foo@chip/memory-controller/dimm/rank -> blah@chip/cpu"
 *
 * since cpus detect memory errors; in eversholt such a propagation, where
 * the lhs path and rhs path do not match, expands to the cross-product of
 * all dimms, ranks and cpus on the same chip (since chip appears in the
 * path on both sides).  We use CONTAINS_RANK to constrain the propagation
 * such that it only happens if the payload resource matches the rank.
 */
#define	CONTAINS_RANK (payloadprop_contains("resource", \
    asru(chip/memory-controller/dimm/rank)) \
    || payloadprop_contains("resource", \
    asru(chip/memory-controller/dimm)))

/*
 * The following will tell us whether a syndrome that is known to be
 * correctable (from a mem_ce ereport) is single-bit or multi-bit.  For a
 * correctable ChipKill syndrome the number of bits set in the lowest
 * nibble indicates how many bits were in error.
 */

#define	CBITMASK(synd) ((synd) & 0xf)

#define	CKSINGLE(synd) \
    ((synd) == 0 || \
    (CBITMASK(synd) == 0x1 || CBITMASK(synd) == 0x2 || \
    CBITMASK(synd) == 0x4 || CBITMASK(synd) == 0x8))

#define	SINGLE_BIT_CE \
    (payloadprop("syndrome-type") == "E" || \
    (payloadprop("syndrome-type") == "C" && \
    CKSINGLE(payloadprop("syndrome"))))

#define	MULTI_BIT_CE \
    (payloadprop("syndrome-type") == "C" && \
    !CKSINGLE(payloadprop("syndrome")))

/*
 * A single bit fault in a memory rank can cause:
 *
 *  - mem_ce : reported by nb
 *  - inf_sys_ecc1: reported by ic or dc; inf_sys_ecc1 errors detected at the
 *    ic do not record a syndrome; these errors will not be triggered in
 *    ChipKill ECC mode (the NB corrects all ECC errors in that mode)
 *  - s_ecc1: reported by bu; this error will not be triggered in ChipKill
 *    ECC mode (the NB corrects all ECC in that mode)
 *
 * Single-bit errors are fed into a per-rank SERD engine; if a SERD engine
 * trips we diagnose a fault.memory.page so that the response agent can
 * retire the page that caused the trip.  If the total number of pages
 * faulted in this way on a single rank exceeds a threshold we will
 * diagnose a fault.memory.dimm_sb against the containing dimm.
 *
 * Multibit ChipKill-correctable errors are treated identically to
 * single-bit errors, but via separate serd engines to allow distinct
 * parameters if desired.
 *
 * Uncorrectable errors produce an immediate page fault and corresponding
 * fault.memory.dimm_ue.
 *
 * Page faults are essentially internal - action is only required when
 * they are accompanied by a dimm fault.  As such we include message=0
 * on page faults.
 */

event ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.s_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.nb.mem_ce@chip/cpu{within(5s)};

/*
 * If the address is not valid then no resource member will be included
 * in a nb.mem_ce or nb.mem_ue ereport.  These cases should be rare.
 * We will also discard all inf_sys_ecc1 events detected at the ic since they
 * have no syndrome and therefore no resource information.
 * We will discard such ereports.  An alternative may be to SERD them
 * on a per MC basis and trip if we see too many such events.
 */

event upset.memory.discard1@chip/cpu;

/* #PAGE#
 * Single-bit correctable errors are diagnosed as upsets and feed into per-rank
 * SERD engines which diagnose fault.memory.page_sb if they trip.
 *
 * Multi-bit correctable (via ChipKill) errors are diagnosed as upsets and feed
 * into additional per-rank SERD engines which diagnose fault.memory.page_ck
 * if they trip.
 *
 * The number of fault.memory.page_sb and fault.memory.page_ck diagnosed is
 * counted in stat engines for each type.  These are used in deciding
 * whether to declare a dimm faulty after repeated page faults.
 */

#define	PAGE_FIT 1
#define	PAGE_SB_COUNT 2
#define	PAGE_SB_TIME 72h
#define	PAGE_CK_COUNT 2
#define	PAGE_CK_TIME 72h

/*
 * The fraction of pages on a single rank that must be diagnosed as faulty
 * with single correctable unit faults before we will fault the rank.
 * Once we have faulted the rank we will continue to diagnose any further page
 * faults on the rank up to some maximum multiple of the threshold at which
 * we faulted the dimm.  This allows us to potentially contain some fairly
 * far-reaching but still limited-extent fault (such as a partial column
 * failure) without getting carried away and allowing a single faulty rank to
 * use up the entire system-imposed page retirement limit (which, once
 * reached, causes retirement request to have no effect other than to fill
 * the fault manager cache and logs).
 *
 * This fraction is specified in basis points, where 100 basis points are
 * equivalent to 1 percent.  It is applied on a per-rank basis.
 *
 * The system imposes an absolute maximum on the number of pages it will
 * retire; the current value is 10 basis points, or 0.1% of 'physmem'.  Note
 * that 'physmem' is reduced from installed memory pages by an amount
 * reflecting permanent kernel memory allocations.  This system page retire
 * limit bounds the maximum real response to page faults across all ranks
 * that fault manager response agents can effect, but it should not be confused
 * with any diagnosis threshold (i.e., the number of faulty pages we are
 * prepared to tolerate from a single rank before faulting the rank is
 * distinct from the total number of pages we are prepared to retire from use
 * in response to that and other faults).  It is, however, desirable to
 * arrange that the maximum number of pages we are prepared to fault from
 * any one rank is less than the system-wide quota.
 */
#define	PAGE_RETIRE_LIMIT_BPS 5 /* or 0.05%; ~ 131 pages/GB */

/*
 * A macro to manipulate the above fraction.  Given a size in bytes convert
 * this to pages (4K pagesize) and calculate the number of those pages
 * indicated by PAGE_RETIRE_LIMIT_BPS basis points.
 */
#define	_BPS_PGCNT(totalbytes) \
    ((((totalbytes) / 4096 ) * PAGE_RETIRE_LIMIT_BPS) / 10000)

/*
 * The single-correctable-unit threshold at which number of faulted pages
 * on a rank we fault the rank.  We insist that this be at least 128 and
 * never more than 512.
 */
#define	RANK_THRESH MIN(512, MAX(128, \
    _BPS_PGCNT(confprop(asru(chip/memory-controller/dimm/rank), "size"))))

/*
 * The maximum number of single-correctable-unit page faults we will diagnose
 * on a single rank (must be greater than RANK_THRESH).  We set
 * this at twice the rank fault threshold.
 */
#define	RANK_PGFLT_MAX (2 * RANK_THRESH)

engine stat.sbpgflt@chip/memory-controller/dimm/rank;
engine stat.ckpgflt@chip/memory-controller/dimm/rank;

event fault.memory.page_sb@chip/memory-controller/dimm/rank,
    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
    count=stat.sbpgflt@chip/memory-controller/dimm/rank,
    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */

#define	SB_PGFLTS (count(stat.sbpgflt@chip/memory-controller/dimm/rank))

event fault.memory.page_ck@chip/memory-controller/dimm/rank,
    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
    count=stat.ckpgflt@chip/memory-controller/dimm/rank,
    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */

#define	CK_PGFLTS (count(stat.ckpgflt@chip/memory-controller/dimm/rank))

/* True once the combined sb + ck page fault count exceeds RANK_PGFLT_MAX. */
#define	RANK_PGFLT_LIMIT_REACHED \
    (SB_PGFLTS + CK_PGFLTS > RANK_PGFLT_MAX)

event ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank{within(5s)};
engine serd.memory.page_sb@chip/memory-controller/dimm/rank,
    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
    trip=ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
event upset.memory.page_sb@chip/memory-controller/dimm/rank,
    engine=serd.memory.page_sb@chip/memory-controller/dimm/rank;

event ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank{within(5s)};
engine serd.memory.page_ck@chip/memory-controller/dimm/rank,
    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
    trip=ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
event upset.memory.page_ck@chip/memory-controller/dimm/rank,
    engine=serd.memory.page_ck@chip/memory-controller/dimm/rank;

event upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank;

/*
 * If we have not reached the per-rank limit on faulted pages then
 * continue to explain ereport observations as upsets which can
 * lead to page fault diagnoses if the serd engine trips.
 */
prop upset.memory.page_sb@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

prop upset.memory.page_ck@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && MULTI_BIT_CE && !RANK_PGFLT_LIMIT_REACHED } (0)->
    /* no dc.inf_sys_ecc1 or bu.s_ecc1 in ChipKill mode */
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

/*
 * If we have reached the per-rank limit on faulted pages then diagnose
 * further observations on the rank to an engine-less upset (i.e., discard
 * them).
 */
prop upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED } (1)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

prop fault.memory.page_sb@chip/memory-controller/dimm/rank (1)->
    ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;

prop fault.memory.page_ck@chip/memory-controller/dimm/rank (1)->
    ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;

prop fault.memory.page_sb@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

prop fault.memory.page_ck@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

/*
 * Discard memory ereports that do not indicate a resource.
 */
prop upset.memory.discard1@chip/cpu
    { !RESOURCE_EXISTS } (1)->
    ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

/* #DIMM_SCU#
 * "Single-correctable-unit" DIMM faults are diagnosed when the total number of
 * page faults (diagnosed from repeated single-bit or multibit-chipkills)
 * from any one rank on that DIMM reaches a threshold.  A "correctable unit"
 * is a single bit in normal 64/8 ECC mode, or a single symbol in ChipKill
 * 128/16 mode (i.e., nibble-aligned nibble for the code used on Opteron).
 *
 * We do not stop diagnosing further single-bit page faults once we have
 * declared a single-bit DIMM fault - we continue diagnosing them and
 * response agents can continue to retire those pages up to the system-imposed
 * retirement limit.
 *
 * Two distinct fault types may be diagnosed - fault.memory.dimm_sb and
 * fault.memory.dimm_ck.  Which one is diagnosed depends on whether we
 * have reached the threshold for a majority of single-bit page faults or
 * multibit page faults.
 *
 * Implementation: we maintain parallel SERD engines to the page_sb and
 * page_ck engines, which trip in unison.  On trip it generates a distinct
 * ereport which we diagnose to a fault if the threshold has been
 * reached, or to a throwaway upset if not.
 *
 */

#define	DIMM_SB_FIT 2000
#define	DIMM_CK_FIT 4000

event fault.memory.dimm_sb@chip/memory-controller/dimm/rank,
    FITrate=DIMM_SB_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event fault.memory.dimm_ck@chip/memory-controller/dimm/rank,
    FITrate=DIMM_CK_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
    { within(5s) };
engine serd.memory.dimm_sb@chip/memory-controller/dimm/rank,
    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
    trip=ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank;
event upset.memory.dimm_sb@chip/memory-controller/dimm/rank,
    engine=serd.memory.dimm_sb@chip/memory-controller/dimm/rank;

event ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
    { within(5s) };
engine serd.memory.dimm_ck@chip/memory-controller/dimm/rank,
    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
    trip=ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;
event upset.memory.dimm_ck@chip/memory-controller/dimm/rank,
    engine=serd.memory.dimm_ck@chip/memory-controller/dimm/rank;

event upset.memory.discard2@chip/memory-controller/dimm/rank;

prop upset.memory.dimm_sb@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && SINGLE_BIT_CE } (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu,
    ereport.cpu.amd.bu.s_ecc1@chip/cpu,
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

prop upset.memory.dimm_ck@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && MULTI_BIT_CE } (0)->
    ereport.cpu.amd.nb.mem_ce@chip/cpu;

/*
 * The following two propagations diagnose a fault.memory.dimm_sb when
 * either the dimm_sb or dimm_ck engine trips (for a new page fault)
 * and the total number of page faults (sb and ck) exceeds the threshold
 * value with the majority being from sb page faults.
 */
prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };

prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };

/*
 * The following two propagations diagnose a fault.memory.dimm_ck when
 * either the dimm_sb or dimm_ck engine trips (for a new page fault)
 * and the total number of page faults (sb and ck) exceeds the threshold
 * value with the majority being from ck page faults.
 */
prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };

prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };

prop upset.memory.discard2@chip/memory-controller/dimm/rank (1)->
    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank,
    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;

/* #DIMM_UE#
 * #PAGE_UE#
 * An uncorrectable multi-bit fault in a memory dimm can cause:
 *
 *  - mem_ue : reported by nb for an access from a remote cpu
 *  - inf_sys_eccm : reported by ic or dc; the ic does not report a syndrome
 *  - s_eccm : reported by bu
 *
 * Note we use a SERD engine here simply as a way of ensuring that we get
 * both dimm and page faults reported.
 *
 * Since on production systems we force HT Sync Flood on uncorrectable
 * memory errors (if not already set as such by the BIOS, as it should be)
 * we won't actually receive these ereports since the system will be reset.
 */

#define	DIMM_UE_FIT 6000

event ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.s_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.nb.mem_ue@chip/cpu{within(5s)};

event fault.memory.dimm_ue@chip/memory-controller/dimm/rank,
    FITrate=DIMM_UE_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event fault.memory.page_ue@chip/memory-controller/dimm/rank,
    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */

event ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank{within(5s)};
engine serd.memory.dimm_ue@chip/memory-controller/dimm/rank,
    N=0, T=1h, method=persistent,
    trip=ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
event upset.memory.dimm_ue@chip/memory-controller/dimm/rank,
    engine=serd.memory.dimm_ue@chip/memory-controller/dimm/rank;

event ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank{within(5s)};
engine serd.memory.page_ue@chip/memory-controller/dimm/rank,
    N=0, T=1h, method=persistent,
    trip=ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
event upset.memory.page_ue@chip/memory-controller/dimm/rank,
    engine=serd.memory.page_ue@chip/memory-controller/dimm/rank;

event upset.memory.discard3@chip/cpu;

prop upset.memory.page_ue@chip/memory-controller/dimm/rank
    { CONTAINS_RANK } (0)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.bu.s_eccm@chip/cpu,
    ereport.cpu.amd.nb.mem_ue@chip/cpu;

prop upset.memory.dimm_ue@chip/memory-controller/dimm/rank
    { CONTAINS_RANK } (0)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.bu.s_eccm@chip/cpu,
    ereport.cpu.amd.nb.mem_ue@chip/cpu;

prop fault.memory.page_ue@chip/memory-controller/dimm/rank (1)->
    ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;

prop fault.memory.page_ue@chip/memory-controller/dimm/rank
    { CONTAINS_RANK && SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.bu.s_eccm@chip/cpu,
    ereport.cpu.amd.nb.mem_ue@chip/cpu;

prop fault.memory.dimm_ue@chip/memory-controller/dimm/rank (1)->
    ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;

/*
 * Discard uncorrectable memory ereports whose payload has no "resource"
 * member.
 */
prop upset.memory.discard3@chip/cpu
    { !RESOURCE_EXISTS } (1)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu,
    ereport.cpu.amd.bu.s_eccm@chip/cpu,
    ereport.cpu.amd.nb.mem_ue@chip/cpu;

/* #CSTESTFAIL#
 * If the BIOS fails a chip-select during POST, or perhaps after a
 * sync flood from an uncorrectable error, then on revision F and G it
 * should mark that chip-select as TestFail in the CS Base register.
 * When the memory-controller driver discovers all the MC configuration
 * it notes such failed chip-selects and creates topology nodes for the
 * chip-select and associated dimms and ranks, and produces an ereport for each
 * failed chip-select with detector set to the memory-controller node
 * and resource indicating the failed chip-select.
 */

event ereport.cpu.amd.mc.cs_testfail@chip/memory-controller{within(5s)};

event fault.memory.dimm_testfail@chip/memory-controller/dimm/rank,
    FITrate=1000, ASRU=dimm, FRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event error.memory.cs_testfail@chip/memory-controller/chip-select;

/*
 * CONTAINS_CS is true if the payload "resource" member contains the asru of
 * the chip-select node.
 */
#define	CONTAINS_CS (payloadprop_contains("resource", \
    asru(chip/memory-controller/chip-select)))

prop error.memory.cs_testfail@chip/memory-controller/chip-select (1)->
    ereport.cpu.amd.mc.cs_testfail@chip/memory-controller
    { CONTAINS_CS };

/*
 * CSMATCH(s) is true if the chip-select node defines configuration property
 * s and its value equals the "csname" property of the rank node.
 */
#define	CSMATCH(s) \
    (confprop_defined(asru(chip/memory-controller/chip-select), s) && \
    confprop(asru(chip/memory-controller/chip-select), s) == \
    confprop(asru(chip/memory-controller/dimm/rank), "csname"))

prop fault.memory.dimm_testfail@chip/memory-controller/dimm/rank (1)->
    error.memory.cs_testfail@chip/memory-controller/chip-select
    { CSMATCH("dimm1-csname") || CSMATCH("dimm2-csname")};

/* #ADDRPAR#
 * DRAM Command/Address Parity Errors.
 *
 *  - dramaddr_par : reported by the nb; the NB status register includes
 *    a bit indicating which dram controller channel (A or B) experienced
 *    the error.
 */

event ereport.cpu.amd.nb.dramaddr_par@chip/cpu{within(5s)};

event fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel,
    FITrate=1000, ASRU=dram-channel;

/*
 * GET_CHANNEL sets $chan to 1 if the 0x200 bit of the upper 32 bits of the
 * IA32_MCi_STATUS payload member is set, and to 0 otherwise.
 */
#define	GET_CHANNEL ($chan = (payloadprop("IA32_MCi_STATUS") >> 32 & 0x200) ? \
    1 : 0)

prop fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel[y] (0)->
    ereport.cpu.amd.nb.dramaddr_par@chip/cpu { GET_CHANNEL && $chan == y };

/*
 * l2 cache data errors.
 */

#define	L2CACHEDATA_FIT 1000
#define	L2CACHEDATA_SB_COUNT 3
#define	L2CACHEDATA_SB_TIME 12h

event fault.cpu.amd.l2cachedata@chip/cpu, FITrate=L2CACHEDATA_FIT,
    FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.l2cachedata_sb@chip/cpu;
event error.cpu.amd.l2cachedata_mb@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (1)->
    error.cpu.amd.l2cachedata_sb@chip/cpu,
    error.cpu.amd.l2cachedata_mb@chip/cpu;

/* #L2D_SINGLE#
 * A single bit data array fault in an l2 cache can cause:
 *
 *  - inf_l2_ecc1 : reported by ic on this cpu
 *  - inf_l2_ecc1 : reported by dc on this cpu
 *  - l2d_ecc1 : reported by bu on copyback or on snoop from another cpu
 *
 * Single-bit errors are diagnosed to cache upsets.  SERD engines are used
 * to count upsets resulting from CEs.
 */

event ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.l2d_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.l2d_sb_trip@chip/cpu{within(5s)};

engine serd.cpu.amd.l2d_sb@chip/cpu,
    N=L2CACHEDATA_SB_COUNT, T=L2CACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.l2d_sb_trip@chip/cpu;

event upset.cpu.amd.l2d_sb@chip/cpu,
    engine=serd.cpu.amd.l2d_sb@chip/cpu;

prop upset.cpu.amd.l2d_sb@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;

prop error.cpu.amd.l2cachedata_sb@chip/cpu (1)->
    ereport.cpu.amd.l2d_sb_trip@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;

/* #L2D_MULTI#
 * A multi-bit data array fault in an l2 cache can cause:
 *
 *  - inf_l2_eccm : reported by ic on this cpu
 *  - inf_l2_eccm : reported by dc on this cpu
 *  - l2d_eccm : reported by bu on copyback or on snoop from another cpu
 */

event ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.l2d_eccm@chip/cpu{within(5s)};

prop error.cpu.amd.l2cachedata_mb@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

/*
 * l2 cache main tag errors
 */

#define	L2CACHETAG_FIT 1000
#define	L2CACHETAG_SB_COUNT 3
#define	L2CACHETAG_SB_TIME 12h

event fault.cpu.amd.l2cachetag@chip/cpu, FITrate=L2CACHETAG_FIT,
    FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.l2cachetag_sb@chip/cpu;
event error.cpu.amd.l2cachetag_mb@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (1)->
    error.cpu.amd.l2cachetag_sb@chip/cpu,
    error.cpu.amd.l2cachetag_mb@chip/cpu;

/* #L2T_SINGLE#
 * A single bit tag array fault in an l2 cache can cause:
 *
 *  - l2t_ecc1 : reported by bu on this cpu when detected during snoop
 *  - l2t_par : reported by bu on this cpu when detected other than during snoop
 *
 * Note that the bu.l2t_par ereport could be due to a single bit or multi bit
 * event. If the l2t_sb_trip has already triggered it will be treated as another
 * ce, otherwise it will be treated as a ue event.
6457aec1d6eScindi */ 6467aec1d6eScindi 6477aec1d6eScindievent ereport.cpu.amd.bu.l2t_ecc1@chip/cpu{within(5s)}; 648b5016cbbSstephhevent ereport.cpu.amd.bu.l2t_par@chip/cpu{within(5s)}; 649b5016cbbSstephhevent ereport.cpu.amd.l2t_sb_trip@chip/cpu{within(5s)}; 6507aec1d6eScindi 6517aec1d6eScindiengine serd.cpu.amd.l2t_sb@chip/cpu, 6527aec1d6eScindi N=L2CACHETAG_SB_COUNT, T=L2CACHETAG_SB_TIME, method=persistent, 6537aec1d6eScindi trip=ereport.cpu.amd.l2t_sb_trip@chip/cpu; 6547aec1d6eScindi 6557aec1d6eScindievent upset.cpu.amd.l2t_sb@chip/cpu, 6567aec1d6eScindi engine=serd.cpu.amd.l2t_sb@chip/cpu; 6577aec1d6eScindi 6587aec1d6eScindiprop upset.cpu.amd.l2t_sb@chip/cpu (1)-> 6597aec1d6eScindi ereport.cpu.amd.bu.l2t_ecc1@chip/cpu, 6607aec1d6eScindi ereport.cpu.amd.bu.l2t_par@chip/cpu; 6617aec1d6eScindi 6627aec1d6eScindiprop error.cpu.amd.l2cachetag_sb@chip/cpu (1)-> 6637aec1d6eScindi ereport.cpu.amd.l2t_sb_trip@chip/cpu; 6647aec1d6eScindi 6657aec1d6eScindiprop fault.cpu.amd.l2cachetag@chip/cpu (0)-> 6667aec1d6eScindi ereport.cpu.amd.bu.l2t_ecc1@chip/cpu, 6677aec1d6eScindi ereport.cpu.amd.bu.l2t_par@chip/cpu; 6687aec1d6eScindi 6697aec1d6eScindi/* #L2T_MULTI# 6707aec1d6eScindi * A multi-bit tag array fault in an l2 cache can cause: 6717aec1d6eScindi * 6727aec1d6eScindi * - l2t_eccm : reported by bu on this cpu when detected during snoop 6737aec1d6eScindi * - l2t_par : reported by bu on this cpu when detected other than during snoop 6747aec1d6eScindi */ 6757aec1d6eScindi 676b5016cbbSstephhevent ereport.cpu.amd.bu.l2t_eccm@chip/cpu{within(5s)}; 6777aec1d6eScindi 6787aec1d6eScindiprop error.cpu.amd.l2cachetag_mb@chip/cpu (1)-> 6797aec1d6eScindi ereport.cpu.amd.bu.l2t_eccm@chip/cpu, 6807aec1d6eScindi ereport.cpu.amd.bu.l2t_par@chip/cpu; 6817aec1d6eScindi 6827aec1d6eScindiprop fault.cpu.amd.l2cachetag@chip/cpu (0)-> 6837aec1d6eScindi ereport.cpu.amd.bu.l2t_eccm@chip/cpu, 6847aec1d6eScindi ereport.cpu.amd.bu.l2t_par@chip/cpu; 6857aec1d6eScindi 6867aec1d6eScindi/* #ICD_PAR# 
6877aec1d6eScindi * A data array parity fault in an I cache can cause: 6887aec1d6eScindi * 6897aec1d6eScindi * - data_par : reported by ic on this cpu 6907aec1d6eScindi */ 6917aec1d6eScindi 6927aec1d6eScindi#define ICACHEDATA_FIT 1000 6937aec1d6eScindi#define ICACHEDATA_SB_COUNT 2 6947aec1d6eScindi#define ICACHEDATA_SB_TIME 168h 6957aec1d6eScindi 6967aec1d6eScindievent ereport.cpu.amd.ic.data_par@chip/cpu{within(5s)}; 697b5016cbbSstephhevent ereport.cpu.amd.ic_dp_trip@chip/cpu{within(5s)}; 6987aec1d6eScindi 6997aec1d6eScindievent fault.cpu.amd.icachedata@chip/cpu, FITrate=ICACHEDATA_FIT, 7007aec1d6eScindi FRU=chip, ASRU=chip/cpu; 7017aec1d6eScindi 7027aec1d6eScindiengine serd.cpu.amd.icachedata@chip/cpu, 7037aec1d6eScindi N=ICACHEDATA_SB_COUNT, T=ICACHEDATA_SB_TIME, method=persistent, 7047aec1d6eScindi trip=ereport.cpu.amd.ic_dp_trip@chip/cpu; 7057aec1d6eScindi 7067aec1d6eScindievent upset.cpu.amd.icachedata@chip/cpu, 7077aec1d6eScindi engine=serd.cpu.amd.icachedata@chip/cpu; 7087aec1d6eScindi 7097aec1d6eScindiprop upset.cpu.amd.icachedata@chip/cpu (1)-> 7107aec1d6eScindi ereport.cpu.amd.ic.data_par@chip/cpu; 7117aec1d6eScindi 7127aec1d6eScindiprop fault.cpu.amd.icachedata@chip/cpu (1)-> 7137aec1d6eScindi ereport.cpu.amd.ic_dp_trip@chip/cpu; 7147aec1d6eScindi 7157aec1d6eScindiprop fault.cpu.amd.icachedata@chip/cpu (0)-> 7167aec1d6eScindi ereport.cpu.amd.ic.data_par@chip/cpu; 7177aec1d6eScindi 7187aec1d6eScindi/* #ICT_PAR# 7197aec1d6eScindi * A tag array parity fault in an I cache can cause: 7207aec1d6eScindi * 7217aec1d6eScindi * - tag_par : reported by ic on this cpu 7227aec1d6eScindi */ 7237aec1d6eScindi 7247aec1d6eScindi#define ICACHETAG_FIT 1000 7257aec1d6eScindi#define ICACHETAG_SB_COUNT 2 7267aec1d6eScindi#define ICACHETAG_SB_TIME 168h 7277aec1d6eScindi 7287aec1d6eScindievent ereport.cpu.amd.ic.tag_par@chip/cpu{within(5s)}; 729b5016cbbSstephhevent ereport.cpu.amd.ic_tp_trip@chip/cpu{within(5s)}; 7307aec1d6eScindi 7317aec1d6eScindievent 
fault.cpu.amd.icachetag@chip/cpu, FITrate=ICACHETAG_FIT, 7327aec1d6eScindi FRU=chip, ASRU=chip/cpu; 7337aec1d6eScindi 7347aec1d6eScindiengine serd.cpu.amd.icachetag@chip/cpu, 7357aec1d6eScindi N=ICACHETAG_SB_COUNT, T=ICACHETAG_SB_TIME, method=persistent, 7367aec1d6eScindi trip=ereport.cpu.amd.ic_tp_trip@chip/cpu; 7377aec1d6eScindi 7387aec1d6eScindievent upset.cpu.amd.icachetag@chip/cpu, 7397aec1d6eScindi engine=serd.cpu.amd.icachetag@chip/cpu; 7407aec1d6eScindi 7417aec1d6eScindiprop upset.cpu.amd.icachetag@chip/cpu (1)-> 7427aec1d6eScindi ereport.cpu.amd.ic.tag_par@chip/cpu; 7437aec1d6eScindi 7447aec1d6eScindiprop fault.cpu.amd.icachetag@chip/cpu (1)-> 7457aec1d6eScindi ereport.cpu.amd.ic_tp_trip@chip/cpu; 7467aec1d6eScindi 7477aec1d6eScindiprop fault.cpu.amd.icachetag@chip/cpu (0)-> 7487aec1d6eScindi ereport.cpu.amd.ic.tag_par@chip/cpu; 7497aec1d6eScindi 7507aec1d6eScindi/* #ICT_SNOOP# 7517aec1d6eScindi * A snoop tag array parity fault in an I cache can cause: 7527aec1d6eScindi * 7537aec1d6eScindi * - stag_par : reported by ic on this cpu 7547aec1d6eScindi */ 7557aec1d6eScindi 7567aec1d6eScindi#define ICACHESTAG_FIT 1000 7577aec1d6eScindi 7587aec1d6eScindievent ereport.cpu.amd.ic.stag_par@chip/cpu{within(5s)}; 7597aec1d6eScindi 7607aec1d6eScindievent fault.cpu.amd.icachestag@chip/cpu, FITrate=ICACHESTAG_FIT, 7617aec1d6eScindi FRU=chip, ASRU=chip/cpu; 7627aec1d6eScindi 7637aec1d6eScindiprop fault.cpu.amd.icachestag@chip/cpu (1)-> 7647aec1d6eScindi ereport.cpu.amd.ic.stag_par@chip/cpu; 7657aec1d6eScindi 7667aec1d6eScindi/* #ICTLB_1# 7677aec1d6eScindi * An l1tlb parity fault in an I cache can cause: 7687aec1d6eScindi * 7697aec1d6eScindi * - l1tlb_par : reported by ic on this cpu 7707aec1d6eScindi */ 7717aec1d6eScindi 7727aec1d6eScindi#define ICACHEL1TLB_FIT 1000 7737aec1d6eScindi#define ICACHEL1TLB_SB_COUNT 2 7747aec1d6eScindi#define ICACHEL1TLB_SB_TIME 168h 7757aec1d6eScindi 7767aec1d6eScindievent ereport.cpu.amd.ic.l1tlb_par@chip/cpu{within(5s)}; 
777b5016cbbSstephhevent ereport.cpu.amd.ic_l1tlb_trip@chip/cpu{within(5s)}; 7787aec1d6eScindi 7797aec1d6eScindievent fault.cpu.amd.l1itlb@chip/cpu, FITrate=ICACHEL1TLB_FIT, 7807aec1d6eScindi FRU=chip, ASRU=chip/cpu; 7817aec1d6eScindi 7827aec1d6eScindiengine serd.cpu.amd.l1itlb@chip/cpu, 7837aec1d6eScindi N=ICACHEL1TLB_SB_COUNT, T=ICACHEL1TLB_SB_TIME, method=persistent, 7847aec1d6eScindi trip=ereport.cpu.amd.ic_l1tlb_trip@chip/cpu; 7857aec1d6eScindi 7867aec1d6eScindievent upset.cpu.amd.l1itlb@chip/cpu, 7877aec1d6eScindi engine=serd.cpu.amd.l1itlb@chip/cpu; 7887aec1d6eScindi 7897aec1d6eScindiprop upset.cpu.amd.l1itlb@chip/cpu (1)-> 7907aec1d6eScindi ereport.cpu.amd.ic.l1tlb_par@chip/cpu; 7917aec1d6eScindi 7927aec1d6eScindiprop fault.cpu.amd.l1itlb@chip/cpu (1)-> 7937aec1d6eScindi ereport.cpu.amd.ic_l1tlb_trip@chip/cpu; 7947aec1d6eScindi 7957aec1d6eScindiprop fault.cpu.amd.l1itlb@chip/cpu (0)-> 7967aec1d6eScindi ereport.cpu.amd.ic.l1tlb_par@chip/cpu; 7977aec1d6eScindi 7987aec1d6eScindi/* #ICTLB_2# 7997aec1d6eScindi * An l2tlb parity fault in an I cache can cause: 8007aec1d6eScindi * 8017aec1d6eScindi * - l2tlb_par : reported by ic on this cpu * * serd.cpu.amd.l2itlb is declared with N=ICACHEL2TLB_SB_COUNT and T=ICACHEL2TLB_SB_TIME and names ereport.cpu.amd.ic_l2tlb_trip as its trip event. 8027aec1d6eScindi */ 8037aec1d6eScindi 8047aec1d6eScindi#define ICACHEL2TLB_FIT 1000 8057aec1d6eScindi#define ICACHEL2TLB_SB_COUNT 2 8067aec1d6eScindi#define ICACHEL2TLB_SB_TIME 168h 8077aec1d6eScindi 8087aec1d6eScindievent ereport.cpu.amd.ic.l2tlb_par@chip/cpu{within(5s)}; 809b5016cbbSstephhevent ereport.cpu.amd.ic_l2tlb_trip@chip/cpu{within(5s)}; 8107aec1d6eScindi 8117aec1d6eScindievent fault.cpu.amd.l2itlb@chip/cpu, FITrate=ICACHEL2TLB_FIT, 8127aec1d6eScindi FRU=chip, ASRU=chip/cpu; 8137aec1d6eScindi 8147aec1d6eScindiengine serd.cpu.amd.l2itlb@chip/cpu, 8157aec1d6eScindi N=ICACHEL2TLB_SB_COUNT, T=ICACHEL2TLB_SB_TIME, method=persistent, 8167aec1d6eScindi trip=ereport.cpu.amd.ic_l2tlb_trip@chip/cpu; 8177aec1d6eScindi 8187aec1d6eScindievent upset.cpu.amd.l2itlb@chip/cpu, 8197aec1d6eScindi engine=serd.cpu.amd.l2itlb@chip/cpu; 
8207aec1d6eScindi 8217aec1d6eScindiprop upset.cpu.amd.l2itlb@chip/cpu (1)-> 8227aec1d6eScindi ereport.cpu.amd.ic.l2tlb_par@chip/cpu; 8237aec1d6eScindi 8247aec1d6eScindiprop fault.cpu.amd.l2itlb@chip/cpu (1)-> 8257aec1d6eScindi ereport.cpu.amd.ic_l2tlb_trip@chip/cpu; 8267aec1d6eScindi 8277aec1d6eScindiprop fault.cpu.amd.l2itlb@chip/cpu (0)-> 8287aec1d6eScindi ereport.cpu.amd.ic.l2tlb_par@chip/cpu; 8297aec1d6eScindi 8308a40a695Sgavinm/* 8317aec1d6eScindi * dcache data errors 8327aec1d6eScindi */ 8337aec1d6eScindi 8347aec1d6eScindi#define DCACHEDATA_FIT 1000 8357aec1d6eScindi#define DCACHEDATA_SB_COUNT 2 8367aec1d6eScindi#define DCACHEDATA_SB_TIME 168h 8377aec1d6eScindi 8387aec1d6eScindievent fault.cpu.amd.dcachedata@chip/cpu, FITrate=DCACHEDATA_FIT, 8397aec1d6eScindi FRU=chip, ASRU=chip/cpu; 8407aec1d6eScindievent error.cpu.amd.dcachedata_sb@chip/cpu; 8417aec1d6eScindievent error.cpu.amd.dcachedata_mb@chip/cpu; 8427aec1d6eScindi 8437aec1d6eScindiprop fault.cpu.amd.dcachedata@chip/cpu (1)-> 8447aec1d6eScindi error.cpu.amd.dcachedata_sb@chip/cpu, 8457aec1d6eScindi error.cpu.amd.dcachedata_mb@chip/cpu; 8467aec1d6eScindi 8477aec1d6eScindi/* #DCD_SINGLE# 8487aec1d6eScindi * A single bit data array fault in a D cache can cause: 8497aec1d6eScindi * 8507aec1d6eScindi * - data_ecc1 : reported by dc on this cpu by scrubber 8517aec1d6eScindi * - data_ecc1_uc : reported by dc on this cpu other than by scrubber 8527aec1d6eScindi * 8537aec1d6eScindi * Make data_ecc1_uc fault immediately as it may have caused a panic; its engine serd.cpu.amd.dc_sb_uc is declared with N=0 and names the same trip event (ereport.cpu.amd.dc_sb_trip) as serd.cpu.amd.dc_sb. 8547aec1d6eScindi */ 8557aec1d6eScindi 8567aec1d6eScindievent ereport.cpu.amd.dc.data_ecc1@chip/cpu{within(5s)}; 8577aec1d6eScindievent ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu{within(5s)}; 858b5016cbbSstephhevent ereport.cpu.amd.dc_sb_trip@chip/cpu{within(5s)}; 8597aec1d6eScindi 8607aec1d6eScindiengine serd.cpu.amd.dc_sb@chip/cpu, 8617aec1d6eScindi N=DCACHEDATA_SB_COUNT, T=DCACHEDATA_SB_TIME, method=persistent, 8627aec1d6eScindi 
trip=ereport.cpu.amd.dc_sb_trip@chip/cpu; 8637aec1d6eScindi 8647aec1d6eScindiengine serd.cpu.amd.dc_sb_uc@chip/cpu, 8657aec1d6eScindi N=0, T=1hr, method=persistent, 8667aec1d6eScindi trip=ereport.cpu.amd.dc_sb_trip@chip/cpu; 8677aec1d6eScindi 8687aec1d6eScindievent upset.cpu.amd.dc_sb@chip/cpu, 8697aec1d6eScindi engine=serd.cpu.amd.dc_sb@chip/cpu; 8707aec1d6eScindi 8717aec1d6eScindievent upset.cpu.amd.dc_sb_uc@chip/cpu, 8727aec1d6eScindi engine=serd.cpu.amd.dc_sb_uc@chip/cpu; 8737aec1d6eScindi 8747aec1d6eScindiprop upset.cpu.amd.dc_sb@chip/cpu (1)-> 8757aec1d6eScindi ereport.cpu.amd.dc.data_ecc1@chip/cpu; 8767aec1d6eScindi 8777aec1d6eScindiprop upset.cpu.amd.dc_sb_uc@chip/cpu (1)-> 8787aec1d6eScindi ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu; 8797aec1d6eScindi 8807aec1d6eScindiprop error.cpu.amd.dcachedata_sb@chip/cpu (1)-> 8817aec1d6eScindi ereport.cpu.amd.dc_sb_trip@chip/cpu; 8827aec1d6eScindi 8837aec1d6eScindiprop fault.cpu.amd.dcachedata@chip/cpu (0)-> 8847aec1d6eScindi ereport.cpu.amd.dc.data_ecc1@chip/cpu, 8857aec1d6eScindi ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu; 8867aec1d6eScindi 8877aec1d6eScindi/* #DCD_MULTI# 8887aec1d6eScindi * A multi-bit data array fault in a D cache can cause: 8897aec1d6eScindi * 8907aec1d6eScindi * - data_eccm : reported by dc on this cpu 8917aec1d6eScindi */ 8927aec1d6eScindi 893b5016cbbSstephhevent ereport.cpu.amd.dc.data_eccm@chip/cpu{within(5s)}; 8947aec1d6eScindi 8957aec1d6eScindiprop error.cpu.amd.dcachedata_mb@chip/cpu (1)-> 8967aec1d6eScindi ereport.cpu.amd.dc.data_eccm@chip/cpu; 8977aec1d6eScindi 8987aec1d6eScindiprop fault.cpu.amd.dcachedata@chip/cpu (0)-> 8997aec1d6eScindi ereport.cpu.amd.dc.data_eccm@chip/cpu; 9007aec1d6eScindi 9017aec1d6eScindi/* #DCT_PAR# 9027aec1d6eScindi * A tag array parity fault in a D cache can cause: 9037aec1d6eScindi * 9047aec1d6eScindi * - tag_par : reported by dc on this cpu 9057aec1d6eScindi */ 9067aec1d6eScindi 9077aec1d6eScindi#define DCACHETAG_FIT 1000 9087aec1d6eScindi 
9097aec1d6eScindievent ereport.cpu.amd.dc.tag_par@chip/cpu{within(5s)}; 9107aec1d6eScindi 9117aec1d6eScindievent fault.cpu.amd.dcachetag@chip/cpu, FITrate=DCACHETAG_FIT, 9127aec1d6eScindi FRU=chip, ASRU=chip/cpu; 9137aec1d6eScindi 9147aec1d6eScindiprop fault.cpu.amd.dcachetag@chip/cpu (1)-> 9157aec1d6eScindi ereport.cpu.amd.dc.tag_par@chip/cpu; 9167aec1d6eScindi 9177aec1d6eScindi/* #DCT_SNOOP# 9187aec1d6eScindi * A snoop tag array parity fault in a D cache can cause: 9197aec1d6eScindi * 9207aec1d6eScindi * - stag_par : reported by dc on this cpu 9217aec1d6eScindi */ 9227aec1d6eScindi 9237aec1d6eScindi#define DCACHESTAG_FIT 1000 9247aec1d6eScindi 9257aec1d6eScindievent ereport.cpu.amd.dc.stag_par@chip/cpu{within(5s)}; 9267aec1d6eScindi 9277aec1d6eScindievent fault.cpu.amd.dcachestag@chip/cpu, FITrate=DCACHESTAG_FIT, 9287aec1d6eScindi FRU=chip, ASRU=chip/cpu; 9297aec1d6eScindi 9307aec1d6eScindiprop fault.cpu.amd.dcachestag@chip/cpu (1)-> 9317aec1d6eScindi ereport.cpu.amd.dc.stag_par@chip/cpu; 9327aec1d6eScindi 9337aec1d6eScindi/* #DCTLB_1# 9347aec1d6eScindi * An l1tlb parity fault in a D cache can cause: 9357aec1d6eScindi * 9367aec1d6eScindi * - l1tlb_par : reported by dc on this cpu 9377aec1d6eScindi */ 9387aec1d6eScindi 9397aec1d6eScindi#define L1DTLB_FIT 1000 9407aec1d6eScindi 9417aec1d6eScindievent ereport.cpu.amd.dc.l1tlb_par@chip/cpu{within(5s)}; 9427aec1d6eScindi 9437aec1d6eScindievent fault.cpu.amd.l1dtlb@chip/cpu, FITrate=L1DTLB_FIT, 9447aec1d6eScindi FRU=chip, ASRU=chip/cpu; 9457aec1d6eScindi 9467aec1d6eScindiprop fault.cpu.amd.l1dtlb@chip/cpu (1)-> 9477aec1d6eScindi ereport.cpu.amd.dc.l1tlb_par@chip/cpu; 9487aec1d6eScindi 9497aec1d6eScindi/* #DCTLB_2# 9507aec1d6eScindi * An l2tlb parity fault in a D cache can cause: 9517aec1d6eScindi * 9527aec1d6eScindi * - l2tlb_par : reported by dc on this cpu 9537aec1d6eScindi */ 9547aec1d6eScindi 9557aec1d6eScindi#define L2DTLB_FIT 1000 9567aec1d6eScindi 9577aec1d6eScindievent 
ereport.cpu.amd.dc.l2tlb_par@chip/cpu{within(5s)}; 9587aec1d6eScindi 9597aec1d6eScindievent fault.cpu.amd.l2dtlb@chip/cpu, FITrate=L2DTLB_FIT, 9607aec1d6eScindi FRU=chip, ASRU=chip/cpu; 9617aec1d6eScindi 9627aec1d6eScindiprop fault.cpu.amd.l2dtlb@chip/cpu (1)-> 9637aec1d6eScindi ereport.cpu.amd.dc.l2tlb_par@chip/cpu; 9647aec1d6eScindi 9658a40a695Sgavinm/* #MISC# 9667aec1d6eScindi * Ereports that should not normally happen and which we will discard 9677aec1d6eScindi * without diagnosis if they do. These fall into a few categories: 9687aec1d6eScindi * 9697aec1d6eScindi * - the corresponding detector is not enabled, typically because 9707aec1d6eScindi * detection/handling of the event is taking place elsewhere 9717aec1d6eScindi * (nb.ma, nb.ta, ls.s_rde, ic.rdde, bu.s_rde, nb.gart_walk) 9727aec1d6eScindi * - the event is associated with a sync flood so even if the detector is 9737aec1d6eScindi * enabled we will never handle the event and generate an ereport *and* 9747aec1d6eScindi * even if the ereport did arrive we could perform no useful diagnosis 9757aec1d6eScindi * e.g., the NB can be configured for sync flood on nb.mem_eccm 9767aec1d6eScindi * but we don't choose to discard that ereport here since we could have 9777aec1d6eScindi * made a useful diagnosis from it had it been delivered 9787aec1d6eScindi * (nb.ht_sync, nb.ht_crc) 9797aec1d6eScindi * - events that will be accompanied by an immediate panic and 9807aec1d6eScindi * delivery of the ereport during subsequent reboot but from 9817aec1d6eScindi * which no useful diagnosis can be made. (nb.rmw, nb.wdog) 9827aec1d6eScindi * 9837aec1d6eScindi * Ereports for all of these can be generated by error simulation and 9847aec1d6eScindi * injection. We will perform a null diagnosis of all these ereports in order 9857aec1d6eScindi * to avoid "no subscription" complaints during test harness runs. ereport.cpu.amd.unknown is included in the same null diagnosis. 
9867aec1d6eScindi */ 9877aec1d6eScindi 988b5016cbbSstephhevent ereport.cpu.amd.nb.ma@cpu{within(5s)}; 989b5016cbbSstephhevent ereport.cpu.amd.nb.ta@cpu{within(5s)}; 990b5016cbbSstephhevent ereport.cpu.amd.ls.s_rde@cpu{within(5s)}; 991b5016cbbSstephhevent ereport.cpu.amd.ic.rdde@cpu{within(5s)}; 992b5016cbbSstephhevent ereport.cpu.amd.bu.s_rde@cpu{within(5s)}; 993b5016cbbSstephhevent ereport.cpu.amd.nb.gart_walk@cpu{within(5s)}; 994b5016cbbSstephhevent ereport.cpu.amd.nb.ht_sync@cpu{within(5s)}; 995b5016cbbSstephhevent ereport.cpu.amd.nb.ht_crc@cpu{within(5s)}; 996b5016cbbSstephhevent ereport.cpu.amd.nb.rmw@cpu{within(5s)}; 997b5016cbbSstephhevent ereport.cpu.amd.nb.wdog@cpu{within(5s)}; 998b5016cbbSstephhevent ereport.cpu.amd.unknown@cpu{within(5s)}; 9997aec1d6eScindi 10007aec1d6eScindievent upset.null_diag@cpu; 10017aec1d6eScindi 10027aec1d6eScindiprop upset.null_diag@cpu (1)-> 10037aec1d6eScindi ereport.cpu.amd.nb.ma@cpu, 10047aec1d6eScindi ereport.cpu.amd.nb.ta@cpu, 10057aec1d6eScindi ereport.cpu.amd.ls.s_rde@cpu, 10067aec1d6eScindi ereport.cpu.amd.ic.rdde@cpu, 10077aec1d6eScindi ereport.cpu.amd.bu.s_rde@cpu, 10087aec1d6eScindi ereport.cpu.amd.nb.gart_walk@cpu, 10097aec1d6eScindi ereport.cpu.amd.nb.ht_sync@cpu, 10107aec1d6eScindi ereport.cpu.amd.nb.ht_crc@cpu, 10117aec1d6eScindi ereport.cpu.amd.nb.rmw@cpu, 10127aec1d6eScindi ereport.cpu.amd.nb.wdog@cpu, 10137aec1d6eScindi ereport.cpu.amd.unknown@cpu; 1014