/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#pragma dictionary "INTEL"

/*
 * Eversholt rules for the intel CPU/Memory
 */

/*
 * Ereports for Simple error codes.
 *
 * SMPL_EVENT(leafclass, t) declares ereport.cpu.intel.<leafclass> at
 * chip/core/strand with a within(t) constraint.
 */

#define SMPL_EVENT(leafclass, t) \
    event ereport.cpu.intel.leafclass@chip/core/strand { within(t) }

SMPL_EVENT(unknown, 1s);
SMPL_EVENT(unclassified, 1s);
SMPL_EVENT(microcode_rom_parity, 1s);
SMPL_EVENT(external, 1s);
SMPL_EVENT(frc, 1s);
SMPL_EVENT(internal_timer, 1s);
SMPL_EVENT(internal_parity, 1s);
SMPL_EVENT(internal_unclassified, 1s);

/*
 * Propagations for all but "external" and "unknown" simple errors.
 * If the error is uncorrected we produce a fault immediately, otherwise
 * we diagnose it to an upset and declare a fault when the SERD engine
 * trips.  prop statement for ereport.cpu.intel.internal_unclassified is
 * moved to the Nehalem EX section to deal with poison case.
 */

engine serd.cpu.intel.simple@chip/core/strand, N=3, T=72h;
event fault.cpu.intel.internal@chip/core/strand,
    engine=serd.cpu.intel.simple@chip/core/strand;

/*
 * An ereport whose "error_uncorrected" payload member is 1 bumps the SERD
 * count by 4, which exceeds the engine's N=3 in one step.
 */
prop fault.cpu.intel.internal@chip/core/strand
    { payloadprop("error_uncorrected") == 1 ? setserdincrement(4) : 1} (0)->
    ereport.cpu.intel.microcode_rom_parity@chip/core/strand,
    ereport.cpu.intel.internal_timer@chip/core/strand,
    ereport.cpu.intel.internal_parity@chip/core/strand,
    ereport.cpu.intel.unclassified@chip/core/strand,
    ereport.cpu.intel.frc@chip/core/strand;

/*
 * Ereports for Compound error codes.  These are in pairs "foo" and "foo_uc"
 * for the corrected and uncorrected version of each error type.  All are
 * detected at chip/core/strand.
 *
 * CMPND_EVENT(leafclass, t) declares both members of such a pair, each
 * with a within(t) constraint.  The empty comment in the macro body pastes
 * the "_uc" suffix onto the leaf class name.
 */

#define CMPND_EVENT(leafclass, t) \
    event ereport.cpu.intel.leafclass@chip/core/strand { within(t) }; \
    event ereport.cpu.intel.leafclass/**/_uc@chip/core/strand { within(t) }

/*
 * Ereports for Compound error codes - intel errors
 */
CMPND_EVENT(l0cache, 1s);
CMPND_EVENT(l1cache, 1s);
CMPND_EVENT(l2cache, 1s);
CMPND_EVENT(cache, 1s);

/*
 * Ereports for Compound error codes - TLB errors
 */
CMPND_EVENT(l0dtlb, 1s);
CMPND_EVENT(l1dtlb, 1s);
CMPND_EVENT(l2dtlb, 1s);
CMPND_EVENT(dtlb, 1s);

CMPND_EVENT(l0itlb, 1s);
CMPND_EVENT(l1itlb, 1s);
CMPND_EVENT(l2itlb, 1s);
CMPND_EVENT(itlb, 1s);

CMPND_EVENT(l0tlb, 1s);
CMPND_EVENT(l1tlb, 1s);
CMPND_EVENT(l2tlb, 1s);
CMPND_EVENT(tlb, 1s);

/*
 * Ereports for Compound error codes - memory hierarchy errors
 */
CMPND_EVENT(l0dcache, 1s);
CMPND_EVENT(l1dcache, 1s);
CMPND_EVENT(l2dcache, 1s);
CMPND_EVENT(dcache, 1s);

CMPND_EVENT(l0icache, 1s);
CMPND_EVENT(l1icache, 1s);
CMPND_EVENT(l2icache, 1s);
CMPND_EVENT(icache, 1s);

/*
 * Ereports for Compound error codes - bus and interconnect errors
 */
CMPND_EVENT(bus_interconnect, 1s);
CMPND_EVENT(bus_interconnect_memory, 1s);
CMPND_EVENT(bus_interconnect_io, 1s);

/*
 * Compound error propagations.
 *
 * We resist the temptation to propagate, for example, a single dcache fault
 * to all ereports mentioning dcache (l0dcache, l1dcache, l2dcache, dcache).
 * Instead we will diagnose a distinct fault for each possible cache level,
 * whether or not current chips have dcaches at all levels.
 *
 * Corrected errors are SERDed and produce a fault when the engine fires;
 * the same fault is diagnosed immediately for a corresponding uncorrected
 * error.
 */

/*
 * CMPND_FLT_PROP_1(erptleaf, fltleaf, n, t) declares, at chip/core/strand:
 *   - SERD engine serd.cpu.intel.<fltleaf> with N=n, T=t;
 *   - fault.cpu.intel.<fltleaf> tied to that engine;
 *   - a propagation from the fault to the corrected ereport <erptleaf>;
 *   - a propagation from the fault to <erptleaf>_uc that bumps the SERD
 *     count by n + 1, i.e. past the engine's N in a single step.
 */
#define CMPND_FLT_PROP_1(erptleaf, fltleaf, n, t) \
    engine serd.cpu.intel.fltleaf@chip/core/strand, N=n, T=t; \
    event fault.cpu.intel.fltleaf@chip/core/strand, \
        engine=serd.cpu.intel.fltleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand (0)-> \
        ereport.cpu.intel.erptleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand \
        { setserdincrement(n + 1) } (0)-> \
        ereport.cpu.intel.erptleaf/**/_uc@chip/core/strand

/*
 * CMPND_FLT_PROP_2 is identical to CMPND_FLT_PROP_1 except that the fault
 * is declared with retire=0, response=0.
 */
#define CMPND_FLT_PROP_2(erptleaf, fltleaf, n, t) \
    engine serd.cpu.intel.fltleaf@chip/core/strand, N=n, T=t; \
    event fault.cpu.intel.fltleaf@chip/core/strand, retire=0, response=0,\
        engine=serd.cpu.intel.fltleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand (0)-> \
        ereport.cpu.intel.erptleaf@chip/core/strand; \
    \
    prop fault.cpu.intel.fltleaf@chip/core/strand \
        { setserdincrement(n + 1) } (0)-> \
        ereport.cpu.intel.erptleaf/**/_uc@chip/core/strand

CMPND_FLT_PROP_1(l0cache, l0cache, 3, 72h);
CMPND_FLT_PROP_1(l1cache, l1cache, 3, 72h);
CMPND_FLT_PROP_1(l2cache, l2cache, 3, 72h);
CMPND_FLT_PROP_1(cache, cache, 12, 72h);

CMPND_FLT_PROP_1(l0dtlb, l0dtlb, 3, 72h);
CMPND_FLT_PROP_1(l1dtlb, l1dtlb, 3, 72h);
CMPND_FLT_PROP_1(l2dtlb, l2dtlb, 3, 72h);
CMPND_FLT_PROP_1(dtlb, dtlb, 12, 72h);

CMPND_FLT_PROP_1(l0itlb, l0itlb, 3, 72h);
CMPND_FLT_PROP_1(l1itlb, l1itlb, 3, 72h);
CMPND_FLT_PROP_1(l2itlb, l2itlb, 3, 72h);
CMPND_FLT_PROP_1(itlb, itlb, 12, 72h);

CMPND_FLT_PROP_1(l0tlb, l0tlb, 3, 72h);
CMPND_FLT_PROP_1(l1tlb, l1tlb, 3, 72h);
CMPND_FLT_PROP_1(l2tlb, l2tlb, 3, 72h);
CMPND_FLT_PROP_1(tlb, tlb, 12, 72h);

CMPND_FLT_PROP_1(l0dcache, l0dcache, 3, 72h);
CMPND_FLT_PROP_1(l1dcache, l1dcache, 3, 72h);
CMPND_FLT_PROP_1(l2dcache, l2dcache, 3, 72h);
CMPND_FLT_PROP_1(dcache, dcache, 12, 72h);

CMPND_FLT_PROP_1(l0icache, l0icache, 3, 72h);
CMPND_FLT_PROP_1(l1icache, l1icache, 3, 72h);
CMPND_FLT_PROP_1(l2icache, l2icache, 3, 72h);
CMPND_FLT_PROP_1(icache, icache, 12, 72h);

CMPND_FLT_PROP_2(bus_interconnect, bus_interconnect, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_memory, bus_interconnect_memory, 10, 72h);
CMPND_FLT_PROP_2(bus_interconnect_io, bus_interconnect_io, 10, 72h);

/* "external" and "unknown" simple ereports are explained by a discard upset */
event upset.discard@chip/core/strand;

prop upset.discard@chip/core/strand (0)->
    ereport.cpu.intel.external@chip/core/strand,
    ereport.cpu.intel.unknown@chip/core/strand;

/* errors detected in northbridge */


/*
 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
 * we diagnose for page faults, to record the physical address of the faulting
 * page.
*/
#define SET_ADDR (!payloadprop_defined("physaddr") || \
    setpayloadprop("asru-physaddr", payloadprop("physaddr")))

#define SET_OFFSET (!payloadprop_defined("offset") || \
    setpayloadprop("asru-offset", payloadprop("offset")))

/*
 * List of CPU-detected bus/interconnect ereports (plus "external") that the
 * northbridge and memory rules below attach as optional ((0)->) effects.
 */
#define EREPORT_BUS_ERROR \
    ereport.cpu.intel.bus_interconnect_memory_uc@chip/core/strand, \
    ereport.cpu.intel.bus_interconnect_uc@chip/core/strand, \
    ereport.cpu.intel.bus_interconnect_memory@chip/core/strand, \
    ereport.cpu.intel.bus_interconnect@chip/core/strand, \
    ereport.cpu.intel.external@chip/core/strand

engine stat.ce_pgflt@memory-controller/dram-channel/dimm;

event ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller{within(12s)};
event ereport.cpu.intel.nb.ddr2_mem_ue@
    motherboard/memory-controller{within(12s)};
event ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller{within(12s)};
event fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank,
    message=0, response=0;
event fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.page_ue@
    motherboard/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
    (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (1)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.ddr2_mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

prop fault.memory.intel.dimm_ue@
    motherboard/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") } (1)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.ddr2_mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

event upset.memory.intel.discard@motherboard/memory-controller{within(1s)};

prop upset.memory.intel.discard@motherboard/memory-controller (0)->
    ereport.cpu.intel.nb.mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.ddr2_mem_ue@motherboard/memory-controller,
    ereport.cpu.intel.nb.fbd.ma@motherboard/memory-controller;

prop upset.memory.intel.discard@motherboard/memory-controller (0)->
    EREPORT_BUS_ERROR;

/* Default SERD limits for page-level and DIMM-level correctable errors */
#define PAGE_CE_COUNT   2
#define PAGE_CE_TIME    72h
#define DIMM_CE_COUNT   10
#define DIMM_CE_TIME    1week

#define MBDIMM motherboard/memory-controller/dram-channel/dimm
event ereport.cpu.intel.nb.mem_ce@MBDIMM/rank{within(12s)};
event ereport.cpu.intel.nb.ddr2_mem_ce@MBDIMM/rank{within(12s)};
event ereport.cpu.intel.nb.ddr2_mem_ce@
    motherboard/memory-controller{within(12s)};

engine serd.memory.intel.page_ce@MBDIMM/rank, N=PAGE_CE_COUNT, T=PAGE_CE_TIME;
event fault.memory.intel.page_ce@MBDIMM/rank, message=0, response=0,
    count=stat.ce_pgflt@MBDIMM, engine=serd.memory.intel.page_ce@MBDIMM/rank;
prop fault.memory.intel.page_ce@MBDIMM/rank
    { (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.intel.nb.mem_ce@MBDIMM/rank,
    ereport.cpu.intel.nb.ddr2_mem_ce@MBDIMM/rank;

engine serd.memory.intel.dimm_ce@MBDIMM/rank, N=DIMM_CE_COUNT, T=DIMM_CE_TIME;
event fault.memory.intel.dimm_ce@MBDIMM/rank,
    engine=serd.memory.intel.dimm_ce@MBDIMM/rank;
/* DIMMs with no "dimm-size" config property: fixed page-fault threshold */
prop fault.memory.intel.dimm_ce@MBDIMM/rank
    { !confprop_defined(MBDIMM, "dimm-size") &&
    count(stat.ce_pgflt@MBDIMM) > 512 } (1)->
    ereport.cpu.intel.nb.mem_ce@MBDIMM/rank,
    ereport.cpu.intel.nb.ddr2_mem_ce@MBDIMM/rank;

/*
 * DIMM_CE(dimm_size, n, t, fault_rate) adds a dimm_ce propagation that
 * applies when the DIMM's "dimm-size" config property equals dimm_size and
 * the stat.ce_pgflt count exceeds fault_rate; it sets the SERD engine's
 * N and T to n and t.  The macro body supplies its own terminating ';',
 * so invocations are written without one.
 */
#define DIMM_CE(dimm_size, n, t, fault_rate) \
    prop fault.memory.intel.dimm_ce@MBDIMM/rank { \
    confprop(MBDIMM, "dimm-size") == dimm_size && \
    count(stat.ce_pgflt@MBDIMM) > fault_rate && \
    setserdn(n) & setserdt(t) } (1)-> \
    ereport.cpu.intel.nb.mem_ce@MBDIMM/rank, \
    ereport.cpu.intel.nb.ddr2_mem_ce@MBDIMM/rank;

DIMM_CE("8G", 8, 1week, 2000)
DIMM_CE("4G", 4, 1week, 1500)
DIMM_CE("2G", 4, 2week, 1000)
DIMM_CE("1G", 4, 4week, 500)
DIMM_CE("512M", 4, 8week, 250)
DIMM_CE("256M", 4, 16week, 125)

prop upset.memory.intel.discard@motherboard/memory-controller (0)->
    ereport.cpu.intel.nb.ddr2_mem_ce@motherboard/memory-controller;

event ereport.cpu.intel.nb.fbd.alert@rank{within(12s)};
event fault.memory.intel.fbd.alert@rank, retire=0;

prop fault.memory.intel.fbd.alert@rank (1)->
    ereport.cpu.intel.nb.fbd.alert@rank;

prop fault.memory.intel.fbd.alert@rank (0)->
    EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.fbd.crc@rank{within(12s)};
event fault.memory.intel.fbd.crc@rank, retire=0;

prop fault.memory.intel.fbd.crc@rank (1)->
    ereport.cpu.intel.nb.fbd.crc@rank;

prop fault.memory.intel.fbd.crc@rank (0)-> EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller {within(12s)};
event fault.memory.intel.fbd.reset_timeout@memory-controller, retire=0;

prop fault.memory.intel.fbd.reset_timeout@memory-controller (1)->
    ereport.cpu.intel.nb.fbd.reset_timeout@memory-controller;

prop fault.memory.intel.fbd.reset_timeout@memory-controller (0)->
    EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.fbd.ch@dram-channel {within(12s)};
engine serd.cpu.intel.nb.fbd.ch@dram-channel, N=2, T=1month;
event fault.memory.intel.fbd.ch@dram-channel, retire=0,
    engine=serd.cpu.intel.nb.fbd.ch@dram-channel;

prop fault.memory.intel.fbd.ch@dram-channel (1)->
    ereport.cpu.intel.nb.fbd.ch@dram-channel;

prop fault.memory.intel.fbd.ch@dram-channel (0)->
    EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.fbd.otf@dram-channel {within(12s)};
engine serd.cpu.intel.nb.fbd_otf@dram-channel, N=2, T=1week;
event fault.memory.intel.fbd.otf@dram-channel, retire=0, response=0,
    engine=serd.cpu.intel.nb.fbd_otf@dram-channel;

prop fault.memory.intel.fbd.otf@dram-channel (1)->
    ereport.cpu.intel.nb.fbd.otf@dram-channel;

event ereport.cpu.intel.nb.otf@motherboard {within(12s)};
event fault.cpu.intel.nb.otf@motherboard, retire=0, response=0;

prop fault.cpu.intel.nb.otf@motherboard (1)->
    ereport.cpu.intel.nb.otf@motherboard;

event ereport.cpu.intel.nb.unknown@motherboard {within(12s)};
event ereport.cpu.intel.nb.unknown@memory-controller {within(12s)};
event ereport.cpu.intel.nb.unknown@memory-controller/dram-channel {within(12s)};
event ereport.cpu.intel.nb.spd@memory-controller/dram-channel {within(12s)};
event ereport.cpu.intel.nb.ddr2_spd@
    memory-controller/dram-channel {within(12s)};
event upset.discard@memory-controller;

prop upset.discard@memory-controller (0)->
    ereport.cpu.intel.nb.unknown@motherboard,
    ereport.cpu.intel.nb.unknown@memory-controller,
    ereport.cpu.intel.nb.unknown@memory-controller/dram-channel,
    ereport.cpu.intel.nb.spd@memory-controller/dram-channel,
    ereport.cpu.intel.nb.ddr2_spd@memory-controller/dram-channel;

event ereport.cpu.intel.nb.mem_ds@memory-controller{within(30s)};
event ereport.cpu.intel.nb.ddr2_mem_ds@memory-controller{within(30s)};
event fault.memory.intel.fbd.mem_ds@memory-controller/dram-channel/dimm/rank,
    retire=0;

prop fault.memory.intel.fbd.mem_ds@
    memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") } (1)->
    ereport.cpu.intel.nb.mem_ds@memory-controller,
    ereport.cpu.intel.nb.ddr2_mem_ds@memory-controller;

event ereport.cpu.intel.nb.fsb@chip{within(12s)};
event fault.cpu.intel.nb.fsb@chip, retire=0;

prop fault.cpu.intel.nb.fsb@chip (1)->
    ereport.cpu.intel.nb.fsb@chip;

prop fault.cpu.intel.nb.fsb@chip (0)-> EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.ie@motherboard{within(12s)};
event fault.cpu.intel.nb.ie@motherboard, retire=0;
event upset.cpu.intel.nb.ie_ce@motherboard{within(12s)};

/*
 * An nb.ie ereport whose "intel-error-list" payload is "B6" is diagnosed
 * to an upset; any other value is diagnosed to a fault.
 */
prop upset.cpu.intel.nb.ie_ce@motherboard
    { payloadprop("intel-error-list") == "B6" } (0)->
    ereport.cpu.intel.nb.ie@motherboard;

prop fault.cpu.intel.nb.ie@motherboard
    { payloadprop("intel-error-list") != "B6" } (1)->
    ereport.cpu.intel.nb.ie@motherboard;

prop fault.cpu.intel.nb.ie@motherboard (0)-> EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.dma@motherboard{within(12s)};
event upset.cpu.intel.nb.dma@motherboard;

prop upset.cpu.intel.nb.dma@motherboard (1)->
    ereport.cpu.intel.nb.dma@motherboard;

event ereport.cpu.intel.nb.esi@motherboard{within(12s)};
event ereport.cpu.intel.nb.pex@hostbridge{within(12s)};
event upset.cpu.intel.nb.pex@hostbridge;

prop upset.cpu.intel.nb.pex@hostbridge (1)->
    ereport.cpu.intel.nb.esi@motherboard,
    ereport.cpu.intel.nb.pex@hostbridge;

prop upset.cpu.intel.nb.pex@hostbridge (0)-> EREPORT_BUS_ERROR;

event ereport.cpu.intel.nb.unknown@rank{within(12s)};
event upset.discard@rank;

prop upset.discard@rank (1)->
    ereport.cpu.intel.nb.unknown@rank;

prop upset.discard@rank (0)-> EREPORT_BUS_ERROR;

/*
 * CPU integrated memory controller
 */

/*
 * True when the ereport's "resource" payload contains the ASRU of either
 * the rank or its parent dimm.
 */
#define CONTAINS_RANK (payloadprop_contains("resource", \
    asru(chip/memory-controller/dram-channel/dimm/rank)) || \
    payloadprop_contains("resource", \
    asru(chip/memory-controller/dram-channel/dimm)))

#define STAT_CPU_MEM_CE_PGFLTS \
    stat.ce_pgflt@chip/memory-controller/dram-channel/dimm

/*
 * Like SET_OFFSET, but takes the offset from
 * "resource[0].hc-specific.offset" in the ereport payload.
 */
#define SET_RES_OFFSET \
    (!payloadprop_defined("resource[0].hc-specific.offset") || \
    setpayloadprop("asru-offset", \
    payloadprop("resource[0].hc-specific.offset")))

engine STAT_CPU_MEM_CE_PGFLTS;

event ereport.cpu.intel.quickpath.mem_ue@chip/memory-controller
    {within(12s)}, discard_if_config_unknown=1;

event fault.memory.intel.page_ue@
    chip/memory-controller/dram-channel/dimm/rank,
    message=0, response=0;    /* do not message individual pageflts */

prop fault.memory.intel.page_ue@
    chip/memory-controller/dram-channel/dimm/rank
    { CONTAINS_RANK && (payloadprop_defined("physaddr") ||
    payloadprop_defined("resource[0].hc-specific.offset")) &&
    SET_ADDR && SET_RES_OFFSET } (0)->
    ereport.cpu.intel.quickpath.mem_ue@chip/memory-controller;

#define CHIPDIMM chip/memory-controller/dram-channel/dimm

event fault.memory.intel.dimm_ue@CHIPDIMM/rank;

event error.memory.intel.dimm_ue_ep@CHIPDIMM/rank;
event error.memory.intel.dimm_ue_ex@CHIPDIMM/rank;

prop fault.memory.intel.dimm_ue@CHIPDIMM/rank (1)->
    error.memory.intel.dimm_ue_ep@CHIPDIMM/rank,
    error.memory.intel.dimm_ue_ex@CHIPDIMM/rank;

prop error.memory.intel.dimm_ue_ep@CHIPDIMM/rank
    { CONTAINS_RANK } (1)->
    ereport.cpu.intel.quickpath.mem_ue@chip/memory-controller;

prop fault.memory.intel.dimm_ue@CHIPDIMM/rank (0)-> EREPORT_BUS_ERROR;

event ereport.cpu.intel.quickpath.mem_ce@
    chip/memory-controller {within(12s)}, discard_if_config_unknown=1;

engine serd.memory.intel.page_ce@CHIPDIMM/rank, N=PAGE_CE_COUNT, T=PAGE_CE_TIME;
event fault.memory.intel.page_ce@CHIPDIMM/rank, message=0, response=0,
    count=STAT_CPU_MEM_CE_PGFLTS,
    engine=serd.memory.intel.page_ce@CHIPDIMM/rank;
prop fault.memory.intel.page_ce@CHIPDIMM/rank
    { CONTAINS_RANK && (payloadprop_defined("physaddr") ||
    payloadprop_defined("resource[0].hc-specific.offset")) &&
    SET_ADDR && SET_RES_OFFSET } (0)->
    ereport.cpu.intel.quickpath.mem_ce@chip/memory-controller;

/*
 * NOTE(review): this DIMM-level engine is declared with the PAGE_CE_*
 * limits, whereas the motherboard dimm_ce engine above uses
 * DIMM_CE_COUNT/DIMM_CE_TIME.  Confirm that this is intentional.
 */
engine serd.memory.intel.dimm_ce@CHIPDIMM, N=PAGE_CE_COUNT, T=PAGE_CE_TIME;
event fault.memory.intel.dimm_ce@CHIPDIMM,
    engine=serd.memory.intel.dimm_ce@CHIPDIMM;
prop fault.memory.intel.dimm_ce@CHIPDIMM
    { !confprop_defined(CHIPDIMM, "dimm-size") &&
    count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (0)->
    ereport.cpu.intel.quickpath.mem_ce@chip/memory-controller;

/*
 * CPU_MEM_DIMM_CE is the integrated-memory-controller counterpart of
 * DIMM_CE: same conditions, applied to fault.memory.intel.dimm_ce@CHIPDIMM
 * and the quickpath mem_ce ereport.  The macro body supplies its own ';'.
 */
#define CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) \
    prop fault.memory.intel.dimm_ce@CHIPDIMM { \
    confprop(CHIPDIMM, "dimm-size") == dimm_size && \
    count(STAT_CPU_MEM_CE_PGFLTS) > fault_rate && \
    setserdn(n) & setserdt(t) } (0)-> \
    ereport.cpu.intel.quickpath.mem_ce@ \
    chip/memory-controller;

CPU_MEM_DIMM_CE("16G", 16, 1week, 2000)
CPU_MEM_DIMM_CE("8G", 8, 1week, 2000)
CPU_MEM_DIMM_CE("4G", 4, 1week, 1500)
CPU_MEM_DIMM_CE("2G", 4, 2week, 1000)
CPU_MEM_DIMM_CE("1G", 4, 4week, 500)
CPU_MEM_DIMM_CE("512M", 4, 8week, 250)

event ereport.cpu.intel.quickpath.mem_unknown@chip/memory-controller
    {within(12s)}, discard_if_config_unknown=1;
event ereport.cpu.intel.quickpath.mem_unknown@
    chip/memory-controller/dram-channel {within(12s)},
    discard_if_config_unknown=1;
event ereport.cpu.intel.quickpath.mem_unknown@
    chip/memory-controller/dram-channel/dimm/rank{within(12s)};
event upset.discard@chip/memory-controller;
event upset.discard@chip/memory-controller/dram-channel/dimm/rank;

prop upset.discard@chip/memory-controller (0)->
    ereport.cpu.intel.quickpath.mem_unknown@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_unknown@
    chip/memory-controller/dram-channel;

prop upset.discard@
    chip/memory-controller/dram-channel/dimm/rank (1)->
    ereport.cpu.intel.quickpath.mem_unknown@
    chip/memory-controller/dram-channel/dimm/rank;

event ereport.cpu.intel.quickpath.mem_parity@chip/memory-controller
    {within(1s)}, discard_if_config_unknown=1;
event fault.cpu.intel.quickpath.mem_parity@chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_parity@chip/memory-controller (1)->
    ereport.cpu.intel.quickpath.mem_parity@chip/memory-controller;

event ereport.cpu.intel.quickpath.mem_addr_parity@chip/memory-controller
    {within(1s)}, discard_if_config_unknown=1;
event fault.cpu.intel.quickpath.mem_addr_parity@
    chip/memory-controller;
event fault.cpu.intel.quickpath.mem_addr_parity@CHIPDIMM;
event fault.cpu.intel.quickpath.mem_addr_parity@CHIPDIMM/rank;

prop fault.cpu.intel.quickpath.mem_addr_parity@
    chip/memory-controller (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@CHIPDIMM
    { payloadprop_contains("resource", asru(CHIPDIMM)) } (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_addr_parity@CHIPDIMM/rank
    { payloadprop_contains("resource", asru(CHIPDIMM/rank)) } (1)->
    ereport.cpu.intel.quickpath.mem_addr_parity@chip/memory-controller;

event ereport.cpu.intel.quickpath.mem_bad_addr@chip/memory-controller
    {within(1s)}, discard_if_config_unknown=1;
event fault.cpu.intel.quickpath.mem_bad_addr@chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_bad_addr@chip/memory-controller (1)->
    ereport.cpu.intel.quickpath.mem_bad_addr@chip/memory-controller;

event ereport.cpu.intel.quickpath.mem_spare@chip/memory-controller
    {within(1s)}, discard_if_config_unknown=1;
event fault.cpu.intel.quickpath.mem_spare@
    chip/memory-controller/dram-channel/dimm;

prop fault.cpu.intel.quickpath.mem_spare@
    chip/memory-controller/dram-channel/dimm (1)->
    ereport.cpu.intel.quickpath.mem_spare@chip/memory-controller;

event ereport.cpu.intel.quickpath.mem_bad_id@chip/memory-controller
    {within(1s)}, discard_if_config_unknown=1;
event fault.cpu.intel.quickpath.mem_bad_id@chip/memory-controller;

prop fault.cpu.intel.quickpath.mem_bad_id@chip/memory-controller (1)->
    ereport.cpu.intel.quickpath.mem_bad_id@chip/memory-controller;

event ereport.cpu.intel.quickpath.mem_redundant@chip/memory-controller
    {within(1s)}, discard_if_config_unknown=1;
engine serd.cpu.intel.quickpath.mem_redundant@CHIPDIMM, N=2, T=72h;
event fault.cpu.intel.quickpath.mem_redundant@CHIPDIMM,
    engine=serd.cpu.intel.quickpath.mem_redundant@CHIPDIMM;

event error.cpu.intel.quickpath.mem_redundant@CHIPDIMM/rank;

prop fault.cpu.intel.quickpath.mem_redundant@CHIPDIMM (1)->
    error.cpu.intel.quickpath.mem_redundant@CHIPDIMM/rank<>;
prop error.cpu.intel.quickpath.mem_redundant@CHIPDIMM/rank
    { CONTAINS_RANK } (1)->
    ereport.cpu.intel.quickpath.mem_redundant@
    chip/memory-controller;

#define STATUS_UC (payloadprop("error_uncorrected") == 1)
event ereport.cpu.intel.quickpath.interconnect@chip
    {within(1s)};
event upset.cpu.intel.quickpath.interconnect@chip;
/* Diagnose corrected events to upsets */
prop upset.cpu.intel.quickpath.interconnect@chip
    { !STATUS_UC } (1)->
    ereport.cpu.intel.quickpath.interconnect@chip;

engine serd.cpu.intel.quickpath.interconnect@chip,
    N=3, T=72h;
event fault.cpu.intel.quickpath.interconnect@chip,
    engine=serd.cpu.intel.quickpath.interconnect@chip;

/* Diagnose uncorrected events to faults */
prop fault.cpu.intel.quickpath.interconnect@chip
    { STATUS_UC } (0)->
    ereport.cpu.intel.quickpath.interconnect@chip;


/*
 * Nehalem EX specific rules
 */
/* MBox errors */
/*
 * EX_MEM_EVENT(leafclass, t) declares
 * ereport.cpu.intel.quickpath.<leafclass> at chip/memory-controller with
 * a within(t) constraint and discard_if_config_unknown=1.
 */
#define EX_MEM_EVENT(leafclass, t) \
    event ereport.cpu.intel.quickpath.leafclass@ \
    chip/memory-controller { within(t) }, discard_if_config_unknown=1

EX_MEM_EVENT(mem_lnktrns, 1s);
EX_MEM_EVENT(mem_lnkpers, 1s);
EX_MEM_EVENT(mem_sbfbdlinkerr, 1s);
EX_MEM_EVENT(mem_nbfbdlnkerr, 1s);
EX_MEM_EVENT(mem_lnkcrcvld, 1s);

engine serd.cpu.intel.quickpath.mem_link_ce@chip/memory-controller,
    N=500, T=1week;
event fault.cpu.intel.quickpath.mem_link_ce@chip/memory-controller,
    engine=serd.cpu.intel.quickpath.mem_link_ce@chip/memory-controller,
    retire=0, response=0;

prop fault.cpu.intel.quickpath.mem_link_ce@chip/memory-controller ->
    ereport.cpu.intel.quickpath.mem_lnktrns@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_lnkpers@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_sbfbdlinkerr@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_nbfbdlnkerr@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_lnkcrcvld@chip/memory-controller;

EX_MEM_EVENT(mem_lnkuncorr_uc, 1s);
EX_MEM_EVENT(mem_lnkpers_uc, 1s);
EX_MEM_EVENT(mem_sbfbdlinkerr_uc, 1s);
EX_MEM_EVENT(mem_nbfbdlnkerr_uc, 1s);
EX_MEM_EVENT(mem_lnkcrcvld_uc, 1s);

event fault.cpu.intel.quickpath.mem_link_ue@chip/memory-controller,
    retire=0;

prop fault.cpu.intel.quickpath.mem_link_ue@chip/memory-controller ->
    ereport.cpu.intel.quickpath.mem_lnkuncorr_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_lnkpers_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_sbfbdlinkerr_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_nbfbdlnkerr_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_lnkcrcvld_uc@chip/memory-controller;

EX_MEM_EVENT(mem_ptrl_fsm_err, 1s);
EX_MEM_EVENT(mem_errflw_fsm_fail, 1s);
EX_MEM_EVENT(mem_vberr, 1s);

engine serd.cpu.intel.quickpath.mem_controller_ce@chip/memory-controller,
    N=500, T=1week;
event fault.cpu.intel.quickpath.mem_controller_ce@chip/memory-controller,
    engine=serd.cpu.intel.quickpath.mem_controller_ce@chip/memory-controller,
    retire=0, response=0;

prop fault.cpu.intel.quickpath.mem_controller_ce@chip/memory-controller ->
    ereport.cpu.intel.quickpath.mem_ptrl_fsm_err@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_errflw_fsm_fail@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_vberr@chip/memory-controller;

EX_MEM_EVENT(mem_ptrl_fsm_err_uc, 1s);
EX_MEM_EVENT(mem_errflw_fsm_fail_uc, 1s);
EX_MEM_EVENT(mem_mcpar_fsmerr_uc, 1s);
EX_MEM_EVENT(mem_vberr_uc, 1s);
EX_MEM_EVENT(mem_fberr_uc, 1s);

event fault.cpu.intel.quickpath.mem_controller_ue@chip/memory-controller,
    retire=0;

prop fault.cpu.intel.quickpath.mem_controller_ue@chip/memory-controller ->
    ereport.cpu.intel.quickpath.mem_ptrl_fsm_err_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_errflw_fsm_fail_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_mcpar_fsmerr_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_vberr_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_fberr_uc@chip/memory-controller;

EX_MEM_EVENT(mem_scrubbing_uc, 1s);
event fault.cpu.intel.quickpath.mem_scrubbing@
    chip/memory-controller/dram-channel/dimm/rank,
    response=0;

/*
 * The rank is selected by matching the "rank" payload member against the
 * rank instance number; the address/offset is recorded in the fault.
 */
prop fault.cpu.intel.quickpath.mem_scrubbing@
    chip/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
    (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (1)->
    ereport.cpu.intel.quickpath.mem_scrubbing_uc@chip/memory-controller;

EX_MEM_EVENT(mem_ecc_uc, 12s);
EX_MEM_EVENT(mem_even_parity_uc, 1s);

EX_MEM_EVENT(mem_ecc, 12s);
EX_MEM_EVENT(mem_even_parity, 1s);

event error.memory.intel.ex_dimm_ce@
    chip/memory-controller/dram-channel/dimm/rank;

prop fault.memory.intel.page_ue@
    chip/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
    (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.intel.quickpath.mem_ecc_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_even_parity_uc@chip/memory-controller;

prop fault.memory.intel.page_ce@
    chip/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
    (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.intel.quickpath.mem_ecc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_even_parity@chip/memory-controller;

prop error.memory.intel.dimm_ue_ex@
    chip/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") } (1)->
    ereport.cpu.intel.quickpath.mem_ecc_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_even_parity_uc@chip/memory-controller;

/*
 * DIMMs with no "dimm-size" config property: the dimm_ce fault's SERD
 * engine is set to N=10, T=1week, and the intermediate ex_dimm_ce error
 * requires more than 512 counted page faults.
 */
prop fault.memory.intel.dimm_ce@
    chip/memory-controller/dram-channel/dimm
    { !confprop_defined(chip/memory-controller/dram-channel/dimm,
    "dimm-size") && setserdn(10) & setserdt(1week) } (0)->
    error.memory.intel.ex_dimm_ce@
    chip/memory-controller/dram-channel/dimm/rank;
prop error.memory.intel.ex_dimm_ce@
    chip/memory-controller/dram-channel/dimm/rank[rank_num]
    { payloadprop_defined("rank") && rank_num == payloadprop("rank") &&
    !confprop_defined(chip/memory-controller/dram-channel/dimm,
    "dimm-size") &&
    count(STAT_CPU_MEM_CE_PGFLTS) > 512 } (1)->
    ereport.cpu.intel.quickpath.mem_ecc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_even_parity@chip/memory-controller;

/*
 * EX_CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) emits two propagations
 * for DIMMs whose "dimm-size" config property equals dimm_size:
 *   - dimm_ce fault -> ex_dimm_ce error, setting SERD N and T to n and t;
 *   - ex_dimm_ce error (rank matched via the "rank" payload member, and
 *     only once the page-fault count exceeds fault_rate) -> the mem_ecc
 *     and mem_even_parity ereports.
 * The macro body supplies its own terminating ';'.
 */
#define EX_CPU_MEM_DIMM_CE(dimm_size, n, t, fault_rate) \
    prop fault.memory.intel.dimm_ce@ \
    chip/memory-controller/dram-channel/dimm { \
    confprop(chip/memory-controller/dram-channel/dimm, \
    "dimm-size") == dimm_size && \
    setserdn(n) & setserdt(t) } (0)-> \
    error.memory.intel.ex_dimm_ce@ \
    chip/memory-controller/dram-channel/dimm/rank; \
    prop error.memory.intel.ex_dimm_ce@ \
    chip/memory-controller/dram-channel/dimm/rank[rank_num] { \
    payloadprop_defined("rank") && rank_num == payloadprop("rank") && \
    confprop(chip/memory-controller/dram-channel/dimm, \
    "dimm-size") == dimm_size && \
    count(STAT_CPU_MEM_CE_PGFLTS) > fault_rate } (1)-> \
    ereport.cpu.intel.quickpath.mem_ecc@chip/memory-controller, \
    ereport.cpu.intel.quickpath.mem_even_parity@chip/memory-controller;

EX_CPU_MEM_DIMM_CE("16G", 16, 1week, 2000)
EX_CPU_MEM_DIMM_CE("8G", 8, 1week, 2000)
EX_CPU_MEM_DIMM_CE("4G", 4, 1week, 1500)
EX_CPU_MEM_DIMM_CE("2G", 4, 2week, 1000)
EX_CPU_MEM_DIMM_CE("1G", 4, 4week, 500)

event upset.memory.intel.discard@chip/memory-controller{within(1s)};

prop upset.memory.intel.discard@chip/memory-controller (0)->
    ereport.cpu.intel.quickpath.mem_scrubbing_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_ecc_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_even_parity_uc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_ecc@chip/memory-controller,
    ereport.cpu.intel.quickpath.mem_even_parity@chip/memory-controller;

EX_MEM_EVENT(mem_failover_mir, 1s);
event fault.cpu.intel.quickpath.mem_failover_mir@chip/memory-controller,
    retire=0;

prop fault.cpu.intel.quickpath.mem_failover_mir@chip/memory-controller ->
    ereport.cpu.intel.quickpath.mem_failover_mir@chip/memory-controller;

/*
 * RBox errors
 *
 * EX_EVENT(leafclass, t) declares ereport.cpu.intel.quickpath.<leafclass>
 * at chip with a within(t) constraint.
 */
#define EX_EVENT(leafclass, t) \
    event ereport.cpu.intel.quickpath.leafclass@chip { within(t) }

engine serd.cpu.intel.quickpath.bus_interconnect@chip,
    N=3, T=72h;
event fault.cpu.intel.quickpath.bus_interconnect@chip,
    engine=serd.cpu.intel.quickpath.bus_interconnect@chip,
    retire=0;

EX_EVENT(bus_retry_abort, 1s);
EX_EVENT(bus_link_init_ce, 1s);
event upset.cpu.intel.quickpath.discard@chip;

prop upset.cpu.intel.quickpath.discard@chip (0)->
    ereport.cpu.intel.quickpath.bus_retry_abort@chip,
    ereport.cpu.intel.quickpath.bus_link_init_ce@chip;

EX_EVENT(bus_unknown, 1s);
EX_EVENT(bus_single_ecc, 1s);
EX_EVENT(bus_crc_flit, 1s);

prop fault.cpu.intel.quickpath.bus_interconnect@chip (0)->
    ereport.cpu.intel.quickpath.bus_unknown@chip,
    ereport.cpu.intel.quickpath.bus_single_ecc@chip,
    ereport.cpu.intel.quickpath.bus_crc_flit@chip;

EX_EVENT(bus_unknown_external, 1s);
EX_EVENT(bus_crc_flit_external, 1s);
prop upset.cpu.intel.quickpath.discard@chip (0)->
    ereport.cpu.intel.quickpath.bus_unknown_external@chip,
    ereport.cpu.intel.quickpath.bus_crc_flit_external@chip;

EX_EVENT(bus_unknown_uc, 1s);
EX_EVENT(bus_opr_poison_err, 1s);
EX_EVENT(bus_eot_parity, 1s);
EX_EVENT(bus_rta_parity, 1s);
EX_EVENT(bus_bad_sbu_route, 1s);
EX_EVENT(bus_bad_msg, 1s);
EX_EVENT(bus_bad_vn_credit, 1s);
EX_EVENT(bus_hdr_double_ecc, 1s);
EX_EVENT(bus_link_retry_err, 1s);

/* An increment of 4 exceeds the bus_interconnect engine's N=3 in one step */
prop fault.cpu.intel.quickpath.bus_interconnect@chip
    { setserdincrement(4) } (0)->
    ereport.cpu.intel.quickpath.bus_unknown_uc@chip,
    ereport.cpu.intel.quickpath.bus_opr_poison_err@chip,
    ereport.cpu.intel.quickpath.bus_eot_parity@chip,
    ereport.cpu.intel.quickpath.bus_rta_parity@chip,
    ereport.cpu.intel.quickpath.bus_bad_sbu_route@chip,
    ereport.cpu.intel.quickpath.bus_bad_msg@chip,
    ereport.cpu.intel.quickpath.bus_bad_vn_credit@chip,
    ereport.cpu.intel.quickpath.bus_hdr_double_ecc@chip,
    ereport.cpu.intel.quickpath.bus_link_retry_err@chip;

EX_EVENT(bus_unknown_uc_external, 1s);
EX_EVENT(bus_opr_poison_err_external, 1s);
EX_EVENT(bus_eot_parity_external, 1s);
EX_EVENT(bus_rta_parity_external, 1s);
EX_EVENT(bus_bad_sbu_route_external, 1s);
EX_EVENT(bus_bad_msg_external, 1s);
EX_EVENT(bus_bad_vn_credit_external, 1s);
EX_EVENT(bus_hdr_double_ecc_external, 1s);
EX_EVENT(bus_link_retry_err_external, 1s);

prop upset.cpu.intel.quickpath.discard@chip (0)->
    ereport.cpu.intel.quickpath.bus_unknown_uc_external@chip,
    ereport.cpu.intel.quickpath.bus_opr_poison_err_external@chip,
    ereport.cpu.intel.quickpath.bus_eot_parity_external@chip,
    ereport.cpu.intel.quickpath.bus_rta_parity_external@chip,
    ereport.cpu.intel.quickpath.bus_bad_sbu_route_external@chip,
    ereport.cpu.intel.quickpath.bus_bad_msg_external@chip,
    ereport.cpu.intel.quickpath.bus_bad_vn_credit_external@chip,
    ereport.cpu.intel.quickpath.bus_hdr_double_ecc_external@chip,
    ereport.cpu.intel.quickpath.bus_link_retry_err_external@chip;

/*
 * CBox errors
 */
EX_EVENT(llc_ewb_uc, 1s);
event fault.cpu.intel.quickpath.llc_ewb@chip,
    retire=0, response=0;

prop fault.cpu.intel.quickpath.llc_ewb@chip
    { (payloadprop_defined("physaddr") || payloadprop_defined("offset")) &&
    SET_ADDR && SET_OFFSET } (1)->
    ereport.cpu.intel.quickpath.llc_ewb_uc@chip;

prop upset.cpu.intel.quickpath.discard@chip (0)->
    ereport.cpu.intel.quickpath.llc_ewb_uc@chip;

/*
 * SBox errors
 */
EX_EVENT(system_cache_uc, 1s);
event fault.cpu.intel.quickpath.system_cache@chip,
    retire=0, response=0;
prop fault.cpu.intel.quickpath.system_cache@chip ->
    ereport.cpu.intel.quickpath.system_cache_uc@chip;

/*
 * BBox errors
 */
EX_EVENT(home_agent_uc, 1s);
event fault.cpu.intel.quickpath.home_agent@chip,
    retire=0, response=0;
prop fault.cpu.intel.quickpath.home_agent@chip ->
    ereport.cpu.intel.quickpath.home_agent_uc@chip;

/*
 * UBox errors
 */
EX_EVENT(sys_cfg_cfa_ecc, 1s);
EX_EVENT(sys_cfg_uc, 1s);

engine serd.cpu.intel.quickpath.sys_cfg@chip,
    N=2, T=72h;
event fault.cpu.intel.quickpath.sys_cfg@chip,
    engine=serd.cpu.intel.quickpath.sys_cfg@chip,
    retire=0, response=0;

prop fault.cpu.intel.quickpath.sys_cfg@chip (0)->
    ereport.cpu.intel.quickpath.sys_cfg_cfa_ecc@chip;

/* An increment of 3 exceeds the sys_cfg engine's N=2 in one step */
prop fault.cpu.intel.quickpath.sys_cfg@chip
    { setserdincrement(3) } (0)->
    ereport.cpu.intel.quickpath.sys_cfg_uc@chip;

/*
 * Handling poison errors
 */
engine
stat.has_poison@motherboard; 924event fault.cpu.intel.has_poison@motherboard, 925 count=stat.has_poison@motherboard[0], 926 message=0, retire=0, response=0; 927engine stat.discard_fatal@motherboard; 928event fault.cpu.intel.discard_fatal@motherboard, 929 count=stat.discard_fatal@motherboard[0], 930 message=0, retire=0, response=0; 931 932prop fault.cpu.intel.has_poison@motherboard 933 { payloadprop_defined("poison") && 1 == payloadprop("poison") } (1)-> 934 ereport.cpu.intel.quickpath.mem_scrubbing_uc@chip<>/memory-controller<>, 935 ereport.cpu.intel.quickpath.llc_ewb_uc@chip<>, 936 ereport.cpu.intel.quickpath.system_cache_uc@chip<>, 937 ereport.cpu.intel.quickpath.bus_opr_poison_err@chip<>, 938 ereport.cpu.intel.quickpath.bus_opr_poison_err_external@chip<>; 939 940prop fault.cpu.intel.discard_fatal@motherboard 941 { count(stat.has_poison@motherboard[0]) > count(stat.discard_fatal@motherboard[0]) && 942 payloadprop_defined("bank_number") && 5 == payloadprop("bank_number") && 943 payloadprop_defined("processor_context_corrupt") && 944 1 == payloadprop("processor_context_corrupt") } (0)-> 945 ereport.cpu.intel.internal_unclassified@chip<>/core<>/strand<> {within(10s)}; 946 947prop fault.cpu.intel.internal@chip/core/strand 948 { (count(stat.has_poison@motherboard[0]) <= count(stat.discard_fatal@motherboard[0]) || 949 !payloadprop_defined("bank_number") || 5 != payloadprop("bank_number") || 950 !payloadprop_defined("processor_context_corrupt") || 951 0 == payloadprop("processor_context_corrupt")) && 952 (payloadprop("error_uncorrected") == 1 ? setserdincrement(4) : 1) } (0)-> 953 ereport.cpu.intel.internal_unclassified@chip/core/strand; 954