1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Ereport-handling routines for memory errors
28 */
29
30 #include <cmd_mem.h>
31 #include <cmd_dimm.h>
32 #include <cmd_bank.h>
33 #include <cmd_page.h>
34 #include <cmd_cpu.h>
35 #include <cmd.h>
36
37 #include <strings.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <unistd.h>
42 #include <fm/fmd_api.h>
43 #include <sys/fm/protocol.h>
44 #include <sys/fm/cpu/UltraSPARC-III.h>
45 #include <sys/async.h>
46 #include <sys/cheetahregs.h>
47 #include <sys/errclassify.h>
48 #include <sys/fm/io/sun4upci.h>
49 #include <sys/pci/pcisch.h>
50
/*
 * Jalapeno-specific AFSR fields (values from cheetahregs.h).  Each mask is
 * written as the field width shifted up to the field's bit position.
 */
#define	USIIIi_AFSR_AID_SHIFT	9
#define	USIIIi_AFSR_AID		(0x1full << USIIIi_AFSR_AID_SHIFT)
					/* AID causing UE/CE */
#define	USIIIi_AFSR_JREQ_SHIFT	24
#define	USIIIi_AFSR_JREQ	(0x7ull << USIIIi_AFSR_JREQ_SHIFT)
					/* Active JBus req */

/* Agent-id bits compared when an IOxE is involved (the LSB is ignored) */
#define	TOM_AID_MATCH_MASK	0xe

/* Agent id recorded as the AFSR agent id for Fire ereports */
#define	FIRE_AID		0xe
/* Bits of the jbc-jitel1 payload member that are kept as the AFAR */
#define	FIRE_JBC_ADDR_MASK	0x000007ffffffffffull
/* Name of the Fire payload member from which the AFAR is derived */
#define	FIRE_JBC_JITEL1		"jbc-jitel1"
61
62 /*ARGSUSED*/
63 cmd_evdisp_t
cmd_mem_synd_check(fmd_hdl_t * hdl,uint64_t afar,uint8_t afar_status,uint16_t synd,uint8_t synd_status,cmd_cpu_t * cpu)64 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status,
65 uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu)
66 {
67 if (synd == CH_POISON_SYND_FROM_XXU_WRITE ||
68 ((cpu->cpu_type == CPU_ULTRASPARC_IIIi ||
69 cpu->cpu_type == CPU_ULTRASPARC_IIIiplus) &&
70 synd == CH_POISON_SYND_FROM_XXU_WRMERGE)) {
71 fmd_hdl_debug(hdl,
72 "discarding UE due to magic syndrome %x\n", synd);
73 return (CMD_EVD_UNUSED);
74 }
75 return (CMD_EVD_OK);
76 }
77
78 static cmd_evdisp_t
xe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_xe_handler_f * hdlr)79 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
80 const char *class, cmd_xe_handler_f *hdlr)
81 {
82 uint64_t afar;
83 uint16_t synd;
84 uint8_t afar_status, synd_status;
85 nvlist_t *rsrc;
86 char *typenm;
87 uint64_t disp;
88 int minorvers = 1;
89
90 if (nvlist_lookup_pairs(nvl, 0,
91 FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &afar,
92 FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8, &afar_status,
93 FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &synd,
94 FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8, &synd_status,
95 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
96 FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
97 NULL) != 0)
98 return (CMD_EVD_BAD);
99
100 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
101 &disp) != 0)
102 minorvers = 0;
103
104 return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd,
105 synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
106 }
107
108 /*ARGSUSED*/
109 cmd_evdisp_t
cmd_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)110 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
111 cmd_errcl_t clcode)
112 {
113 return (xe_common(hdl, ep, nvl, class, cmd_ce_common));
114 }
115
116 /*ARGSUSED*/
117 cmd_evdisp_t
cmd_ue(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)118 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
119 cmd_errcl_t clcode)
120 {
121 return (xe_common(hdl, ep, nvl, class, cmd_ue_common));
122 }
123
124 cmd_evdisp_t
cmd_frx(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)125 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
126 cmd_errcl_t clcode)
127 {
128 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_FRC ? (CMD_ERRCL_RCE |
129 CMD_ERRCL_IOCE) : (CMD_ERRCL_RUE | CMD_ERRCL_IOUE));
130
131 return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask));
132 }
133
134 /*
135 * When we complete an IOxE/RxE FRx pair, we have enough information to
136 * create either a CE or a UE, as appropriate. Before dispatching the
137 * joined event to the xE handler, we need to generate the FMRI for the
138 * named DIMM. While one of the events may already contain a resource FMRI,
139 * said FMRI is incomplete. The detector didn't have the necessary
140 * information (the AFAR, the AFSR, *and* the syndrome) needed to create
141 * a DIMM-level FMRI.
142 */
143 static cmd_evdisp_t
iorxefrx_synthesize(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,uint64_t afar,uint8_t afar_status,uint64_t afsr,uint16_t synd,uint8_t synd_status,ce_dispact_t type,uint64_t disp,cmd_xe_handler_f * hdlr)144 iorxefrx_synthesize(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
145 const char *class, uint64_t afar, uint8_t afar_status, uint64_t afsr,
146 uint16_t synd, uint8_t synd_status, ce_dispact_t type, uint64_t disp,
147 cmd_xe_handler_f *hdlr)
148 {
149 nvlist_t *fmri;
150 int rc;
151
152 if ((fmri = cmd_dimm_fmri_derive(hdl, afar, synd, afsr)) == NULL)
153 return (CMD_EVD_UNUSED);
154
155 rc = hdlr(hdl, ep, nvl, class, afar, afar_status, synd, synd_status,
156 type, disp, fmri);
157
158 nvlist_free(fmri);
159
160 return (rc);
161 }
162
163 static cmd_iorxefrx_t *
iorxefrx_match(fmd_hdl_t * hdl,cmd_errcl_t errcl,cmd_errcl_t matchmask,uint_t det_agentid,uint_t afsr_agentid)164 iorxefrx_match(fmd_hdl_t *hdl, cmd_errcl_t errcl, cmd_errcl_t matchmask,
165 uint_t det_agentid, uint_t afsr_agentid)
166 {
167 cmd_iorxefrx_t *rf;
168
169 for (rf = cmd_list_next(&cmd.cmd_iorxefrx); rf != NULL;
170 rf = cmd_list_next(rf)) {
171
172 fmd_hdl_debug(hdl, "rf->rf_errcl = %llx, matchmask = %llx\n"
173 "rf->rf_det_agentid = %lx, afsr_agentid = %lx\n"
174 "rf->rf_afsr_agentid = %lx, det_agentid = %lx\n",
175 rf->rf_errcl, matchmask, rf->rf_det_agentid, afsr_agentid,
176 rf->rf_afsr_agentid, det_agentid);
177
178 if ((rf->rf_errcl & matchmask) == 0)
179 continue;
180
181 /*
182 * For IOxEs we are unable to match based on both the detector
183 * and the captured Agent Id in the AFSR, because the bridge
184 * captures it's own Agent Id instead of the remote CPUs.
185 *
186 * Also, the LSB of Tomatillo's jpid is aliased for each chip
187 * and therefore needs to be factored out of our matching.
188 */
189 if ((CMD_ERRCL_ISIOXE(rf->rf_errcl) ||
190 CMD_ERRCL_ISIOXE(errcl)) &&
191 ((rf->rf_afsr_agentid & TOM_AID_MATCH_MASK) ==
192 (afsr_agentid & TOM_AID_MATCH_MASK)))
193 return (rf);
194
195 /*
196 * Check for both here since IOxE is not involved
197 */
198 if ((rf->rf_afsr_agentid == det_agentid) &&
199 (rf->rf_det_agentid == afsr_agentid))
200 return (rf);
201 }
202
203 return (NULL);
204 }
205
206 /*
207 * Got an RxE or an FRx. FRx ereports can be matched with RxE ereports and
208 * vice versa. FRx ereports can also be matched with IOxE ereports.
209 */
210 cmd_evdisp_t
cmd_rxefrx_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode,cmd_errcl_t matchmask)211 cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
212 const char *class, cmd_errcl_t clcode, cmd_errcl_t matchmask)
213 {
214 cmd_xe_handler_f *hdlr;
215 cmd_iorxefrx_t *rfmatch, *rferr;
216 cmd_cpu_t *cpu;
217 char *typenm;
218 int isrxe = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_RUE);
219 int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_FRC);
220 int rc;
221 int minorvers = 1;
222 uint8_t level = clcode & CMD_ERRCL_LEVEL_EXTRACT;
223
224 clcode &= CMD_ERRCL_LEVEL_MASK;
225 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
226
227 if (nvlist_lookup_pairs(nvl, 0,
228 FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &rferr->rf_synd,
229 FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8,
230 &rferr->rf_synd_status,
231 FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar,
232 FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8,
233 &rferr->rf_afar_status,
234 FM_EREPORT_PAYLOAD_NAME_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr,
235 FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
236 NULL) != 0) {
237 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
238 return (CMD_EVD_BAD);
239 }
240 if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
241 &rferr->rf_disp) != 0)
242 minorvers = 0;
243
244 rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
245
246 if ((cpu = cmd_cpu_lookup_from_detector(hdl, nvl, class,
247 level)) == NULL) {
248 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
249 return (CMD_EVD_UNUSED);
250 }
251
252 if (!isrxe && rferr->rf_synd_status != AFLT_STAT_VALID) {
253 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
254 return (CMD_EVD_UNUSED);
255 }
256
257 if (isrxe) {
258 rferr->rf_afsr_agentid = (rferr->rf_afsr &
259 USIIIi_AFSR_JREQ) >> USIIIi_AFSR_JREQ_SHIFT;
260 } else {
261 rferr->rf_afsr_agentid = (rferr->rf_afsr &
262 USIIIi_AFSR_AID) >> USIIIi_AFSR_AID_SHIFT;
263 }
264
265 rferr->rf_errcl = clcode;
266 rferr->rf_det_agentid = cpu->cpu_cpuid;
267
268 if ((rfmatch = iorxefrx_match(hdl, clcode, matchmask,
269 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
270 cmd_iorxefrx_queue(hdl, rferr);
271 return (CMD_EVD_OK);
272 }
273
274 /*
275 * Found a match. Send a synthesized ereport to the appropriate
276 * routine.
277 */
278 fmd_hdl_debug(hdl, "matched %cE %llx with %llx", "UC"[isce],
279 rferr->rf_errcl, rfmatch->rf_errcl);
280
281 hdlr = (isce ? cmd_ce_common : cmd_ue_common);
282 if (isrxe) {
283 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
284 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
285 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp,
286 hdlr);
287 } else {
288 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rfmatch->rf_afar,
289 rfmatch->rf_afar_status, rferr->rf_afsr, rferr->rf_synd,
290 rferr->rf_synd_status, rfmatch->rf_type, rferr->rf_disp,
291 hdlr);
292 }
293
294 cmd_iorxefrx_free(hdl, rfmatch);
295 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
296
297 return (rc);
298 }
299
300 /*
301 * This fire IOxE must be matched with an FRx before UE/CE processing
302 * is possible.
303 *
304 * Note that for fire ereports we don't receive AFSR, AFAR, AFAR-Status
305 * and SYND values but we can derive the AFAR from the payload value
306 * FIRE_JBC_JITEL1. We may receive a TYPNM value.
307 */
308 static cmd_evdisp_t
cmd_ioxefrx_fire(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t errcl,cmd_errcl_t matchmask)309 cmd_ioxefrx_fire(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
310 const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask)
311 {
312 cmd_xe_handler_f *hdlr;
313 cmd_iorxefrx_t *rfmatch, *rferr;
314 uint64_t afar;
315 int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE);
316 char *portid_str;
317 char *path = NULL;
318 char *typenm = NULL;
319 nvlist_t *det = NULL;
320 int rc;
321 int minorvers = 1;
322
323 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
324
325 /*
326 * Lookup device path of host bridge.
327 */
328 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
329 (void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path);
330
331 /*
332 * get Jbus port id from the device path
333 */
334 portid_str = strrchr(path, '@') + 1;
335 rferr->rf_det_agentid = strtol(portid_str, NULL, 16);
336
337 rferr->rf_errcl = errcl;
338 rferr->rf_afsr_agentid = FIRE_AID;
339 rferr->rf_afar_status = AFLT_STAT_VALID;
340 rferr->rf_synd_status = AFLT_STAT_VALID;
341
342 /*
343 * Extract the afar from the payload
344 */
345 (void) nvlist_lookup_uint64(nvl, FIRE_JBC_JITEL1, &afar);
346 rferr->rf_afar = afar & FIRE_JBC_ADDR_MASK;
347
348 rferr->rf_afsr = 0;
349 rferr->rf_synd = 0;
350
351 if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
352 &typenm) == 0)
353 rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
354
355 /*
356 * Need to send in the io_jpid that we get from the device path above
357 * for both the det_agentid and the afsr_agentid, since the CPU does not
358 * capture the same address as the bridge. The bridge has the LSB
359 * aliased and the CPU is missing the MSB.
360 */
361 if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask,
362 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
363 cmd_iorxefrx_queue(hdl, rferr);
364 return (CMD_EVD_OK);
365 }
366
367 /* Found a match. Synthesize an ereport for UE/CE processing. */
368 fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce],
369 rferr->rf_errcl, rfmatch->rf_errcl);
370
371 hdlr = (isce ? cmd_ce_common : cmd_ue_common);
372 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
373 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
374 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr);
375
376 cmd_iorxefrx_free(hdl, rfmatch);
377 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
378
379 return (rc);
380 }
381
382 /* This IOxE must be matched with an FRx before UE/CE processing is possible */
383 static cmd_evdisp_t
cmd_ioxefrx_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t errcl,cmd_errcl_t matchmask)384 cmd_ioxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
385 const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask)
386 {
387 cmd_xe_handler_f *hdlr;
388 cmd_iorxefrx_t *rfmatch, *rferr;
389 char *typenm;
390 int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE);
391 char *portid_str;
392 char *path = NULL;
393 nvlist_t *det = NULL;
394 int rc;
395 int minorvers = 1;
396
397 rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
398
399 if (nvlist_lookup_pairs(nvl, 0,
400 PCI_ECC_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar,
401 PCI_ECC_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr,
402 PCI_ECC_SYND, DATA_TYPE_UINT16, &rferr->rf_synd,
403 PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm,
404 NULL) != 0) {
405 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
406 return (CMD_EVD_BAD);
407 }
408
409 if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &rferr->rf_disp) != 0)
410 minorvers = 0;
411
412 rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
413 rferr->rf_errcl = errcl;
414
415 /*
416 * Lookup device path of host bridge.
417 */
418 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
419 (void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path);
420
421 /*
422 * get Jbus port id from the device path
423 */
424 portid_str = strrchr(path, '@') + 1;
425 rferr->rf_det_agentid = strtol(portid_str, NULL, 16);
426
427 rferr->rf_afsr_agentid = (rferr->rf_afsr &
428 SCHIZO_ECC_UE_AFSR_AGENT_MID) >> SCHIZO_ECC_UE_AFSR_AGENT_MID_SHIFT;
429
430 /*
431 * Only 4 bits of the Jbus AID are sent on the Jbus. MSB is the one
432 * that is chosen not to make the trip. This is not in any of the Jbus
433 * or Tomatillo documents and was discovered during testing and verified
434 * by Jalapeno H/W designer.
435 */
436 rferr->rf_afsr_agentid &= 0xf;
437 rferr->rf_afar_status = AFLT_STAT_VALID;
438 rferr->rf_synd_status = AFLT_STAT_VALID;
439
440 /*
441 * Need to send in the io_jpid that we get from the device path above
442 * for both the det_agentid and the afsr_agentid, since the CPU does not
443 * capture the same address as the bridge. The bridge has the LSB
444 * aliased and the CPU is missing the MSB.
445 */
446 if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask,
447 rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
448 cmd_iorxefrx_queue(hdl, rferr);
449 return (CMD_EVD_OK);
450 }
451
452 /* Found a match. Synthesize an ereport for UE/CE processing. */
453 fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce],
454 rferr->rf_errcl, rfmatch->rf_errcl);
455
456 hdlr = (isce ? cmd_ce_common : cmd_ue_common);
457 rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
458 rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
459 rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr);
460
461 cmd_iorxefrx_free(hdl, rfmatch);
462 fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
463
464 return (rc);
465 }
466
467 /* IOxE ereports that don't need matching with FRx ereports */
468 static cmd_evdisp_t
ioxe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)469 ioxe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
470 cmd_errcl_t clcode)
471 {
472 int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_IOCE);
473 cmd_xe_handler_f *hdlr = isce ? cmd_ce_common : cmd_ue_common;
474 uint64_t afar;
475 uint16_t synd;
476 nvlist_t *rsrc;
477 char *typenm;
478 uint64_t disp;
479 int minorvers = 1;
480
481 if (nvlist_lookup_pairs(nvl, 0,
482 PCI_ECC_AFAR, DATA_TYPE_UINT64, &afar,
483 PCI_ECC_SYND, DATA_TYPE_UINT16, &synd,
484 PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm,
485 PCI_ECC_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
486 NULL) != 0)
487 return (CMD_EVD_BAD);
488
489 if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &disp) != 0)
490 minorvers = 0;
491
492 return (hdlr(hdl, ep, nvl, class, afar, AFLT_STAT_VALID, synd,
493 AFLT_STAT_VALID, cmd_mem_name2type(typenm, minorvers), disp,
494 rsrc));
495 }
496
497 cmd_evdisp_t
cmd_rxe(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)498 cmd_rxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
499 cmd_errcl_t clcode)
500 {
501 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_RCE ? CMD_ERRCL_FRC :
502 CMD_ERRCL_FRU);
503
504 return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask));
505 }
506
507 cmd_evdisp_t
cmd_ioxe(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)508 cmd_ioxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
509 cmd_errcl_t clcode)
510 {
511 cmd_errcl_t matchmask = (clcode == CMD_ERRCL_IOCE ? CMD_ERRCL_FRC :
512 CMD_ERRCL_FRU);
513
514 if (fmd_nvl_class_match(hdl, nvl, "ereport.io.tom.*")) {
515 return (cmd_ioxefrx_common(hdl, ep, nvl, class, clcode,
516 matchmask));
517 } else if (fmd_nvl_class_match(hdl, nvl, "ereport.io.fire.*")) {
518 return (cmd_ioxefrx_fire(hdl, ep, nvl, class, clcode,
519 matchmask));
520 } else
521 return (ioxe_common(hdl, ep, nvl, class, clcode));
522 }
523
524 /*ARGSUSED*/
525 cmd_evdisp_t
cmd_ioxe_sec(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)526 cmd_ioxe_sec(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
527 cmd_errcl_t clcode)
528 {
529 /*
530 * Secondary IOxE's can't be used to identify failed or failing
531 * resources, as they don't contain enough information. Ignore them.
532 */
533 return (CMD_EVD_OK);
534 }
535
536 /*ARGSUSED*/
537 ulong_t
cmd_mem_get_phys_pages(fmd_hdl_t * hdl)538 cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
539 {
540 return (sysconf(_SC_PHYS_PAGES));
541 }
542
/*
 * sun4u bit position as function of e_synd,
 * from JPS1 Implementation Supplement table P-7
 * Encode bit positions as follows:
 * 0-127 data bits 0-127
 * 128-136 check bits 0-8 (Cn = 128+n)
 * no error or multibit error = -1 (not valid CE)
 *
 * The table has one entry per 9-bit syndrome value (0x000-0x1FF).
 */

int esynd2bit [] = {
	-1, 128, 129, -1, 130, -1, -1, 47,
	131, -1, -1, 53, -1, 41, 29, -1, /* 000-00F */
	132, -1, -1, 50, -1, 38, 25, -1,
	-1, 33, 24, -1, 11, -1, -1, 16, /* 010-01F */
	133, -1, -1, 46, -1, 37, 19, -1,
	-1, 31, 32, -1, 7, -1, -1, 10, /* 020-02F */
	-1, 40, 13, -1, 59, -1, -1, 66,
	-1, -1, -1, 0, -1, 67, 71, -1, /* 030-03F */
	134, -1, -1, 43, -1, 36, 18, -1,
	-1, 49, 15, -1, 63, -1, -1, 6, /* 040-04F */
	-1, 44, 28, -1, -1, -1, -1, 52,
	68, -1, -1, 62, -1, -1, -1, -1, /* 050-05F */
	-1, 26, 106, -1, 64, -1, -1, 2,
	120, -1, -1, -1, -1, -1, -1, -1, /* 060-06F */
	116, -1, -1, -1, -1, -1, -1, -1,
	-1, 58, 54, -1, -1, -1, -1, -1, /* 070-07F */
	135, -1, -1, 42, -1, 35, 17, -1,
	-1, 45, 14, -1, 21, -1, -1, 5, /* 080-08F */
	-1, 27, -1, -1, 99, -1, -1, 3,
	114, -1, -1, 20, -1, -1, -1, -1, /* 090-09F */
	-1, 23, 113, -1, 112, -1, -1, 51,
	95, -1, -1, -1, -1, -1, -1, -1, /* 0A0-0AF */
	103, -1, -1, -1, -1, -1, -1, -1,
	-1, 48, -1, -1, 73, -1, -1, -1, /* 0B0-0BF */
	-1, 22, 110, -1, 109, -1, -1, 9,
	108, -1, -1, -1, -1, -1, -1, -1, /* 0C0-0CF */
	102, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 0D0-0DF */
	98, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 0E0-0EF */
	-1, -1, -1, -1, -1, -1, -1, -1,
	56, -1, -1, -1, -1, -1, -1, -1, /* 0F0-0FF */
	136, -1, -1, 39, -1, 34, 105, -1,
	-1, 30, 104, -1, 101, -1, -1, 4, /* 100-10F */
	-1, -1, 100, -1, 83, -1, -1, 12,
	87, -1, -1, 57, -1, -1, -1, -1, /* 110-11F */
	-1, 97, 82, -1, 78, -1, -1, 1,
	96, -1, -1, -1, -1, -1, -1, -1, /* 120-12F */
	94, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, 79, -1, 69, -1, -1, -1, /* 130-13F */
	-1, 93, 92, -1, 91, -1, -1, 8,
	90, -1, -1, -1, -1, -1, -1, -1, /* 140-14F */
	89, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 150-15F */
	86, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 160-16F */
	-1, -1, -1, -1, -1, -1, -1, -1,
	60, -1, -1, -1, -1, -1, -1, -1, /* 170-17F */
	-1, 88, 85, -1, 84, -1, -1, 55,
	81, -1, -1, -1, -1, -1, -1, -1, /* 180-18F */
	77, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 190-19F */
	74, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 1A0-1AF */
	-1, 70, 107, -1, 65, -1, -1, -1,
	127, -1, -1, -1, -1, -1, -1, -1, /* 1B0-1BF */
	80, -1, -1, 72, -1, 119, 118, -1,
	-1, 126, 76, -1, 125, -1, -1, -1, /* 1C0-1CF */
	-1, 115, 124, -1, 75, -1, -1, -1,
	61, -1, -1, -1, -1, -1, -1, -1, /* 1D0-1DF */
	-1, 123, 122, -1, 121, -1, -1, -1,
	117, -1, -1, -1, -1, -1, -1, -1, /* 1E0-1EF */
	111, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1 /* 1F0-1FF */
};

int msynd2bit [] = { /* msynd 0-F */
	-1, 140, 141, -1,
	142, -1, -1, 137,
	143, -1, -1, 138,
	-1, 139, -1, -1
};

/*
 * Map an E-syndrome to its unit (bit) position using esynd2bit.
 *
 * Returns the encoded bit position, or -1 when the syndrome does not
 * identify a single bit.  A syndrome beyond the end of the table is also
 * reported as -1 instead of being used to index past the array.
 */
int
cmd_synd2upos(uint16_t syndrome)
{
	if (syndrome >= sizeof (esynd2bit) / sizeof (esynd2bit[0]))
		return (-1);

	return (esynd2bit[syndrome]);
}
631
/* Supplies the platform name that cmd_get_platform() looks up in id_plat. */
const char *fmd_fmri_get_platform();

/* Size of the buffer in which a board FRU name is formatted */
#define	DP_MAX	25

/* FRU names used for platform ids 3 and 4, indexed by board id */
const char *slotname[] = {
	"Slot A",
	"Slot B",
	"Slot C",
	"Slot D"
};
638
/*
 * One suspect of a datapath fault: a board or CPU id together with the
 * number of matching reports attributed to it.
 */
typedef struct fault_info {
	uint32_t id;	/* board id or CPU id */
	int count;	/* number of reports that map to this id */
} fault_info_t;
643
/*
 * Platform name to platform id map, terminated by a NULL name.  The id
 * selects the CPU-id-to-board-id rule in cmd_get_boardid() and the FRU name
 * format in cmd_gen_datapath_fault().  Names are compared exactly.
 */
struct plat2id_map {
	char *platnm;	/* platform name */
	int id;		/* platform id */
} id_plat[] = {
	/* id 1 */
	{"SUNW,Sun-Fire-15000", 1},

	/* id 2 */
	{"SUNW,Sun-Fire", 2},
	{"SUNW,Netra-T12", 2},

	/* id 3 */
	{"SUNW,Sun-Fire-480R", 3},
	{"SUNW,Sun-Fire-V490", 3},
	{"SUNW,Sun-Fire-V440", 3},
	{"SUNW,Sun-Fire-V445", 3},
	{"SUNW,Netra-440", 3},

	/* id 4 */
	{"SUNW,Sun-Fire-880", 4},
	{"SUNW,Sun-Fire-V890", 4},

	/* end of table */
	{NULL, 0}
};
660
/*
 * Store afar, unchanged, through addr.  class is not used.
 */
/*ARGSUSED*/
void
cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class)
{
	addr[0] = afar;
}
667
668 /*ARGSUSED*/
669 int
cmd_same_datapath_dimms(cmd_dimm_t * d1,cmd_dimm_t * d2)670 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2)
671 {
672 return (1);
673 }
674
675 static int
cmd_get_platform()676 cmd_get_platform()
677 {
678 const char *platname;
679 int id = -1;
680 int i;
681
682 platname = fmd_fmri_get_platform();
683 for (i = 0; id_plat[i].platnm != NULL; i++) {
684 if (strcmp(platname, id_plat[i].platnm) == 0) {
685 id = id_plat[i].id;
686 break;
687 }
688 }
689 return (id);
690 }
691
/*
 * Convert a CPU id into a board id using the rule for the current platform
 * id (see id_plat).  Platforms that are not in the table get board id 5.
 */
static int
cmd_get_boardid(uint32_t cpuid)
{
	uint32_t low3 = cpuid & 0x07;

	switch (cmd_get_platform()) {
	case 1:
		return ((cpuid >> 5) & 0x1f);

	case 2:
		return ((cpuid & 0x1f) / 4);

	case 3:
		/* even CPUs are on board 0, odd CPUs on board 1 */
		return (((low3 % 2) == 0) ? 0 : 1);

	case 4:
		/* even CPUs: 0,2 -> 0 and 4,6 -> 2; odd: 1,3 -> 1, 5,7 -> 3 */
		if ((low3 % 2) == 0)
			return ((low3 < 4) ? 0 : 2);
		return ((low3 < 5) ? 1 : 3);

	default:
		return (5);
	}
}
724
725 static void
cmd_get_faulted_comp(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,fault_info_t ** fault_list,int cpu)726 cmd_get_faulted_comp(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
727 uint16_t upos, fault_info_t **fault_list, int cpu)
728 {
729 cmd_mq_t *ip;
730 int i, j, k, idj;
731 uint32_t id;
732 uint32_t *cpuid = NULL;
733 int max_rpt;
734
735 max_rpt = 2 * cmd.cmd_nupos;
736
737 cpuid = fmd_hdl_alloc(hdl, max_rpt * sizeof (uint32_t), FMD_SLEEP);
738
739 if (cpuid == NULL)
740 return;
741
742 for (i = 0, j = 0; i < CMD_MAX_CKWDS; i++) {
743 for (ip = cmd_list_next(&d1->mq_root[i]); ip != NULL;
744 ip = cmd_list_next(ip)) {
745 if (upos == ip->mq_unit_position) {
746 cpuid[j] = ip->mq_cpuid;
747 j++;
748 }
749 if (j >= cmd.cmd_nupos)
750 break;
751 }
752 if (j >= cmd.cmd_nupos)
753 break;
754 }
755
756 for (i = 0; i < CMD_MAX_CKWDS; i++) {
757 for (ip = cmd_list_next(&d2->mq_root[i]); ip != NULL;
758 ip = cmd_list_next(ip)) {
759 if (upos == ip->mq_unit_position) {
760 cpuid[j] = ip->mq_cpuid;
761 j++;
762 }
763 if (j >= max_rpt)
764 break;
765 }
766 if (j >= max_rpt)
767 break;
768 }
769
770 for (i = 0, k = 0; i < max_rpt; i++) {
771 if (cpuid[i] == ULONG_MAX)
772 continue;
773 id = (cpu == 0) ? cmd_get_boardid(cpuid[i]) : cpuid[i];
774 fault_list[k] = fmd_hdl_alloc(hdl,
775 sizeof (fault_info_t), FMD_SLEEP);
776 if (fault_list[k] == NULL)
777 break;
778 fault_list[k]->count = 1;
779 fault_list[k]->id = id;
780 for (j = i + 1; j < max_rpt; j++) {
781 if (cpuid[j] == ULONG_MAX)
782 continue;
783 idj = (cpu == 0) ? cmd_get_boardid(cpuid[j]) : cpuid[j];
784 if (id == idj) {
785 fault_list[k]->count++;
786 cpuid[j] = ULONG_MAX;
787 }
788 }
789 k++;
790 }
791
792 fmd_hdl_free(hdl, cpuid, max_rpt * sizeof (uint32_t));
793 }
794
795 /*ARGSUSED*/
796 static nvlist_t *
cmd_board_mkfru(fmd_hdl_t * hdl,char * frustr)797 cmd_board_mkfru(fmd_hdl_t *hdl, char *frustr)
798 {
799 nvlist_t *hcel, *fru;
800 int err;
801
802 if (frustr == NULL)
803 return (NULL);
804
805 if (nvlist_alloc(&hcel, NV_UNIQUE_NAME, 0) != 0)
806 return (NULL);
807
808 err = nvlist_add_string(hcel, FM_FMRI_HC_NAME,
809 FM_FMRI_LEGACY_HC);
810 err |= nvlist_add_string(hcel, FM_FMRI_HC_ID, frustr);
811 if (err != 0) {
812 nvlist_free(hcel);
813 return (NULL);
814 }
815
816 if (nvlist_alloc(&fru, NV_UNIQUE_NAME, 0) != 0) {
817 nvlist_free(hcel);
818 return (NULL);
819 }
820 err = nvlist_add_uint8(fru, FM_VERSION, FM_HC_SCHEME_VERSION);
821 err |= nvlist_add_string(fru, FM_FMRI_SCHEME,
822 FM_FMRI_SCHEME_HC);
823 err |= nvlist_add_string(fru, FM_FMRI_HC_ROOT, "");
824 err |= nvlist_add_uint32(fru, FM_FMRI_HC_LIST_SZ, 1);
825 err |= nvlist_add_nvlist_array(fru, FM_FMRI_HC_LIST, &hcel, 1);
826 if (err != 0) {
827 nvlist_free(fru);
828 nvlist_free(hcel);
829 return (NULL);
830 }
831 nvlist_free(hcel);
832 return (fru);
833 }
834
835 /*
836 * Startcat, Serengeti, V4xx, and V8xx: fault the system boards of
837 * the detectors in proportion to the number of ereports out of 8
838 * Other systems: fault the detectors in proportion to the number of
839 * ereports out of 8
840 */
841 void
cmd_gen_datapath_fault(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,nvlist_t * det)842 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
843 uint16_t upos, nvlist_t *det)
844 {
845 char frustr[DP_MAX];
846 fmd_case_t *cp;
847 int i, ratio, type, fault_cpu, max_rpt;
848 uint32_t id;
849 uint8_t cpumask;
850 char *cpustr;
851 fault_info_t **fault_list = NULL;
852 nvlist_t *fru = NULL, *asru = NULL, *flt = NULL;
853
854 max_rpt = cmd.cmd_nupos * 2;
855 fault_list = fmd_hdl_alloc(hdl,
856 max_rpt * sizeof (fault_info_t *), FMD_SLEEP);
857
858 if (fault_list == NULL)
859 return;
860
861 for (i = 0; i < max_rpt; i++)
862 fault_list[i] = NULL;
863
864 type = cmd_get_platform();
865
866 fault_cpu = (type == -1) ? 1 : 0;
867
868 cmd_get_faulted_comp(hdl, d1, d2, upos, fault_list, fault_cpu);
869
870 cp = fmd_case_open(hdl, NULL);
871
872 for (i = 0; i < max_rpt; i++) {
873 if (fault_list[i] == NULL)
874 continue;
875 id = fault_list[i]->id;
876
877 switch (type) {
878 case 1:
879 (void) snprintf(frustr, DP_MAX, "EX%d", id);
880 break;
881 case 2:
882 (void) snprintf(frustr, DP_MAX, "/N0/SB%d", id);
883 break;
884 case 3:
885 case 4:
886 (void) snprintf(frustr, DP_MAX, slotname[id]);
887 break;
888 default:
889 cpustr = cmd_cpu_getfrustr_by_id(hdl, id);
890 if (nvlist_lookup_uint8(det, FM_FMRI_CPU_MASK, &cpumask)
891 == 0) {
892 asru = cmd_cpu_fmri_create(id, cpumask);
893 (void) fmd_nvl_fmri_expand(hdl, asru);
894 }
895 break;
896 }
897
898 ratio = (fault_list[i]->count * 100) / (cmd.cmd_nupos * 2);
899
900 if (fault_cpu) {
901 fru = cmd_cpu_mkfru(hdl, cpustr, NULL, NULL);
902 fmd_hdl_strfree(hdl, cpustr);
903 if (fru == NULL) {
904 nvlist_free(asru);
905 break;
906 }
907 flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath",
908 ratio, asru, fru, asru);
909 nvlist_free(asru);
910 } else {
911 fru = cmd_board_mkfru(hdl, frustr);
912 if (fru == NULL)
913 break;
914 flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath",
915 ratio, fru, fru, fru);
916 }
917
918 fmd_case_add_suspect(hdl, cp, flt);
919
920 /* free up memory */
921 nvlist_free(fru);
922 }
923
924 fmd_case_solve(hdl, cp);
925
926 for (i = 0; i < max_rpt; i++) {
927 if (fault_list[i] != NULL)
928 fmd_hdl_free(hdl, fault_list[i], sizeof (fault_info_t));
929 }
930
931 fmd_hdl_free(hdl, fault_list, sizeof (fault_info_t *) * max_rpt);
932 }
933