1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Ereport-handling routines for memory errors
28  */
29 
30 #include <cmd_mem.h>
31 #include <cmd_dimm.h>
32 #include <cmd_bank.h>
33 #include <cmd_page.h>
34 #include <cmd_cpu.h>
35 #include <cmd.h>
36 
37 #include <strings.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <unistd.h>
42 #include <fm/fmd_api.h>
43 #include <sys/fm/protocol.h>
44 #include <sys/fm/cpu/UltraSPARC-III.h>
45 #include <sys/async.h>
46 #include <sys/cheetahregs.h>
47 #include <sys/errclassify.h>
48 #include <sys/fm/io/sun4upci.h>
49 #include <sys/pci/pcisch.h>
50 
/*
 * Jalapeno-specific values from cheetahregs.h.  cmd_rxefrx_common() uses the
 * JREQ field of the AFSR for RxE ereports and the AID field for FRx ereports
 * to obtain the agent id that is recorded for matching.
 */
#define	USIIIi_AFSR_AID		0x0000000000003e00ull /* AID causing UE/CE */
#define	USIIIi_AFSR_AID_SHIFT	9
#define	USIIIi_AFSR_JREQ	0x0000000007000000ull /* Active JBus req */
#define	USIIIi_AFSR_JREQ_SHIFT	24
/* Agent id bits compared when an IOxE is involved; the LSB is left out */
#define	TOM_AID_MATCH_MASK	0xe

/* Agent id recorded for Fire IOxE ereports */
#define	FIRE_AID		0xe
/* Bits of the jbc-jitel1 payload value that are kept as the AFAR */
#define	FIRE_JBC_ADDR_MASK	0x000007ffffffffffull
/* Payload member from which the Fire AFAR is derived */
#define	FIRE_JBC_JITEL1		"jbc-jitel1"
61 
62 /*ARGSUSED*/
63 cmd_evdisp_t
cmd_mem_synd_check(fmd_hdl_t * hdl,uint64_t afar,uint8_t afar_status,uint16_t synd,uint8_t synd_status,cmd_cpu_t * cpu)64 cmd_mem_synd_check(fmd_hdl_t *hdl, uint64_t afar, uint8_t afar_status,
65     uint16_t synd, uint8_t synd_status, cmd_cpu_t *cpu)
66 {
67 	if (synd == CH_POISON_SYND_FROM_XXU_WRITE ||
68 	    ((cpu->cpu_type == CPU_ULTRASPARC_IIIi ||
69 	    cpu->cpu_type == CPU_ULTRASPARC_IIIiplus) &&
70 	    synd == CH_POISON_SYND_FROM_XXU_WRMERGE)) {
71 		fmd_hdl_debug(hdl,
72 		    "discarding UE due to magic syndrome %x\n", synd);
73 		return (CMD_EVD_UNUSED);
74 	}
75 	return (CMD_EVD_OK);
76 }
77 
78 static cmd_evdisp_t
xe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_xe_handler_f * hdlr)79 xe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
80     const char *class, cmd_xe_handler_f *hdlr)
81 {
82 	uint64_t afar;
83 	uint16_t synd;
84 	uint8_t afar_status, synd_status;
85 	nvlist_t *rsrc;
86 	char *typenm;
87 	uint64_t disp;
88 	int minorvers = 1;
89 
90 	if (nvlist_lookup_pairs(nvl, 0,
91 	    FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &afar,
92 	    FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8, &afar_status,
93 	    FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &synd,
94 	    FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8, &synd_status,
95 	    FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
96 	    FM_EREPORT_PAYLOAD_NAME_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
97 	    NULL) != 0)
98 		return (CMD_EVD_BAD);
99 
100 	if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
101 	    &disp) != 0)
102 		minorvers = 0;
103 
104 	return (hdlr(hdl, ep, nvl, class, afar, afar_status, synd,
105 	    synd_status, cmd_mem_name2type(typenm, minorvers), disp, rsrc));
106 }
107 
108 /*ARGSUSED*/
109 cmd_evdisp_t
cmd_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)110 cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
111     cmd_errcl_t clcode)
112 {
113 	return (xe_common(hdl, ep, nvl, class, cmd_ce_common));
114 }
115 
116 /*ARGSUSED*/
117 cmd_evdisp_t
cmd_ue(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)118 cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
119     cmd_errcl_t clcode)
120 {
121 	return (xe_common(hdl, ep, nvl, class, cmd_ue_common));
122 }
123 
124 cmd_evdisp_t
cmd_frx(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)125 cmd_frx(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
126     cmd_errcl_t clcode)
127 {
128 	cmd_errcl_t matchmask = (clcode == CMD_ERRCL_FRC ? (CMD_ERRCL_RCE |
129 	    CMD_ERRCL_IOCE) : (CMD_ERRCL_RUE | CMD_ERRCL_IOUE));
130 
131 	return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask));
132 }
133 
134 /*
135  * When we complete an IOxE/RxE FRx pair, we have enough information to
136  * create either a CE or a UE, as appropriate.  Before dispatching the
137  * joined event to the xE handler, we need to generate the FMRI for the
138  * named DIMM.  While one of the events may already contain a resource FMRI,
139  * said FMRI is incomplete.  The detector didn't have the necessary
140  * information (the AFAR, the AFSR, *and* the syndrome) needed to create
141  * a DIMM-level FMRI.
142  */
143 static cmd_evdisp_t
iorxefrx_synthesize(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,uint64_t afar,uint8_t afar_status,uint64_t afsr,uint16_t synd,uint8_t synd_status,ce_dispact_t type,uint64_t disp,cmd_xe_handler_f * hdlr)144 iorxefrx_synthesize(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
145     const char *class, uint64_t afar, uint8_t afar_status, uint64_t afsr,
146     uint16_t synd, uint8_t synd_status, ce_dispact_t type, uint64_t disp,
147     cmd_xe_handler_f *hdlr)
148 {
149 	nvlist_t *fmri;
150 	int rc;
151 
152 	if ((fmri = cmd_dimm_fmri_derive(hdl, afar, synd, afsr)) == NULL)
153 		return (CMD_EVD_UNUSED);
154 
155 	rc = hdlr(hdl, ep, nvl, class, afar, afar_status, synd, synd_status,
156 	    type, disp, fmri);
157 
158 	nvlist_free(fmri);
159 
160 	return (rc);
161 }
162 
163 static cmd_iorxefrx_t *
iorxefrx_match(fmd_hdl_t * hdl,cmd_errcl_t errcl,cmd_errcl_t matchmask,uint_t det_agentid,uint_t afsr_agentid)164 iorxefrx_match(fmd_hdl_t *hdl, cmd_errcl_t errcl, cmd_errcl_t matchmask,
165     uint_t det_agentid, uint_t afsr_agentid)
166 {
167 	cmd_iorxefrx_t *rf;
168 
169 	for (rf = cmd_list_next(&cmd.cmd_iorxefrx); rf != NULL;
170 	    rf = cmd_list_next(rf)) {
171 
172 		fmd_hdl_debug(hdl, "rf->rf_errcl = %llx, matchmask = %llx\n"
173 		    "rf->rf_det_agentid = %lx, afsr_agentid = %lx\n"
174 		    "rf->rf_afsr_agentid = %lx, det_agentid = %lx\n",
175 		    rf->rf_errcl, matchmask, rf->rf_det_agentid, afsr_agentid,
176 		    rf->rf_afsr_agentid, det_agentid);
177 
178 		if ((rf->rf_errcl & matchmask) == 0)
179 			continue;
180 
181 		/*
182 		 * For IOxEs we are unable to match based on both the detector
183 		 * and the captured Agent Id in the AFSR, because the bridge
184 		 * captures it's own Agent Id instead of the remote CPUs.
185 		 *
186 		 * Also, the LSB of Tomatillo's jpid is aliased for each chip
187 		 * and therefore needs to be factored out of our matching.
188 		 */
189 		if ((CMD_ERRCL_ISIOXE(rf->rf_errcl) ||
190 		    CMD_ERRCL_ISIOXE(errcl)) &&
191 		    ((rf->rf_afsr_agentid & TOM_AID_MATCH_MASK) ==
192 		    (afsr_agentid & TOM_AID_MATCH_MASK)))
193 			return (rf);
194 
195 		/*
196 		 * Check for both here since IOxE is not involved
197 		 */
198 		if ((rf->rf_afsr_agentid == det_agentid) &&
199 		    (rf->rf_det_agentid == afsr_agentid))
200 			return (rf);
201 	}
202 
203 	return (NULL);
204 }
205 
206 /*
207  * Got an RxE or an FRx.  FRx ereports can be matched with RxE ereports and
208  * vice versa.  FRx ereports can also be matched with IOxE ereports.
209  */
210 cmd_evdisp_t
cmd_rxefrx_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode,cmd_errcl_t matchmask)211 cmd_rxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
212     const char *class, cmd_errcl_t clcode, cmd_errcl_t matchmask)
213 {
214 	cmd_xe_handler_f *hdlr;
215 	cmd_iorxefrx_t *rfmatch, *rferr;
216 	cmd_cpu_t *cpu;
217 	char *typenm;
218 	int isrxe = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_RUE);
219 	int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_RCE | CMD_ERRCL_FRC);
220 	int rc;
221 	int minorvers = 1;
222 	uint8_t level = clcode & CMD_ERRCL_LEVEL_EXTRACT;
223 
224 	clcode &= CMD_ERRCL_LEVEL_MASK;
225 	rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
226 
227 	if (nvlist_lookup_pairs(nvl, 0,
228 	    FM_EREPORT_PAYLOAD_NAME_SYND, DATA_TYPE_UINT16, &rferr->rf_synd,
229 	    FM_EREPORT_PAYLOAD_NAME_SYND_STATUS, DATA_TYPE_UINT8,
230 	    &rferr->rf_synd_status,
231 	    FM_EREPORT_PAYLOAD_NAME_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar,
232 	    FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS, DATA_TYPE_UINT8,
233 	    &rferr->rf_afar_status,
234 	    FM_EREPORT_PAYLOAD_NAME_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr,
235 	    FM_EREPORT_PAYLOAD_NAME_ERR_TYPE, DATA_TYPE_STRING, &typenm,
236 	    NULL) != 0) {
237 		fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
238 		return (CMD_EVD_BAD);
239 	}
240 	if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
241 	    &rferr->rf_disp) != 0)
242 		minorvers = 0;
243 
244 	rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
245 
246 	if ((cpu = cmd_cpu_lookup_from_detector(hdl, nvl, class,
247 	    level)) == NULL) {
248 		fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
249 		return (CMD_EVD_UNUSED);
250 	}
251 
252 	if (!isrxe && rferr->rf_synd_status != AFLT_STAT_VALID) {
253 		fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
254 		return (CMD_EVD_UNUSED);
255 	}
256 
257 	if (isrxe) {
258 		rferr->rf_afsr_agentid = (rferr->rf_afsr &
259 		    USIIIi_AFSR_JREQ) >> USIIIi_AFSR_JREQ_SHIFT;
260 	} else {
261 		rferr->rf_afsr_agentid = (rferr->rf_afsr &
262 		    USIIIi_AFSR_AID) >> USIIIi_AFSR_AID_SHIFT;
263 	}
264 
265 	rferr->rf_errcl = clcode;
266 	rferr->rf_det_agentid = cpu->cpu_cpuid;
267 
268 	if ((rfmatch = iorxefrx_match(hdl, clcode, matchmask,
269 	    rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
270 		cmd_iorxefrx_queue(hdl, rferr);
271 		return (CMD_EVD_OK);
272 	}
273 
274 	/*
275 	 * Found a match.  Send a synthesized ereport to the appropriate
276 	 * routine.
277 	 */
278 	fmd_hdl_debug(hdl, "matched %cE %llx with %llx", "UC"[isce],
279 	    rferr->rf_errcl, rfmatch->rf_errcl);
280 
281 	hdlr = (isce ? cmd_ce_common : cmd_ue_common);
282 	if (isrxe) {
283 		rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
284 		    rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
285 		    rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp,
286 		    hdlr);
287 	} else {
288 		rc = iorxefrx_synthesize(hdl, ep, nvl, class, rfmatch->rf_afar,
289 		    rfmatch->rf_afar_status, rferr->rf_afsr, rferr->rf_synd,
290 		    rferr->rf_synd_status, rfmatch->rf_type, rferr->rf_disp,
291 		    hdlr);
292 	}
293 
294 	cmd_iorxefrx_free(hdl, rfmatch);
295 	fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
296 
297 	return (rc);
298 }
299 
300 /*
301  * This fire IOxE must be matched with an FRx before UE/CE processing
302  * is possible.
303  *
304  * Note that for fire ereports we don't receive AFSR, AFAR, AFAR-Status
305  * and SYND values but we can derive the AFAR from the payload value
306  * FIRE_JBC_JITEL1.  We may receive a TYPNM value.
307  */
308 static cmd_evdisp_t
cmd_ioxefrx_fire(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t errcl,cmd_errcl_t matchmask)309 cmd_ioxefrx_fire(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
310     const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask)
311 {
312 	cmd_xe_handler_f *hdlr;
313 	cmd_iorxefrx_t *rfmatch, *rferr;
314 	uint64_t afar;
315 	int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE);
316 	char *portid_str;
317 	char *path = NULL;
318 	char *typenm = NULL;
319 	nvlist_t *det = NULL;
320 	int rc;
321 	int minorvers = 1;
322 
323 	rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
324 
325 	/*
326 	 * Lookup device path of host bridge.
327 	 */
328 	(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
329 	(void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path);
330 
331 	/*
332 	 * get Jbus port id from the device path
333 	 */
334 	portid_str = strrchr(path, '@') + 1;
335 	rferr->rf_det_agentid = strtol(portid_str, NULL, 16);
336 
337 	rferr->rf_errcl = errcl;
338 	rferr->rf_afsr_agentid = FIRE_AID;
339 	rferr->rf_afar_status = AFLT_STAT_VALID;
340 	rferr->rf_synd_status = AFLT_STAT_VALID;
341 
342 	/*
343 	 * Extract the afar from the payload
344 	 */
345 	(void) nvlist_lookup_uint64(nvl, FIRE_JBC_JITEL1, &afar);
346 	rferr->rf_afar = afar & FIRE_JBC_ADDR_MASK;
347 
348 	rferr->rf_afsr = 0;
349 	rferr->rf_synd = 0;
350 
351 	if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
352 	    &typenm) == 0)
353 		rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
354 
355 	/*
356 	 * Need to send in the io_jpid that we get from the device path above
357 	 * for both the det_agentid and the afsr_agentid, since the CPU does not
358 	 * capture the same address as the bridge.  The bridge has the LSB
359 	 * aliased and the CPU is missing the MSB.
360 	 */
361 	if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask,
362 	    rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
363 		cmd_iorxefrx_queue(hdl, rferr);
364 		return (CMD_EVD_OK);
365 		}
366 
367 	/* Found a match.  Synthesize an ereport for UE/CE processing. */
368 	fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce],
369 	    rferr->rf_errcl, rfmatch->rf_errcl);
370 
371 	hdlr = (isce ? cmd_ce_common : cmd_ue_common);
372 	rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
373 	    rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
374 	    rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr);
375 
376 	cmd_iorxefrx_free(hdl, rfmatch);
377 	fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
378 
379 	return (rc);
380 }
381 
382 /* This IOxE must be matched with an FRx before UE/CE processing is possible */
383 static cmd_evdisp_t
cmd_ioxefrx_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t errcl,cmd_errcl_t matchmask)384 cmd_ioxefrx_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
385     const char *class, cmd_errcl_t errcl, cmd_errcl_t matchmask)
386 {
387 	cmd_xe_handler_f *hdlr;
388 	cmd_iorxefrx_t *rfmatch, *rferr;
389 	char *typenm;
390 	int isce = CMD_ERRCL_MATCH(errcl, CMD_ERRCL_IOCE);
391 	char *portid_str;
392 	char *path = NULL;
393 	nvlist_t *det = NULL;
394 	int rc;
395 	int minorvers = 1;
396 
397 	rferr = fmd_hdl_zalloc(hdl, sizeof (cmd_iorxefrx_t), FMD_SLEEP);
398 
399 	if (nvlist_lookup_pairs(nvl, 0,
400 	    PCI_ECC_AFAR, DATA_TYPE_UINT64, &rferr->rf_afar,
401 	    PCI_ECC_AFSR, DATA_TYPE_UINT64, &rferr->rf_afsr,
402 	    PCI_ECC_SYND, DATA_TYPE_UINT16, &rferr->rf_synd,
403 	    PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm,
404 	    NULL) != 0) {
405 		fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
406 		return (CMD_EVD_BAD);
407 	}
408 
409 	if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &rferr->rf_disp) != 0)
410 		minorvers = 0;
411 
412 	rferr->rf_type = cmd_mem_name2type(typenm, minorvers);
413 	rferr->rf_errcl = errcl;
414 
415 	/*
416 	 * Lookup device path of host bridge.
417 	 */
418 	(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR, &det);
419 	(void) nvlist_lookup_string(det, FM_FMRI_DEV_PATH, &path);
420 
421 	/*
422 	 * get Jbus port id from the device path
423 	 */
424 	portid_str = strrchr(path, '@') + 1;
425 	rferr->rf_det_agentid = strtol(portid_str, NULL, 16);
426 
427 	rferr->rf_afsr_agentid = (rferr->rf_afsr &
428 	    SCHIZO_ECC_UE_AFSR_AGENT_MID) >> SCHIZO_ECC_UE_AFSR_AGENT_MID_SHIFT;
429 
430 	/*
431 	 * Only 4 bits of the Jbus AID are sent on the Jbus.  MSB is the one
432 	 * that is chosen not to make the trip.  This is not in any of the Jbus
433 	 * or Tomatillo documents and was discovered during testing and verified
434 	 * by Jalapeno H/W designer.
435 	 */
436 	rferr->rf_afsr_agentid &= 0xf;
437 	rferr->rf_afar_status = AFLT_STAT_VALID;
438 	rferr->rf_synd_status = AFLT_STAT_VALID;
439 
440 	/*
441 	 * Need to send in the io_jpid that we get from the device path above
442 	 * for both the det_agentid and the afsr_agentid, since the CPU does not
443 	 * capture the same address as the bridge.  The bridge has the LSB
444 	 * aliased and the CPU is missing the MSB.
445 	 */
446 	if ((rfmatch = iorxefrx_match(hdl, rferr->rf_errcl, matchmask,
447 	    rferr->rf_det_agentid, rferr->rf_afsr_agentid)) == NULL) {
448 		cmd_iorxefrx_queue(hdl, rferr);
449 		return (CMD_EVD_OK);
450 	}
451 
452 	/* Found a match.  Synthesize an ereport for UE/CE processing. */
453 	fmd_hdl_debug(hdl, "matched %cE %llx with %llx\n", "UC"[isce],
454 	    rferr->rf_errcl, rfmatch->rf_errcl);
455 
456 	hdlr = (isce ? cmd_ce_common : cmd_ue_common);
457 	rc = iorxefrx_synthesize(hdl, ep, nvl, class, rferr->rf_afar,
458 	    rferr->rf_afar_status, rfmatch->rf_afsr, rfmatch->rf_synd,
459 	    rfmatch->rf_synd_status, rferr->rf_type, rferr->rf_disp, hdlr);
460 
461 	cmd_iorxefrx_free(hdl, rfmatch);
462 	fmd_hdl_free(hdl, rferr, sizeof (cmd_iorxefrx_t));
463 
464 	return (rc);
465 }
466 
467 /* IOxE ereports that don't need matching with FRx ereports */
468 static cmd_evdisp_t
ioxe_common(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)469 ioxe_common(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
470     cmd_errcl_t clcode)
471 {
472 	int isce = CMD_ERRCL_MATCH(clcode, CMD_ERRCL_IOCE);
473 	cmd_xe_handler_f *hdlr = isce ? cmd_ce_common : cmd_ue_common;
474 	uint64_t afar;
475 	uint16_t synd;
476 	nvlist_t *rsrc;
477 	char *typenm;
478 	uint64_t disp;
479 	int minorvers = 1;
480 
481 	if (nvlist_lookup_pairs(nvl, 0,
482 	    PCI_ECC_AFAR, DATA_TYPE_UINT64, &afar,
483 	    PCI_ECC_SYND, DATA_TYPE_UINT16, &synd,
484 	    PCI_ECC_TYPE, DATA_TYPE_STRING, &typenm,
485 	    PCI_ECC_RESOURCE, DATA_TYPE_NVLIST, &rsrc,
486 	    NULL) != 0)
487 		return (CMD_EVD_BAD);
488 
489 	if (nvlist_lookup_uint64(nvl, PCI_ECC_DISP, &disp) != 0)
490 		minorvers = 0;
491 
492 	return (hdlr(hdl, ep, nvl, class, afar, AFLT_STAT_VALID, synd,
493 	    AFLT_STAT_VALID, cmd_mem_name2type(typenm, minorvers), disp,
494 	    rsrc));
495 }
496 
497 cmd_evdisp_t
cmd_rxe(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)498 cmd_rxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
499     cmd_errcl_t clcode)
500 {
501 	cmd_errcl_t matchmask = (clcode == CMD_ERRCL_RCE ? CMD_ERRCL_FRC :
502 	    CMD_ERRCL_FRU);
503 
504 	return (cmd_rxefrx_common(hdl, ep, nvl, class, clcode, matchmask));
505 }
506 
507 cmd_evdisp_t
cmd_ioxe(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)508 cmd_ioxe(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
509     cmd_errcl_t clcode)
510 {
511 	cmd_errcl_t matchmask = (clcode == CMD_ERRCL_IOCE ? CMD_ERRCL_FRC :
512 	    CMD_ERRCL_FRU);
513 
514 	if (fmd_nvl_class_match(hdl, nvl, "ereport.io.tom.*")) {
515 		return (cmd_ioxefrx_common(hdl, ep, nvl, class, clcode,
516 		    matchmask));
517 	} else  if (fmd_nvl_class_match(hdl, nvl, "ereport.io.fire.*")) {
518 			return (cmd_ioxefrx_fire(hdl, ep, nvl, class, clcode,
519 			    matchmask));
520 	} else
521 		return (ioxe_common(hdl, ep, nvl, class, clcode));
522 }
523 
524 /*ARGSUSED*/
525 cmd_evdisp_t
cmd_ioxe_sec(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class,cmd_errcl_t clcode)526 cmd_ioxe_sec(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
527     cmd_errcl_t clcode)
528 {
529 	/*
530 	 * Secondary IOxE's can't be used to identify failed or failing
531 	 * resources, as they don't contain enough information.  Ignore them.
532 	 */
533 	return (CMD_EVD_OK);
534 }
535 
536 /*ARGSUSED*/
537 ulong_t
cmd_mem_get_phys_pages(fmd_hdl_t * hdl)538 cmd_mem_get_phys_pages(fmd_hdl_t *hdl)
539 {
540 	return (sysconf(_SC_PHYS_PAGES));
541 }
542 
/*
 * sun4u bit position as function of e_synd,
 * from JPS1 Implementation Supplement table P-7
 * Encode bit positions as follows:
 * 0-127 data bits 0-127
 * 128-136 check bits 0-8 (Cn = 128+n)
 * no error or multibit error = -1 (not valid CE)
 *
 * The table is indexed directly by the syndrome and holds 512 entries
 * (0x000-0x1FF), laid out eight per line.  The comment that closes every
 * second line gives the syndrome range covered by that pair of lines.
 */

int esynd2bit [] = {
	-1, 128, 129, -1, 130, -1, -1, 47,
	131, -1, -1, 53, -1, 41, 29, -1, /* 000-00F */
	132, -1, -1, 50, -1, 38, 25, -1,
	-1, 33, 24, -1, 11, -1, -1, 16, /* 010-01F */
	133, -1, -1, 46, -1, 37, 19, -1,
	-1, 31, 32, -1,  7, -1, -1, 10, /* 020-02F */
	-1, 40, 13, -1, 59, -1, -1, 66,
	-1, -1, -1,  0, -1, 67, 71, -1, /* 030-03F */
	134, -1, -1, 43, -1, 36, 18, -1,
	-1, 49, 15, -1, 63, -1, -1,  6, /* 040-04F */
	-1, 44, 28, -1, -1, -1, -1, 52,
	68, -1, -1, 62, -1, -1, -1, -1, /* 050-05F */
	-1, 26, 106, -1, 64, -1, -1,  2,
	120, -1, -1, -1, -1, -1, -1, -1, /* 060-06F */
	116, -1, -1, -1, -1, -1, -1, -1,
	-1, 58, 54, -1, -1, -1, -1, -1, /* 070-07F */
	135, -1, -1, 42, -1, 35, 17, -1,
	-1, 45, 14, -1, 21, -1, -1,  5, /* 080-08F */
	-1, 27, -1, -1, 99, -1, -1,  3,
	114, -1, -1, 20, -1, -1, -1, -1, /* 090-09F */
	-1, 23, 113, -1, 112, -1, -1, 51,
	95, -1, -1, -1, -1, -1, -1, -1, /* 0A0-0AF */
	103, -1, -1, -1, -1, -1, -1, -1,
	-1, 48, -1, -1, 73, -1, -1, -1, /* 0B0-0BF */
	-1, 22, 110, -1, 109, -1, -1,  9,
	108, -1, -1, -1, -1, -1, -1, -1, /* 0C0-0CF */
	102, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 0D0-0DF */
	98, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 0E0-0EF */
	-1, -1, -1, -1, -1, -1, -1, -1,
	56, -1, -1, -1, -1, -1, -1, -1, /* 0F0-0FF */
	136, -1, -1, 39, -1, 34, 105, -1,
	-1, 30, 104, -1, 101, -1, -1,  4, /* 100-10F */
	-1, -1, 100, -1, 83, -1, -1, 12,
	87, -1, -1, 57, -1, -1, -1, -1, /* 110-11F */
	-1, 97, 82, -1, 78, -1, -1,  1,
	96, -1, -1, -1, -1, -1, -1, -1, /* 120-12F */
	94, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, 79, -1, 69, -1, -1, -1, /* 130-13F */
	-1, 93, 92, -1, 91, -1, -1,  8,
	90, -1, -1, -1, -1, -1, -1, -1, /* 140-14F */
	89, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 150-15F */
	86, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 160-16F */
	-1, -1, -1, -1, -1, -1, -1, -1,
	60, -1, -1, -1, -1, -1, -1, -1, /* 170-17F */
	-1, 88, 85, -1, 84, -1, -1, 55,
	81, -1, -1, -1, -1, -1, -1, -1, /* 180-18F */
	77, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 190-19F */
	74, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, /* 1A0-1AF */
	-1, 70, 107, -1, 65, -1, -1, -1,
	127, -1, -1, -1, -1, -1, -1, -1, /* 1B0-1BF */
	80, -1, -1, 72, -1, 119, 118, -1,
	-1, 126, 76, -1, 125, -1, -1, -1, /* 1C0-1CF */
	-1, 115, 124, -1, 75, -1, -1, -1,
	61, -1, -1, -1, -1, -1, -1, -1, /* 1D0-1DF */
	-1, 123, 122, -1, 121, -1, -1, -1,
	117, -1, -1, -1, -1, -1, -1, -1, /* 1E0-1EF */
	111, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1  /* 1F0-1FF */
};
618 
/*
 * Bit position as a function of the 4-bit msynd value.  The positions used
 * here, 137-143, continue the numbering after the 128-136 check-bit codes of
 * esynd2bit; -1 marks a syndrome with no position assigned.
 * NOTE(review): presumably these are the mtag data and check bits -- confirm
 * against the JPS1 Implementation Supplement.
 */
int msynd2bit [] = {  /* msynd 0-F */
	-1, 140, 141,  -1,
	142, -1,  -1, 137,
	143, -1,  -1, 138,
	-1, 139,  -1,  -1
};
625 
626 int
cmd_synd2upos(uint16_t syndrome)627 cmd_synd2upos(uint16_t syndrome)
628 {
629 	return (esynd2bit[syndrome]);
630 }
631 
/*
 * Not defined in this file.  cmd_get_platform() compares the returned string
 * with the platform names listed in id_plat.
 */
const char *fmd_fmri_get_platform();

/* Size of the buffer in which a board FRU label is formatted */
#define	DP_MAX	25

/* Board FRU labels, indexed by the board id, for platform ids 3 and 4 */
const char *slotname[] = {
	"Slot A", "Slot B", "Slot C", "Slot D"};

/*
 * One suspect of a datapath fault: the board id or cpuid concerned and the
 * number of collected reports that are attributed to it.
 */
typedef struct fault_info {
	uint32_t id;
	int count;
} fault_info_t;

/*
 * Platform name to platform id map, terminated by an entry whose name is
 * NULL.  The id selects how a cpuid is turned into a board id and how the
 * board FRU label is formatted; platforms that are not listed have no id.
 */
struct plat2id_map {
	char *platnm;
	int id;
} id_plat[] = {
	{"SUNW,Sun-Fire-15000",		1},
	{"SUNW,Sun-Fire",		2},
	{"SUNW,Netra-T12",		2},
	{"SUNW,Sun-Fire-480R",		3},
	{"SUNW,Sun-Fire-V490",		3},
	{"SUNW,Sun-Fire-V440",		3},
	{"SUNW,Sun-Fire-V445",		3},
	{"SUNW,Netra-440",		3},
	{"SUNW,Sun-Fire-880",		4},
	{"SUNW,Sun-Fire-V890",		4},
	{NULL,				0}
};
660 
/*
 * Store the address that corresponds to afar in *addr.  In this
 * implementation the address is the AFAR itself; class is not examined.
 */
/*ARGSUSED*/
void
cmd_to_hashed_addr(uint64_t *addr, uint64_t afar, const char *class)
{
	uint64_t hashed = afar;

	*addr = hashed;
}
667 
668 /*ARGSUSED*/
669 int
cmd_same_datapath_dimms(cmd_dimm_t * d1,cmd_dimm_t * d2)670 cmd_same_datapath_dimms(cmd_dimm_t *d1, cmd_dimm_t *d2)
671 {
672 	return (1);
673 }
674 
675 static int
cmd_get_platform()676 cmd_get_platform()
677 {
678 	const char *platname;
679 	int id = -1;
680 	int i;
681 
682 	platname = fmd_fmri_get_platform();
683 	for (i = 0; id_plat[i].platnm != NULL; i++) {
684 		if (strcmp(platname, id_plat[i].platnm) == 0) {
685 			id = id_plat[i].id;
686 			break;
687 		}
688 	}
689 	return (id);
690 }
691 
/*
 * Work out the id of the board that holds the given CPU, using the rule that
 * belongs to the platform id returned by cmd_get_platform():
 *
 *   1	bits 5-9 of the cpuid
 *   2	the low five bits of the cpuid, divided by four
 *   3	0 for an even cpuid, 1 for an odd one (low three bits only)
 *   4	low three bits 0,2 -> 0; 1,3 -> 1; 4,6 -> 2; 5,7 -> 3
 *
 * Any other platform id yields 5.
 */
static int
cmd_get_boardid(uint32_t cpuid)
{
	uint32_t low3 = cpuid & 0x07;

	switch (cmd_get_platform()) {
	case 1:
		return ((cpuid >> 5) & 0x1f);
	case 2:
		return ((cpuid & 0x1f) / 4);
	case 3:
		return (low3 & 1);
	case 4:
		if ((low3 % 2) == 0)
			return ((low3 < 4) ? 0 : 2);
		return ((low3 < 5) ? 1 : 3);
	default:
		return (5);
	}
}
724 
725 static void
cmd_get_faulted_comp(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,fault_info_t ** fault_list,int cpu)726 cmd_get_faulted_comp(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
727     uint16_t upos, fault_info_t **fault_list, int cpu)
728 {
729 	cmd_mq_t *ip;
730 	int i, j, k, idj;
731 	uint32_t id;
732 	uint32_t *cpuid = NULL;
733 	int max_rpt;
734 
735 	max_rpt = 2 * cmd.cmd_nupos;
736 
737 	cpuid = fmd_hdl_alloc(hdl, max_rpt * sizeof (uint32_t), FMD_SLEEP);
738 
739 	if (cpuid == NULL)
740 		return;
741 
742 	for (i = 0, j = 0; i < CMD_MAX_CKWDS; i++) {
743 		for (ip = cmd_list_next(&d1->mq_root[i]); ip != NULL;
744 		    ip = cmd_list_next(ip)) {
745 			if (upos == ip->mq_unit_position) {
746 				cpuid[j] = ip->mq_cpuid;
747 				j++;
748 			}
749 			if (j >= cmd.cmd_nupos)
750 				break;
751 		}
752 		if (j >= cmd.cmd_nupos)
753 			break;
754 	}
755 
756 	for (i = 0; i < CMD_MAX_CKWDS; i++) {
757 		for (ip = cmd_list_next(&d2->mq_root[i]); ip != NULL;
758 		    ip = cmd_list_next(ip)) {
759 			if (upos == ip->mq_unit_position) {
760 				cpuid[j] = ip->mq_cpuid;
761 				j++;
762 			}
763 			if (j >= max_rpt)
764 				break;
765 		}
766 		if (j >= max_rpt)
767 			break;
768 	}
769 
770 	for (i = 0, k = 0; i < max_rpt; i++) {
771 		if (cpuid[i] == ULONG_MAX)
772 			continue;
773 		id = (cpu == 0) ? cmd_get_boardid(cpuid[i]) : cpuid[i];
774 		fault_list[k] = fmd_hdl_alloc(hdl,
775 		    sizeof (fault_info_t), FMD_SLEEP);
776 		if (fault_list[k] == NULL)
777 			break;
778 		fault_list[k]->count = 1;
779 		fault_list[k]->id = id;
780 		for (j = i + 1; j < max_rpt; j++) {
781 			if (cpuid[j] == ULONG_MAX)
782 				continue;
783 			idj = (cpu == 0) ? cmd_get_boardid(cpuid[j]) : cpuid[j];
784 			if (id == idj) {
785 				fault_list[k]->count++;
786 				cpuid[j] = ULONG_MAX;
787 			}
788 		}
789 		k++;
790 	}
791 
792 	fmd_hdl_free(hdl, cpuid, max_rpt * sizeof (uint32_t));
793 }
794 
795 /*ARGSUSED*/
796 static nvlist_t *
cmd_board_mkfru(fmd_hdl_t * hdl,char * frustr)797 cmd_board_mkfru(fmd_hdl_t *hdl, char *frustr)
798 {
799 	nvlist_t *hcel, *fru;
800 	int err;
801 
802 	if (frustr == NULL)
803 		return (NULL);
804 
805 	if (nvlist_alloc(&hcel, NV_UNIQUE_NAME, 0) != 0)
806 		return (NULL);
807 
808 	err = nvlist_add_string(hcel, FM_FMRI_HC_NAME,
809 	    FM_FMRI_LEGACY_HC);
810 	err |= nvlist_add_string(hcel, FM_FMRI_HC_ID, frustr);
811 	if (err != 0) {
812 		nvlist_free(hcel);
813 		return (NULL);
814 	}
815 
816 	if (nvlist_alloc(&fru, NV_UNIQUE_NAME, 0) != 0) {
817 		nvlist_free(hcel);
818 		return (NULL);
819 	}
820 	err = nvlist_add_uint8(fru, FM_VERSION, FM_HC_SCHEME_VERSION);
821 	err |= nvlist_add_string(fru, FM_FMRI_SCHEME,
822 	    FM_FMRI_SCHEME_HC);
823 	err |= nvlist_add_string(fru, FM_FMRI_HC_ROOT, "");
824 	err |= nvlist_add_uint32(fru, FM_FMRI_HC_LIST_SZ, 1);
825 	err |= nvlist_add_nvlist_array(fru, FM_FMRI_HC_LIST, &hcel, 1);
826 	if (err != 0) {
827 		nvlist_free(fru);
828 		nvlist_free(hcel);
829 		return (NULL);
830 	}
831 	nvlist_free(hcel);
832 	return (fru);
833 }
834 
835 /*
836  * Startcat, Serengeti, V4xx, and V8xx: fault the system boards of
837  * the detectors in proportion to the number of ereports out of 8
838  * Other systems: fault the detectors in proportion to the number of
839  * ereports out of 8
840  */
841 void
cmd_gen_datapath_fault(fmd_hdl_t * hdl,cmd_dimm_t * d1,cmd_dimm_t * d2,uint16_t upos,nvlist_t * det)842 cmd_gen_datapath_fault(fmd_hdl_t *hdl, cmd_dimm_t *d1, cmd_dimm_t *d2,
843     uint16_t upos, nvlist_t *det)
844 {
845 	char frustr[DP_MAX];
846 	fmd_case_t *cp;
847 	int i, ratio, type, fault_cpu, max_rpt;
848 	uint32_t id;
849 	uint8_t cpumask;
850 	char *cpustr;
851 	fault_info_t **fault_list = NULL;
852 	nvlist_t *fru = NULL, *asru = NULL, *flt = NULL;
853 
854 	max_rpt = cmd.cmd_nupos * 2;
855 	fault_list = fmd_hdl_alloc(hdl,
856 	    max_rpt * sizeof (fault_info_t *), FMD_SLEEP);
857 
858 	if (fault_list == NULL)
859 		return;
860 
861 	for (i = 0; i < max_rpt; i++)
862 		fault_list[i] = NULL;
863 
864 	type = cmd_get_platform();
865 
866 	fault_cpu = (type == -1) ? 1 : 0;
867 
868 	cmd_get_faulted_comp(hdl, d1, d2, upos, fault_list, fault_cpu);
869 
870 	cp = fmd_case_open(hdl, NULL);
871 
872 	for (i = 0; i < max_rpt; i++) {
873 		if (fault_list[i] == NULL)
874 			continue;
875 		id = fault_list[i]->id;
876 
877 		switch (type) {
878 		case 1:
879 			(void) snprintf(frustr, DP_MAX, "EX%d", id);
880 			break;
881 		case 2:
882 			(void) snprintf(frustr, DP_MAX, "/N0/SB%d", id);
883 			break;
884 		case 3:
885 		case 4:
886 			(void) snprintf(frustr, DP_MAX, slotname[id]);
887 			break;
888 		default:
889 			cpustr = cmd_cpu_getfrustr_by_id(hdl, id);
890 			if (nvlist_lookup_uint8(det, FM_FMRI_CPU_MASK, &cpumask)
891 			    == 0) {
892 				asru = cmd_cpu_fmri_create(id, cpumask);
893 				(void) fmd_nvl_fmri_expand(hdl, asru);
894 			}
895 			break;
896 		}
897 
898 		ratio = (fault_list[i]->count * 100) / (cmd.cmd_nupos * 2);
899 
900 		if (fault_cpu) {
901 			fru = cmd_cpu_mkfru(hdl, cpustr, NULL, NULL);
902 			fmd_hdl_strfree(hdl, cpustr);
903 			if (fru == NULL) {
904 				nvlist_free(asru);
905 				break;
906 			}
907 			flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath",
908 			    ratio, asru, fru, asru);
909 			nvlist_free(asru);
910 		} else {
911 			fru = cmd_board_mkfru(hdl, frustr);
912 			if (fru == NULL)
913 				break;
914 			flt = cmd_nvl_create_fault(hdl, "fault.memory.datapath",
915 			    ratio, fru, fru, fru);
916 		}
917 
918 		fmd_case_add_suspect(hdl, cp, flt);
919 
920 		/* free up memory */
921 		nvlist_free(fru);
922 	}
923 
924 	fmd_case_solve(hdl, cp);
925 
926 	for (i = 0; i < max_rpt; i++) {
927 		if (fault_list[i] != NULL)
928 			fmd_hdl_free(hdl, fault_list[i], sizeof (fault_info_t));
929 	}
930 
931 	fmd_hdl_free(hdl, fault_list, sizeof (fault_info_t *) * max_rpt);
932 }
933