1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/*
26 * PCI ECC support
27 */
28
29#include <sys/types.h>
30#include <sys/systm.h>		/* for strrchr */
31#include <sys/kmem.h>
32#include <sys/sunddi.h>
33#include <sys/intr.h>
34#include <sys/async.h>		/* struct async_flt */
35#include <sys/ddi_impldefs.h>
36#include <sys/machsystm.h>
37#include <sys/sysmacros.h>
38#include <sys/fm/protocol.h>
39#include <sys/fm/util.h>
40#include <sys/fm/io/pci.h>
41#include <sys/fm/io/sun4upci.h>
42#include <sys/fm/io/ddi.h>
43#include <sys/pci/pci_obj.h>	/* ld/st physio */
44#include <sys/cpuvar.h>
45#include <sys/errclassify.h>
46#include <sys/cpu_module.h>
47#include <sys/async.h>
48
49/*LINTLIBRARY*/
50
51static void ecc_disable(ecc_t *, int);
52static void ecc_delayed_ce(void *);
53static uint64_t ecc_read_afsr(ecc_intr_info_t *);
54static void ecc_ereport_post(dev_info_t *dip, ecc_errstate_t *ecc_err);
55
56clock_t pci_ecc_panic_delay = 200;
57int ecc_ce_delay_secs = 6;	/* number of sec to delay reenabling of CEs */
58int ecc_ce_delayed = 1;		/* global for enabling/disabling CE delay */
59
/*
 * Allocate and initialize the per-chip ECC support state (ecc_t),
 * compute the physical addresses of the ECC control and fault
 * registers, enable ECC error detection, and register the interrupt
 * disable routine with the system error-handling framework.
 */
void
ecc_create(pci_t *pci_p)
{
#ifdef DEBUG
	dev_info_t *dip = pci_p->pci_dip;
#endif
	uint64_t cb_base_pa = pci_p->pci_cb_p->cb_base_pa;
	ecc_t *ecc_p;

	ecc_p = (ecc_t *)kmem_zalloc(sizeof (ecc_t), KM_SLEEP);
	ecc_p->ecc_pci_cmn_p = pci_p->pci_common_p;
	pci_p->pci_ecc_p = ecc_p;

	/* Back-link each interrupt-info struct and tag its error type. */
	ecc_p->ecc_ue.ecc_p = ecc_p;
	ecc_p->ecc_ue.ecc_type = CBNINTR_UE;
	ecc_p->ecc_ce.ecc_p = ecc_p;
	ecc_p->ecc_ce.ecc_type = CBNINTR_CE;

	pci_ecc_setup(ecc_p);

	/*
	 * Determine the physical addresses of the ECC control/status
	 * register and the UE/CE AFSR and AFAR fault registers, all at
	 * fixed offsets from the control block base.
	 */
	ecc_p->ecc_csr_pa = cb_base_pa + COMMON_ECC_CSR_OFFSET;
	ecc_p->ecc_ue.ecc_afsr_pa = cb_base_pa + COMMON_UE_AFSR_OFFSET;
	ecc_p->ecc_ue.ecc_afar_pa = cb_base_pa + COMMON_UE_AFAR_OFFSET;
	ecc_p->ecc_ce.ecc_afsr_pa = cb_base_pa + COMMON_CE_AFSR_OFFSET;
	ecc_p->ecc_ce.ecc_afar_pa = cb_base_pa + COMMON_CE_AFAR_OFFSET;

	DEBUG1(DBG_ATTACH, dip, "ecc_create: csr=%x\n", ecc_p->ecc_csr_pa);
	DEBUG2(DBG_ATTACH, dip, "ecc_create: ue_afsr=%x, ue_afar=%x\n",
	    ecc_p->ecc_ue.ecc_afsr_pa, ecc_p->ecc_ue.ecc_afar_pa);
	DEBUG2(DBG_ATTACH, dip, "ecc_create: ce_afsr=%x, ce_afar=%x\n",
	    ecc_p->ecc_ce.ecc_afsr_pa, ecc_p->ecc_ce.ecc_afar_pa);

	ecc_configure(pci_p);

	/*
	 * Register routines to be called from system error handling code.
	 */
	bus_func_register(BF_TYPE_ERRDIS, (busfunc_t)ecc_disable_nowait, ecc_p);
}
103
104int
105ecc_register_intr(pci_t *pci_p)
106{
107	ecc_t *ecc_p = pci_p->pci_ecc_p;
108	int ret;
109
110	/*
111	 * Install the UE and CE error interrupt handlers.
112	 */
113	if ((ret = pci_ecc_add_intr(pci_p, CBNINTR_UE, &ecc_p->ecc_ue)) !=
114	    DDI_SUCCESS)
115		return (ret);
116	if ((ret = pci_ecc_add_intr(pci_p, CBNINTR_CE, &ecc_p->ecc_ce)) !=
117	    DDI_SUCCESS)
118		return (ret);
119
120	return (DDI_SUCCESS);
121}
122
/*
 * Tear down the ECC support state created by ecc_create(): quiesce and
 * remove the UE/CE interrupt handlers, unhook the bus-function error
 * callback, cancel any pending delayed-CE timeout, and free the ecc_t.
 */
void
ecc_destroy(pci_t *pci_p)
{
	ecc_t *ecc_p = pci_p->pci_ecc_p;

	DEBUG0(DBG_DETACH, pci_p->pci_dip, "ecc_destroy:\n");

	/*
	 * Disable UE and CE ECC error interrupts, waiting for any
	 * in-flight handlers to complete.
	 */
	ecc_disable_wait(ecc_p);

	/*
	 * Remove the ECC interrupt handlers.
	 */
	pci_ecc_rem_intr(pci_p, CBNINTR_UE, &ecc_p->ecc_ue);
	pci_ecc_rem_intr(pci_p, CBNINTR_CE, &ecc_p->ecc_ce);

	/*
	 * Unregister our error handling functions.
	 */
	bus_func_unregister(BF_TYPE_ERRDIS,
	    (busfunc_t)ecc_disable_nowait, ecc_p);
	/*
	 * If a delayed-CE timer has been set, unset it.
	 */
	(void) untimeout(ecc_p->ecc_to_id);

	kmem_free(ecc_p, sizeof (ecc_t));
	pci_p->pci_ecc_p = NULL;
}
154
/*
 * Program the ECC hardware: clear any stale primary and secondary
 * error bits in the UE and CE AFSRs (the bits are write-1-to-clear),
 * then enable ECC detection -- and, if ecc_error_intr_enable is set,
 * UE/CE interrupt delivery -- via the ECC control/status register.
 */
void
ecc_configure(pci_t *pci_p)
{
	ecc_t *ecc_p = pci_p->pci_ecc_p;
	dev_info_t *dip = pci_p->pci_dip;
	uint64_t l;

	/*
	 * Clear any pending ECC errors.
	 */
	DEBUG0(DBG_ATTACH, dip, "ecc_configure: clearing UE and CE errors\n");
	l = (COMMON_ECC_UE_AFSR_E_MASK << COMMON_ECC_UE_AFSR_PE_SHIFT) |
	    (COMMON_ECC_UE_AFSR_E_MASK << COMMON_ECC_UE_AFSR_SE_SHIFT);
	stdphysio(ecc_p->ecc_ue.ecc_afsr_pa, l);

	l = (COMMON_ECC_CE_AFSR_E_MASK << COMMON_ECC_CE_AFSR_PE_SHIFT) |
	    (COMMON_ECC_CE_AFSR_E_MASK << COMMON_ECC_CE_AFSR_SE_SHIFT);
	stdphysio(ecc_p->ecc_ce.ecc_afsr_pa, l);

	/*
	 * Enable ECC error detections via the control register.
	 */
	DEBUG0(DBG_ATTACH, dip, "ecc_configure: enabling UE CE detection\n");
	l = COMMON_ECC_CTRL_ECC_EN;
	if (ecc_error_intr_enable)
		l |= COMMON_ECC_CTRL_UE_INTEN | COMMON_ECC_CTRL_CE_INTEN;
	stdphysio(ecc_p->ecc_csr_pa, l);
}
183
184void
185ecc_enable_intr(pci_t *pci_p)
186{
187	cb_enable_nintr(pci_p, CBNINTR_UE);
188	cb_enable_nintr(pci_p, CBNINTR_CE);
189}
190
/*
 * Disable ECC error interrupts, blocking until any handler that is
 * currently running has completed.  Used on the detach path.
 */
void
ecc_disable_wait(ecc_t *ecc_p)
{
	ecc_disable(ecc_p, IB_INTR_WAIT);
}
196
/*
 * Disable ECC error interrupts without waiting for active handlers;
 * registered as a BF_TYPE_ERRDIS bus function so it can be invoked
 * from system error-handling context.  Always reports BF_NONE.
 */
uint_t
ecc_disable_nowait(ecc_t *ecc_p)
{
	ecc_disable(ecc_p, IB_INTR_NOWAIT);
	return (BF_NONE);
}
203
/*
 * Common disable path: clear the UE/CE interrupt-enable bits in the
 * ECC control/status register, then disable the corresponding
 * interrupt mapping registers in the control block.  'wait' selects
 * whether to block on in-flight interrupts (IB_INTR_WAIT/NOWAIT).
 */
static void
ecc_disable(ecc_t *ecc_p, int wait)
{
	cb_t *cb_p = ecc_p->ecc_pci_cmn_p->pci_common_cb_p;
	uint64_t csr_pa = ecc_p->ecc_csr_pa;
	uint64_t csr = lddphysio(csr_pa);

	/* Read-modify-write: drop only the interrupt-enable bits. */
	csr &= ~(COMMON_ECC_CTRL_UE_INTEN | COMMON_ECC_CTRL_CE_INTEN);
	stdphysio(csr_pa, csr);

	cb_disable_nintr(cb_p, CBNINTR_UE, wait);
	cb_disable_nintr(cb_p, CBNINTR_CE, wait);
}
217
218/*
219 * I/O ECC error handling:
220 *
221 * Below are the generic functions that handle PCI(pcisch, pcipsy) detected
222 * ECC errors.
223 *
224 * The registered interrupt handler for both pcisch and pcipsy is ecc_intr(),
 * its function is to receive the error, capture some state, and pass that on
226 * to the ecc_err_handler() for reporting purposes.
227 *
228 * ecc_err_handler() gathers more state(via ecc_errstate_get) and attempts
229 * to handle and report the error. ecc_err_handler() must determine if we need
230 * to panic due to this error (via pci_ecc_classify, which also decodes the
231 * ECC afsr), and if any side effects exist that may have caused or are due
232 * to this error. PBM errors related to the ECC error may exist, to report
233 * them we call pci_pbm_err_handler() and call ndi_fm_handler_dispatch() so
234 * that the child devices can log their pci errors.
235 *
236 * To report the error we must also get the syndrome and unum, which can not
237 * be done in high level interrupted context. Therefore we have an error
238 * queue(pci_ecc_queue) which we dispatch errors to, to report the errors
239 * (ecc_err_drain()).
240 *
241 * ecc_err_drain() will be called when either the softint is triggered
 * or the system is panicking. Either way it will gather more information
243 * about the error from the CPU(via ecc_cpu_call(), ecc.c), attempt to
244 * retire the faulty page(if error is a UE), and report the detected error.
245 *
 * ecc_delayed_ce() is called via timeout from ecc_err_drain() following
 * the receipt of a CE interrupt.  It will be called after ecc_ce_delay_secs
 * (6 by default) seconds and check to see if any new CEs are present; if so
 * we will log them and another timeout will be set when the new CE is
 * drained.  If no CEs are present then it will re-enable CEs by clearing
 * the previous interrupt.  This is to keep the system going in the event
 * of a CE storm.
252 */
253
254/*
255 * Function used to get ECC AFSR register
256 */
257static uint64_t
258ecc_read_afsr(ecc_intr_info_t *ecc_ii_p)
259{
260	uint_t i;
261	uint64_t afsr = 0ull;
262
263	ASSERT((ecc_ii_p->ecc_type == CBNINTR_UE) ||
264	    (ecc_ii_p->ecc_type == CBNINTR_CE));
265	if (!ecc_ii_p->ecc_errpndg_mask)
266		return (lddphysio(ecc_ii_p->ecc_afsr_pa));
267
268	for (i = 0; i < pci_ecc_afsr_retries; i++) {
269
270		/*
271		 * If we timeout, the logging routine will
272		 * know because it will see the ERRPNDG bits
273		 * set in the AFSR.
274		 */
275		afsr = lddphysio(ecc_ii_p->ecc_afsr_pa);
276		if ((afsr & ecc_ii_p->ecc_errpndg_mask) == 0)
277			break;
278	}
279	return (afsr);
280}
281
282/*
283 * IO detected ECC error interrupt handler, calls ecc_err_handler to post
284 * error reports and handle the interrupt. Re-entry into ecc_err_handler
285 * is protected by the per-chip mutex pci_fm_mutex.
286 */
287uint_t
288ecc_intr(caddr_t a)
289{
290	ecc_intr_info_t *ecc_ii_p = (ecc_intr_info_t *)a;
291	ecc_t *ecc_p = ecc_ii_p->ecc_p;
292	pci_common_t *cmn_p = ecc_p->ecc_pci_cmn_p;
293	ecc_errstate_t ecc_err;
294	int ret = DDI_FM_OK;
295
296	bzero(&ecc_err, sizeof (ecc_errstate_t));
297	ecc_err.ecc_ena = fm_ena_generate(0, FM_ENA_FMT1);
298	ecc_err.ecc_ii_p = *ecc_ii_p;
299	ecc_err.ecc_p = ecc_p;
300	ecc_err.ecc_caller = PCI_ECC_CALL;
301
302	mutex_enter(&cmn_p->pci_fm_mutex);
303	ret = ecc_err_handler(&ecc_err);
304	mutex_exit(&cmn_p->pci_fm_mutex);
305	if (ret == DDI_FM_FATAL) {
306		/*
307		 * Need delay here to allow CPUs to handle related traps,
308		 * such as FRUs for USIIIi systems.
309		 */
310		DELAY(pci_ecc_panic_delay);
311		fm_panic("Fatal PCI UE Error");
312	}
313
314	return (DDI_INTR_CLAIMED);
315}
316
317/*
318 * Function used to gather IO ECC error state.
319 */
320static void
321ecc_errstate_get(ecc_errstate_t *ecc_err_p)
322{
323	ecc_t *ecc_p;
324	uint_t bus_id;
325
326	ASSERT(ecc_err_p);
327
328	ecc_p = ecc_err_p->ecc_ii_p.ecc_p;
329	bus_id = ecc_p->ecc_pci_cmn_p->pci_common_id;
330
331	ASSERT(MUTEX_HELD(&ecc_p->ecc_pci_cmn_p->pci_fm_mutex));
332	/*
333	 * Read the fault registers.
334	 */
335	ecc_err_p->ecc_afsr = ecc_read_afsr(&ecc_err_p->ecc_ii_p);
336	ecc_err_p->ecc_afar = lddphysio(ecc_err_p->ecc_ii_p.ecc_afar_pa);
337
338	ecc_err_p->ecc_offset = ((ecc_err_p->ecc_afsr &
339	    ecc_err_p->ecc_ii_p.ecc_offset_mask) >>
340	    ecc_err_p->ecc_ii_p.ecc_offset_shift) <<
341	    ecc_err_p->ecc_ii_p.ecc_size_log2;
342
343	ecc_err_p->ecc_aflt.flt_id = gethrtime();
344	ecc_err_p->ecc_aflt.flt_stat = ecc_err_p->ecc_afsr;
345	ecc_err_p->ecc_aflt.flt_addr = P2ALIGN(ecc_err_p->ecc_afar, 64) +
346	    ecc_err_p->ecc_offset;
347	ecc_err_p->ecc_aflt.flt_bus_id = bus_id;
348	ecc_err_p->ecc_aflt.flt_inst = CPU->cpu_id;
349	ecc_err_p->ecc_aflt.flt_status = ECC_IOBUS;
350	ecc_err_p->ecc_aflt.flt_in_memory =
351	    (pf_is_memory(ecc_err_p->ecc_afar >> MMU_PAGESHIFT))? 1: 0;
352	ecc_err_p->ecc_aflt.flt_class = BUS_FAULT;
353}
354
355/*
356 * ecc_pci_check: Called by ecc_err_handler() this function is responsible
357 * for calling pci_pbm_err_handler() for both sides of the schizo/psycho
358 * and calling their children error handlers(via ndi_fm_handler_dispatch()).
359 */
360static int
361ecc_pci_check(ecc_t *ecc_p, uint64_t fme_ena)
362{
363	ddi_fm_error_t derr;
364	int i;
365	int ret;
366
367	ASSERT(MUTEX_HELD(&ecc_p->ecc_pci_cmn_p->pci_fm_mutex));
368
369	bzero(&derr, sizeof (ddi_fm_error_t));
370	derr.fme_version = DDI_FME_VERSION;
371	derr.fme_ena = fme_ena;
372	ret = DDI_FM_NONFATAL;
373
374	/*
375	 * Need to report any PBM errors which may have caused or
376	 * resulted from this error.
377	 *
378	 * Each psycho or schizo is represented by a pair of pci nodes
379	 * in the device tree.
380	 */
381	for (i = 0; i < 2; i++) {
382		dev_info_t *dip;
383		pci_t *pci_p;
384
385		/* Make sure PBM PCI node exists */
386		pci_p = ecc_p->ecc_pci_cmn_p->pci_p[i];
387		if (pci_p == NULL)
388			continue;
389
390		dip = pci_p->pci_dip;
391		if (pci_pbm_err_handler(dip, &derr, (void *)pci_p,
392		    PCI_ECC_CALL) == DDI_FM_FATAL)
393			ret = DDI_FM_FATAL;
394	}
395	if (ret == DDI_FM_FATAL)
396		return (DDI_FM_FATAL);
397	else
398		return (DDI_FM_NONFATAL);
399}
400
401/*
402 * Function used to handle and log IO detected ECC errors, can be called by
403 * ecc_intr and pci_err_callback(trap callback). Protected by pci_fm_mutex.
404 */
405int
406ecc_err_handler(ecc_errstate_t *ecc_err_p)
407{
408	uint64_t pri_err, sec_err;
409	ecc_intr_info_t *ecc_ii_p = &ecc_err_p->ecc_ii_p;
410	ecc_t *ecc_p = ecc_ii_p->ecc_p;
411	pci_t *pci_p;
412	cb_t *cb_p;
413	int fatal = 0;
414	int nonfatal = 0;
415	ecc_errstate_t ecc_sec_err;
416	uint64_t sec_tmp;
417	int i;
418	uint64_t afsr_err[] = { COMMON_ECC_AFSR_E_PIO,
419				COMMON_ECC_AFSR_E_DRD,
420				COMMON_ECC_AFSR_E_DWR };
421
422
423	ASSERT(MUTEX_HELD(&ecc_p->ecc_pci_cmn_p->pci_fm_mutex));
424
425	pci_p = ecc_p->ecc_pci_cmn_p->pci_p[0];
426	if (pci_p == NULL)
427		pci_p = ecc_p->ecc_pci_cmn_p->pci_p[1];
428
429	cb_p = ecc_p->ecc_pci_cmn_p->pci_common_cb_p;
430
431	ecc_errstate_get(ecc_err_p);
432	pri_err = (ecc_err_p->ecc_afsr >> COMMON_ECC_UE_AFSR_PE_SHIFT) &
433	    COMMON_ECC_UE_AFSR_E_MASK;
434
435	sec_err = (ecc_err_p->ecc_afsr >> COMMON_ECC_UE_AFSR_SE_SHIFT) &
436	    COMMON_ECC_UE_AFSR_E_MASK;
437
438	switch (ecc_ii_p->ecc_type) {
439	case CBNINTR_UE:
440		if (pri_err) {
441			ecc_err_p->ecc_aflt.flt_synd =
442			    pci_ecc_get_synd(ecc_err_p->ecc_afsr);
443			ecc_err_p->ecc_pri = 1;
444			pci_ecc_classify(pri_err, ecc_err_p);
445			errorq_dispatch(pci_ecc_queue, (void *)ecc_err_p,
446			    sizeof (ecc_errstate_t),
447			    ecc_err_p->ecc_aflt.flt_panic);
448		}
449		if (sec_err) {
450			ecc_sec_err = *ecc_err_p;
451			ecc_sec_err.ecc_pri = 0;
452			/*
453			 * Secondary errors are cumulative so we need to loop
454			 * through to capture them all.
455			 */
456			for (i = 0; i < 3; i++) {
457				sec_tmp = sec_err & afsr_err[i];
458				if (sec_tmp) {
459					pci_ecc_classify(sec_tmp, &ecc_sec_err);
460					ecc_ereport_post(pci_p->pci_dip,
461					    &ecc_sec_err);
462				}
463			}
464		}
465		/*
466		 * Check for PCI bus errors that may have resulted from or
467		 * caused this UE.
468		 */
469		if (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
470		    ecc_pci_check(ecc_p, ecc_err_p->ecc_ena) == DDI_FM_FATAL)
471			ecc_err_p->ecc_aflt.flt_panic = 1;
472
473		if (ecc_err_p->ecc_aflt.flt_panic &&
474		    ecc_err_p->ecc_aflt.flt_in_memory)
475			panic_aflt = ecc_err_p->ecc_aflt;
476
477		if (ecc_err_p->ecc_aflt.flt_panic) {
478			/*
479			 * Disable all further errors since this will be
480			 * treated as a fatal error.
481			 */
482			(void) ecc_disable_nowait(ecc_p);
483			fatal++;
484		}
485		break;
486
487	case CBNINTR_CE:
488		if (pri_err) {
489			ecc_err_p->ecc_pri = 1;
490			pci_ecc_classify(pri_err, ecc_err_p);
491			ecc_err_p->ecc_aflt.flt_synd =
492			    pci_ecc_get_synd(ecc_err_p->ecc_afsr);
493			ce_scrub(&ecc_err_p->ecc_aflt);
494			errorq_dispatch(pci_ecc_queue, (void *)ecc_err_p,
495			    sizeof (ecc_errstate_t), ERRORQ_ASYNC);
496			nonfatal++;
497		}
498		if (sec_err) {
499			ecc_sec_err = *ecc_err_p;
500			ecc_sec_err.ecc_pri = 0;
501			/*
502			 * Secondary errors are cumulative so we need to loop
503			 * through to capture them all.
504			 */
505			for (i = 0; i < 3; i++) {
506				sec_tmp = sec_err & afsr_err[i];
507				if (sec_tmp) {
508					pci_ecc_classify(sec_tmp, &ecc_sec_err);
509					ecc_ereport_post(pci_p->pci_dip,
510					    &ecc_sec_err);
511				}
512			}
513			nonfatal++;
514		}
515		break;
516
517	default:
518		return (DDI_FM_OK);
519	}
520	/* Clear the errors */
521	stdphysio(ecc_ii_p->ecc_afsr_pa, ecc_err_p->ecc_afsr);
522	/*
523	 * Clear the interrupt if called by ecc_intr and UE error or if called
524	 * by ecc_intr and CE error and delayed CE interrupt handling is
525	 * turned off.
526	 */
527	if ((ecc_err_p->ecc_caller == PCI_ECC_CALL &&
528	    ecc_ii_p->ecc_type == CBNINTR_UE && !fatal) ||
529	    (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
530	    ecc_ii_p->ecc_type == CBNINTR_CE && !ecc_ce_delayed))
531		cb_clear_nintr(cb_p, ecc_ii_p->ecc_type);
532	if (!fatal && !nonfatal)
533		return (DDI_FM_OK);
534	else if (fatal)
535		return (DDI_FM_FATAL);
536	return (DDI_FM_NONFATAL);
537}
538
539/*
540 * Called from ecc_err_drain below for CBINTR_CE case.
541 */
542static int
543ecc_err_cexdiag(ecc_errstate_t *ecc_err, errorq_elem_t *eqep)
544{
545	struct async_flt *ecc = &ecc_err->ecc_aflt;
546	uint64_t errors;
547
548	if (page_retire_check(ecc->flt_addr, &errors) == EINVAL) {
549		CE_XDIAG_SETSKIPCODE(ecc->flt_disp, CE_XDIAG_SKIP_NOPP);
550		return (0);
551	} else if (errors != PR_OK) {
552		CE_XDIAG_SETSKIPCODE(ecc->flt_disp, CE_XDIAG_SKIP_PAGEDET);
553		return (0);
554	} else {
555		return (ce_scrub_xdiag_recirc(ecc, pci_ecc_queue, eqep,
556		    offsetof(ecc_errstate_t, ecc_aflt)));
557	}
558}
559
560/*
561 * Function used to drain pci_ecc_queue, either during panic or after softint
562 * is generated, to log IO detected ECC errors.
563 */
564/*ARGSUSED*/
565void
566ecc_err_drain(void *not_used, ecc_errstate_t *ecc_err, errorq_elem_t *eqep)
567{
568	struct async_flt *ecc = &ecc_err->ecc_aflt;
569	pci_t *pci_p = ecc_err->ecc_p->ecc_pci_cmn_p->pci_p[0];
570	int ecc_type = ecc_err->ecc_ii_p.ecc_type;
571
572	if (pci_p == NULL)
573		pci_p = ecc_err->ecc_p->ecc_pci_cmn_p->pci_p[1];
574
575	if (ecc->flt_class == RECIRC_BUS_FAULT) {
576		/*
577		 * Perform any additional actions that occur after the
578		 * ecc_err_cexdiag below and post the ereport.
579		 */
580		ecc->flt_class = BUS_FAULT;
581		ecc_err->ecc_err_type = flt_to_error_type(ecc);
582		ecc_ereport_post(pci_p->pci_dip, ecc_err);
583		return;
584	}
585
586	ecc_cpu_call(ecc, ecc_err->ecc_unum, (ecc_type == CBNINTR_UE) ?
587	    ECC_IO_UE : ECC_IO_CE);
588
589	switch (ecc_type) {
590	case CBNINTR_UE:
591		if (ecc_err->ecc_pg_ret == 1) {
592			(void) page_retire(ecc->flt_addr, PR_UE);
593		}
594		ecc_err->ecc_err_type = flt_to_error_type(ecc);
595		break;
596
597	case CBNINTR_CE:
598		/*
599		 * Setup timeout (if CE detected via interrupt) to
600		 * re-enable CE interrupts if no more CEs are detected.
601		 * This is to protect against CE storms.
602		 */
603		if (ecc_ce_delayed &&
604		    ecc_err->ecc_caller == PCI_ECC_CALL &&
605		    ecc_err->ecc_p->ecc_to_id == 0) {
606			ecc_err->ecc_p->ecc_to_id = timeout(ecc_delayed_ce,
607			    (void *)ecc_err->ecc_p,
608			    drv_usectohz((clock_t)ecc_ce_delay_secs *
609			    MICROSEC));
610		}
611
612		/* ecc_err_cexdiag returns nonzero to recirculate */
613		if (CE_XDIAG_EXT_ALG_APPLIED(ecc->flt_disp) &&
614		    ecc_err_cexdiag(ecc_err, eqep))
615			return;
616		ecc_err->ecc_err_type = flt_to_error_type(ecc);
617		break;
618	}
619
620	ecc_ereport_post(pci_p->pci_dip, ecc_err);
621}
622
/*
 * timeout(9F) callback armed by ecc_err_drain() after a CE interrupt.
 * If the CE AFSR shows no new primary error, re-enable CE interrupts
 * by clearing the still-pending interrupt; otherwise feed the new
 * error back through ecc_err_handler() (whose drain path re-arms this
 * timeout), leaving the interrupt pending.  This keeps the system
 * responsive during a CE storm.
 */
static void
ecc_delayed_ce(void *arg)
{
	ecc_t *ecc_p = (ecc_t *)arg;
	pci_common_t *cmn_p;
	cb_t *cb_p;

	ASSERT(ecc_p);

	cmn_p = ecc_p->ecc_pci_cmn_p;
	cb_p = cmn_p->pci_common_cb_p;
	/*
	 * If no more CE errors are found then enable interrupts(by
	 * clearing the previous interrupt), else send in for logging
	 * and the timeout should be set again.
	 */
	ecc_p->ecc_to_id = 0;
	if (!((ecc_read_afsr(&ecc_p->ecc_ce) >>
	    COMMON_ECC_UE_AFSR_PE_SHIFT) & COMMON_ECC_UE_AFSR_E_MASK)) {
		cb_clear_nintr(cb_p, ecc_p->ecc_ce.ecc_type);
	} else {
		ecc_errstate_t ecc_err;

		/* New CE seen: build fresh error state and handle it. */
		bzero(&ecc_err, sizeof (ecc_errstate_t));
		ecc_err.ecc_ena = fm_ena_generate(0, FM_ENA_FMT1);
		ecc_err.ecc_ii_p = ecc_p->ecc_ce;
		ecc_err.ecc_p = ecc_p;
		ecc_err.ecc_caller = PCI_ECC_CALL;

		mutex_enter(&cmn_p->pci_fm_mutex);
		(void) ecc_err_handler(&ecc_err);
		mutex_exit(&cmn_p->pci_fm_mutex);
	}
}
657
658/*
659 * Function used to post IO detected ECC ereports.
660 */
661static void
662ecc_ereport_post(dev_info_t *dip, ecc_errstate_t *ecc_err)
663{
664	char buf[FM_MAX_CLASS], dev_path[MAXPATHLEN], *ptr;
665	struct i_ddi_fmhdl *fmhdl = DEVI(dip)->devi_fmhdl;
666	nvlist_t *ereport, *detector;
667	nv_alloc_t *nva;
668	errorq_elem_t *eqep;
669
670	/*
671	 * We do not use ddi_fm_ereport_post because we need to set a
672	 * special detector here. Since we do not have a device path for
673	 * the bridge chip we use what we think it should be to aid in
674	 * diagnosis. This path fmri is created by pci_fmri_create()
675	 * during initialization.
676	 */
677	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s", DDI_IO_CLASS,
678	    ecc_err->ecc_bridge_type, ecc_err->ecc_aflt.flt_erpt_class);
679
680	ecc_err->ecc_ena = ecc_err->ecc_ena ? ecc_err->ecc_ena :
681	    fm_ena_generate(0, FM_ENA_FMT1);
682
683	eqep = errorq_reserve(fmhdl->fh_errorq);
684	if (eqep == NULL)
685		return;
686
687	ereport = errorq_elem_nvl(fmhdl->fh_errorq, eqep);
688	nva = errorq_elem_nva(fmhdl->fh_errorq, eqep);
689	detector = fm_nvlist_create(nva);
690
691	ASSERT(ereport);
692	ASSERT(nva);
693	ASSERT(detector);
694
695	ddi_pathname(dip, dev_path);
696	ptr = strrchr(dev_path, (int)',');
697
698	if (ptr)
699		*ptr = '\0';
700
701	fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL, dev_path,
702	    NULL, NULL);
703
704	if (ecc_err->ecc_pri) {
705		if ((ecc_err->ecc_fmri = fm_nvlist_create(nva)) != NULL) {
706			char sid[DIMM_SERIAL_ID_LEN] = "";
707			uint64_t offset = (uint64_t)-1;
708			int len;
709			int ret;
710
711			ret = cpu_get_mem_sid(ecc_err->ecc_unum, sid,
712			    DIMM_SERIAL_ID_LEN, &len);
713
714			if (ret == 0) {
715				(void) cpu_get_mem_offset(
716				    ecc_err->ecc_aflt.flt_addr, &offset);
717			}
718
719			fm_fmri_mem_set(ecc_err->ecc_fmri,
720			    FM_MEM_SCHEME_VERSION, NULL, ecc_err->ecc_unum,
721			    (ret == 0) ? sid : NULL, offset);
722		}
723		fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
724		    ecc_err->ecc_ena, detector,
725		    PCI_ECC_AFSR, DATA_TYPE_UINT64, ecc_err->ecc_afsr,
726		    PCI_ECC_AFAR, DATA_TYPE_UINT64, ecc_err->ecc_aflt.flt_addr,
727		    PCI_ECC_CTRL, DATA_TYPE_UINT64, ecc_err->ecc_ctrl,
728		    PCI_ECC_SYND, DATA_TYPE_UINT16, ecc_err->ecc_aflt.flt_synd,
729		    PCI_ECC_TYPE, DATA_TYPE_STRING, ecc_err->ecc_err_type,
730		    PCI_ECC_DISP, DATA_TYPE_UINT64, ecc_err->ecc_aflt.flt_disp,
731		    PCI_ECC_RESOURCE, DATA_TYPE_NVLIST, ecc_err->ecc_fmri,
732		    NULL);
733	} else {
734		fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
735		    ecc_err->ecc_ena, detector,
736		    PCI_ECC_AFSR, DATA_TYPE_UINT64, ecc_err->ecc_afsr,
737		    PCI_ECC_CTRL, DATA_TYPE_UINT64, ecc_err->ecc_ctrl,
738		    NULL);
739	}
740	errorq_commit(fmhdl->fh_errorq, eqep, ERRORQ_ASYNC);
741}
742