125cf1a30Sjl /*
225cf1a30Sjl  * CDDL HEADER START
325cf1a30Sjl  *
425cf1a30Sjl  * The contents of this file are subject to the terms of the
525cf1a30Sjl  * Common Development and Distribution License (the "License").
625cf1a30Sjl  * You may not use this file except in compliance with the License.
725cf1a30Sjl  *
825cf1a30Sjl  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925cf1a30Sjl  * or http://www.opensolaris.org/os/licensing.
1025cf1a30Sjl  * See the License for the specific language governing permissions
1125cf1a30Sjl  * and limitations under the License.
1225cf1a30Sjl  *
1325cf1a30Sjl  * When distributing Covered Code, include this CDDL HEADER in each
1425cf1a30Sjl  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525cf1a30Sjl  * If applicable, add the following below this CDDL HEADER, with the
1625cf1a30Sjl  * fields enclosed by brackets "[]" replaced with your own identifying
1725cf1a30Sjl  * information: Portions Copyright [yyyy] [name of copyright owner]
1825cf1a30Sjl  *
1925cf1a30Sjl  * CDDL HEADER END
2025cf1a30Sjl  */
2125cf1a30Sjl /*
2225cf1a30Sjl  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
2325cf1a30Sjl  * Use is subject to license terms.
2425cf1a30Sjl  */
2525cf1a30Sjl 
2625cf1a30Sjl #pragma ident	"%Z%%M%	%I%	%E% SMI"
2725cf1a30Sjl 
2825cf1a30Sjl /*
2925cf1a30Sjl  * CMU-CH ECC support
3025cf1a30Sjl  */
3125cf1a30Sjl 
3225cf1a30Sjl #include <sys/types.h>
3325cf1a30Sjl #include <sys/systm.h>
3425cf1a30Sjl #include <sys/kmem.h>
3525cf1a30Sjl #include <sys/sunddi.h>
3625cf1a30Sjl #include <sys/intr.h>
3725cf1a30Sjl #include <sys/async.h>
3825cf1a30Sjl #include <sys/ddi_impldefs.h>
3925cf1a30Sjl #include <sys/machsystm.h>
4025cf1a30Sjl #include <sys/sysmacros.h>
4125cf1a30Sjl #include <sys/fm/protocol.h>
4225cf1a30Sjl #include <sys/fm/util.h>
4325cf1a30Sjl #include <sys/fm/io/pci.h>
4425cf1a30Sjl #include <sys/fm/io/sun4upci.h>
4525cf1a30Sjl #include <sys/fm/io/ddi.h>
4625cf1a30Sjl #include <sys/pcicmu/pcicmu.h>
4725cf1a30Sjl 
4825cf1a30Sjl static void pcmu_ecc_disable(pcmu_ecc_t *, int);
4925cf1a30Sjl static uint64_t pcmu_ecc_read_afsr(pcmu_ecc_intr_info_t *);
5025cf1a30Sjl static void pcmu_ecc_ereport_post(dev_info_t *dip,
5125cf1a30Sjl     pcmu_ecc_errstate_t *ecc_err);
5225cf1a30Sjl 
5325cf1a30Sjl clock_t pcmu_pecc_panic_delay = 200;
5425cf1a30Sjl 
5525cf1a30Sjl void
pcmu_ecc_create(pcmu_t * pcmu_p)5625cf1a30Sjl pcmu_ecc_create(pcmu_t *pcmu_p)
5725cf1a30Sjl {
5825cf1a30Sjl 	uint64_t pcb_base_pa = pcmu_p->pcmu_cb_p->pcb_base_pa;
5925cf1a30Sjl 	pcmu_ecc_t *pecc_p;
6025cf1a30Sjl 	/* LINTED variable */
6125cf1a30Sjl 	dev_info_t *dip = pcmu_p->pcmu_dip;
6225cf1a30Sjl 
6325cf1a30Sjl 	pecc_p = (pcmu_ecc_t *)kmem_zalloc(sizeof (pcmu_ecc_t), KM_SLEEP);
6425cf1a30Sjl 	pecc_p->pecc_pcmu_p = pcmu_p;
6525cf1a30Sjl 	pcmu_p->pcmu_pecc_p = pecc_p;
6625cf1a30Sjl 
6725cf1a30Sjl 	pecc_p->pecc_ue.pecc_p = pecc_p;
6825cf1a30Sjl 	pecc_p->pecc_ue.pecc_type = CBNINTR_UE;
6925cf1a30Sjl 
7025cf1a30Sjl 	pcmu_ecc_setup(pecc_p);
7125cf1a30Sjl 
7225cf1a30Sjl 	/*
7325cf1a30Sjl 	 * Determine the virtual addresses of the streaming cache
7425cf1a30Sjl 	 * control/status and flush registers.
7525cf1a30Sjl 	 */
7625cf1a30Sjl 	pecc_p->pecc_csr_pa = pcb_base_pa + PCMU_ECC_CSR_OFFSET;
7725cf1a30Sjl 	pecc_p->pecc_ue.pecc_afsr_pa = pcb_base_pa + PCMU_UE_AFSR_OFFSET;
7825cf1a30Sjl 	pecc_p->pecc_ue.pecc_afar_pa = pcb_base_pa + PCMU_UE_AFAR_OFFSET;
7925cf1a30Sjl 
8025cf1a30Sjl 	PCMU_DBG1(PCMU_DBG_ATTACH, dip, "pcmu_ecc_create: csr=%x\n",
8125cf1a30Sjl 	    pecc_p->pecc_csr_pa);
8225cf1a30Sjl 	PCMU_DBG2(PCMU_DBG_ATTACH, dip,
8325cf1a30Sjl 	    "pcmu_ecc_create: ue_afsr=%x, ue_afar=%x\n",
8425cf1a30Sjl 	    pecc_p->pecc_ue.pecc_afsr_pa, pecc_p->pecc_ue.pecc_afar_pa);
8525cf1a30Sjl 
8625cf1a30Sjl 	pcmu_ecc_configure(pcmu_p);
8725cf1a30Sjl 
8825cf1a30Sjl 	/*
8925cf1a30Sjl 	 * Register routines to be called from system error handling code.
9025cf1a30Sjl 	 */
9125cf1a30Sjl 	bus_func_register(BF_TYPE_ERRDIS,
9225cf1a30Sjl 	    (busfunc_t)pcmu_ecc_disable_nowait, pecc_p);
9325cf1a30Sjl }
9425cf1a30Sjl 
9525cf1a30Sjl int
pcmu_ecc_register_intr(pcmu_t * pcmu_p)9625cf1a30Sjl pcmu_ecc_register_intr(pcmu_t *pcmu_p)
9725cf1a30Sjl {
9825cf1a30Sjl 	pcmu_ecc_t *pecc_p = pcmu_p->pcmu_pecc_p;
9925cf1a30Sjl 	int ret;
10025cf1a30Sjl 
10125cf1a30Sjl 	/*
10225cf1a30Sjl 	 * Install the UE error interrupt handlers.
10325cf1a30Sjl 	 */
10425cf1a30Sjl 	ret = pcmu_ecc_add_intr(pcmu_p, CBNINTR_UE, &pecc_p->pecc_ue);
10525cf1a30Sjl 	return (ret);
10625cf1a30Sjl }
10725cf1a30Sjl 
10825cf1a30Sjl void
pcmu_ecc_destroy(pcmu_t * pcmu_p)10925cf1a30Sjl pcmu_ecc_destroy(pcmu_t *pcmu_p)
11025cf1a30Sjl {
11125cf1a30Sjl 	pcmu_ecc_t *pecc_p = pcmu_p->pcmu_pecc_p;
11225cf1a30Sjl 
11325cf1a30Sjl 	PCMU_DBG0(PCMU_DBG_DETACH, pcmu_p->pcmu_dip, "pcmu_ecc_destroy:\n");
11425cf1a30Sjl 
11525cf1a30Sjl 	/*
11625cf1a30Sjl 	 * Disable UE ECC error interrupts.
11725cf1a30Sjl 	 */
11825cf1a30Sjl 	pcmu_ecc_disable_wait(pecc_p);
11925cf1a30Sjl 
12025cf1a30Sjl 	/*
12125cf1a30Sjl 	 * Remove the ECC interrupt handlers.
12225cf1a30Sjl 	 */
12325cf1a30Sjl 	pcmu_ecc_rem_intr(pcmu_p, CBNINTR_UE, &pecc_p->pecc_ue);
12425cf1a30Sjl 
12525cf1a30Sjl 	/*
12625cf1a30Sjl 	 * Unregister our error handling functions.
12725cf1a30Sjl 	 */
12825cf1a30Sjl 	bus_func_unregister(BF_TYPE_ERRDIS,
12925cf1a30Sjl 	    (busfunc_t)pcmu_ecc_disable_nowait, pecc_p);
13025cf1a30Sjl 	/*
13125cf1a30Sjl 	 * If a timer has been set, unset it.
13225cf1a30Sjl 	 */
13325cf1a30Sjl 	(void) untimeout(pecc_p->pecc_tout_id);
13425cf1a30Sjl 	kmem_free(pecc_p, sizeof (pcmu_ecc_t));
13525cf1a30Sjl 	pcmu_p->pcmu_pecc_p = NULL;
13625cf1a30Sjl }
13725cf1a30Sjl 
13825cf1a30Sjl void
pcmu_ecc_configure(pcmu_t * pcmu_p)13925cf1a30Sjl pcmu_ecc_configure(pcmu_t *pcmu_p)
14025cf1a30Sjl {
14125cf1a30Sjl 	pcmu_ecc_t *pecc_p = pcmu_p->pcmu_pecc_p;
14225cf1a30Sjl 	uint64_t l;
14325cf1a30Sjl 	/* LINTED variable */
14425cf1a30Sjl 	dev_info_t *dip = pcmu_p->pcmu_dip;
14525cf1a30Sjl 
14625cf1a30Sjl 	/*
14725cf1a30Sjl 	 * Clear any pending ECC errors.
14825cf1a30Sjl 	 */
14925cf1a30Sjl 	PCMU_DBG0(PCMU_DBG_ATTACH, dip,
15025cf1a30Sjl 	    "pcmu_ecc_configure: clearing UE errors\n");
15125cf1a30Sjl 	l = (PCMU_ECC_UE_AFSR_E_MASK << PCMU_ECC_UE_AFSR_PE_SHIFT) |
15225cf1a30Sjl 	    (PCMU_ECC_UE_AFSR_E_MASK << PCMU_ECC_UE_AFSR_SE_SHIFT);
15325cf1a30Sjl 	stdphysio(pecc_p->pecc_ue.pecc_afsr_pa, l);
15425cf1a30Sjl 
15525cf1a30Sjl 	/*
15625cf1a30Sjl 	 * Enable ECC error detections via the control register.
15725cf1a30Sjl 	 */
15825cf1a30Sjl 	PCMU_DBG0(PCMU_DBG_ATTACH, dip,
15925cf1a30Sjl 	    "pcmu_ecc_configure: enabling UE detection\n");
16025cf1a30Sjl 	l = PCMU_ECC_CTRL_ECC_EN;
16125cf1a30Sjl 	if (ecc_error_intr_enable)
16225cf1a30Sjl 		l |= PCMU_ECC_CTRL_UE_INTEN;
16325cf1a30Sjl 	stdphysio(pecc_p->pecc_csr_pa, l);
16425cf1a30Sjl }
16525cf1a30Sjl 
16625cf1a30Sjl void
pcmu_ecc_enable_intr(pcmu_t * pcmu_p)16725cf1a30Sjl pcmu_ecc_enable_intr(pcmu_t *pcmu_p)
16825cf1a30Sjl {
16925cf1a30Sjl 	pcmu_cb_enable_nintr(pcmu_p, CBNINTR_UE);
17025cf1a30Sjl }
17125cf1a30Sjl 
17225cf1a30Sjl void
pcmu_ecc_disable_wait(pcmu_ecc_t * pecc_p)17325cf1a30Sjl pcmu_ecc_disable_wait(pcmu_ecc_t *pecc_p)
17425cf1a30Sjl {
17525cf1a30Sjl 	pcmu_ecc_disable(pecc_p, PCMU_IB_INTR_WAIT);
17625cf1a30Sjl }
17725cf1a30Sjl 
17825cf1a30Sjl uint_t
pcmu_ecc_disable_nowait(pcmu_ecc_t * pecc_p)17925cf1a30Sjl pcmu_ecc_disable_nowait(pcmu_ecc_t *pecc_p)
18025cf1a30Sjl {
18125cf1a30Sjl 	pcmu_ecc_disable(pecc_p, PCMU_IB_INTR_NOWAIT);
18225cf1a30Sjl 	return (BF_NONE);
18325cf1a30Sjl }
18425cf1a30Sjl 
18525cf1a30Sjl static void
pcmu_ecc_disable(pcmu_ecc_t * pecc_p,int wait)18625cf1a30Sjl pcmu_ecc_disable(pcmu_ecc_t *pecc_p, int wait)
18725cf1a30Sjl {
18825cf1a30Sjl 	pcmu_cb_t *pcb_p = pecc_p->pecc_pcmu_p->pcmu_cb_p;
18925cf1a30Sjl 	uint64_t csr_pa = pecc_p->pecc_csr_pa;
19025cf1a30Sjl 	uint64_t csr = lddphysio(csr_pa);
19125cf1a30Sjl 
19225cf1a30Sjl 	csr &= ~(PCMU_ECC_CTRL_UE_INTEN);
19325cf1a30Sjl 	stdphysio(csr_pa, csr);
19425cf1a30Sjl 	pcmu_cb_disable_nintr(pcb_p, CBNINTR_UE, wait);
19525cf1a30Sjl }
19625cf1a30Sjl 
/*
 * I/O ECC error handling:
 *
 * Below are the generic functions that handle detected ECC errors.
 *
 * The registered interrupt handler is pcmu_ecc_intr(); its function
 * is to receive the error, capture some state, and pass that on to
 * pcmu_ecc_err_handler() for reporting purposes.
 *
 * pcmu_ecc_err_handler() gathers more state (via pcmu_ecc_errstate_get())
 * and attempts to handle and report the error. pcmu_ecc_err_handler()
 * must determine if we need to panic due to this error (via
 * pcmu_ecc_classify(), which also decodes the ECC afsr), and if any
 * side effects exist that may have caused or are due to this error.
 * PBM errors related to the ECC error may exist; to report
 * them we call pcmu_pbm_err_handler().
 *
 * To report the error we must also get the syndrome and unum, which cannot
 * be done in high-level interrupt context. Therefore we have an error
 * queue (pcmu_ecc_queue) which we dispatch errors to, in order to report
 * the errors (pcmu_ecc_err_drain()).
 *
 * pcmu_ecc_err_drain() will be called when either the softint is triggered
 * or the system is panicking. Either way it will gather more information
 * about the error from the CPU (via ecc_cpu_call(), ecc.c), attempt to
 * retire the faulty page (if the error is a UE), and report the detected
 * error.
 *
 */
22525cf1a30Sjl 
22625cf1a30Sjl /*
22725cf1a30Sjl  * Function used to get ECC AFSR register
22825cf1a30Sjl  */
22925cf1a30Sjl static uint64_t
pcmu_ecc_read_afsr(pcmu_ecc_intr_info_t * ecc_ii_p)23025cf1a30Sjl pcmu_ecc_read_afsr(pcmu_ecc_intr_info_t *ecc_ii_p)
23125cf1a30Sjl {
23225cf1a30Sjl 	ASSERT(ecc_ii_p->pecc_type == CBNINTR_UE);
23325cf1a30Sjl 	return (lddphysio(ecc_ii_p->pecc_afsr_pa));
23425cf1a30Sjl }
23525cf1a30Sjl 
23625cf1a30Sjl /*
23725cf1a30Sjl  * IO detected ECC error interrupt handler, calls pcmu_ecc_err_handler to post
23825cf1a30Sjl  * error reports and handle the interrupt. Re-entry into pcmu_ecc_err_handler
23925cf1a30Sjl  * is protected by the per-chip mutex pcmu_err_mutex.
24025cf1a30Sjl  */
24125cf1a30Sjl uint_t
pcmu_ecc_intr(caddr_t a)24225cf1a30Sjl pcmu_ecc_intr(caddr_t a)
24325cf1a30Sjl {
24425cf1a30Sjl 	pcmu_ecc_intr_info_t *ecc_ii_p = (pcmu_ecc_intr_info_t *)a;
24525cf1a30Sjl 	pcmu_ecc_t *pecc_p = ecc_ii_p->pecc_p;
24625cf1a30Sjl 	pcmu_t *pcmu_p = pecc_p->pecc_pcmu_p;
24725cf1a30Sjl 	pcmu_ecc_errstate_t ecc_err;
24825cf1a30Sjl 	int ret = DDI_FM_OK;
24925cf1a30Sjl 
25025cf1a30Sjl 	bzero(&ecc_err, sizeof (pcmu_ecc_errstate_t));
25125cf1a30Sjl 	ecc_err.ecc_ena = fm_ena_generate(0, FM_ENA_FMT1); /* RAGS */
25225cf1a30Sjl 	ecc_err.ecc_ii_p = *ecc_ii_p;
25325cf1a30Sjl 	ecc_err.pecc_p = pecc_p;
25425cf1a30Sjl 	ecc_err.ecc_caller = PCI_ECC_CALL;
25525cf1a30Sjl 
25625cf1a30Sjl 	mutex_enter(&pcmu_p->pcmu_err_mutex);
25725cf1a30Sjl 	ret = pcmu_ecc_err_handler(&ecc_err);
25825cf1a30Sjl 	mutex_exit(&pcmu_p->pcmu_err_mutex);
25925cf1a30Sjl 	if (ret == DDI_FM_FATAL) {
26025cf1a30Sjl 		/*
26125cf1a30Sjl 		 * Need delay here to allow CPUs to handle related traps,
26225cf1a30Sjl 		 * such as FRUs for USIIIi systems.
26325cf1a30Sjl 		 */
26425cf1a30Sjl 		DELAY(pcmu_pecc_panic_delay);
26525cf1a30Sjl 		cmn_err(CE_PANIC, "Fatal PCI UE Error");
26625cf1a30Sjl 	}
26725cf1a30Sjl 
26825cf1a30Sjl 	return (DDI_INTR_CLAIMED);
26925cf1a30Sjl }
27025cf1a30Sjl 
27125cf1a30Sjl /*
27225cf1a30Sjl  * Function used to gather IO ECC error state.
27325cf1a30Sjl  */
27425cf1a30Sjl static void
pcmu_ecc_errstate_get(pcmu_ecc_errstate_t * ecc_err_p)27525cf1a30Sjl pcmu_ecc_errstate_get(pcmu_ecc_errstate_t *ecc_err_p)
27625cf1a30Sjl {
27725cf1a30Sjl 	pcmu_ecc_t *pecc_p;
27825cf1a30Sjl 	uint_t bus_id;
27925cf1a30Sjl 
28025cf1a30Sjl 	ASSERT(ecc_err_p);
28125cf1a30Sjl 
28225cf1a30Sjl 	pecc_p = ecc_err_p->ecc_ii_p.pecc_p;
28325cf1a30Sjl 	bus_id = pecc_p->pecc_pcmu_p->pcmu_id;
28425cf1a30Sjl 
28525cf1a30Sjl 	ASSERT(MUTEX_HELD(&pecc_p->pecc_pcmu_p->pcmu_err_mutex));
28625cf1a30Sjl 	/*
28725cf1a30Sjl 	 * Read the fault registers.
28825cf1a30Sjl 	 */
28925cf1a30Sjl 	ecc_err_p->ecc_afsr = pcmu_ecc_read_afsr(&ecc_err_p->ecc_ii_p);
29025cf1a30Sjl 	ecc_err_p->ecc_afar = lddphysio(ecc_err_p->ecc_ii_p.pecc_afar_pa);
29125cf1a30Sjl 
29225cf1a30Sjl 	ecc_err_p->ecc_offset = ((ecc_err_p->ecc_afsr &
29325cf1a30Sjl 	    ecc_err_p->ecc_ii_p.pecc_offset_mask) >>
29425cf1a30Sjl 	    ecc_err_p->ecc_ii_p.pecc_offset_shift) <<
29525cf1a30Sjl 	    ecc_err_p->ecc_ii_p.pecc_size_log2;
29625cf1a30Sjl 
29725cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_id = gethrtime();
29825cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_stat = ecc_err_p->ecc_afsr;
29925cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_addr = P2ALIGN(ecc_err_p->ecc_afar, 64) +
30025cf1a30Sjl 	    ecc_err_p->ecc_offset;
30125cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_bus_id = bus_id;
30225cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_inst = 0;
30325cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_status = ECC_IOBUS;
30425cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_in_memory = 0;
30525cf1a30Sjl 	ecc_err_p->ecc_aflt.flt_class = BUS_FAULT;
30625cf1a30Sjl }
30725cf1a30Sjl 
30825cf1a30Sjl /*
30925cf1a30Sjl  * pcmu_ecc_check: Called by pcmu_ecc_err_handler() this function is responsible
31025cf1a30Sjl  * for calling pcmu_pbm_err_handler() and calling their children error
31125cf1a30Sjl  * handlers(via ndi_fm_handler_dispatch()).
31225cf1a30Sjl  */
31325cf1a30Sjl static int
pcmu_ecc_check(pcmu_ecc_t * pecc_p,uint64_t fme_ena)31425cf1a30Sjl pcmu_ecc_check(pcmu_ecc_t *pecc_p, uint64_t fme_ena)
31525cf1a30Sjl {
31625cf1a30Sjl 	ddi_fm_error_t derr;
31725cf1a30Sjl 	int ret;
31825cf1a30Sjl 	pcmu_t *pcmu_p;
31925cf1a30Sjl 
32025cf1a30Sjl 
32125cf1a30Sjl 	ASSERT(MUTEX_HELD(&pecc_p->pecc_pcmu_p->pcmu_err_mutex));
32225cf1a30Sjl 
32325cf1a30Sjl 	bzero(&derr, sizeof (ddi_fm_error_t));
32425cf1a30Sjl 	derr.fme_version = DDI_FME_VERSION;
32525cf1a30Sjl 	derr.fme_ena = fme_ena;
32625cf1a30Sjl 	ret = DDI_FM_NONFATAL;
32725cf1a30Sjl 
32825cf1a30Sjl 	/*
32925cf1a30Sjl 	 * Need to report any PBM errors which may have caused or
33025cf1a30Sjl 	 * resulted from this error.
33125cf1a30Sjl 	 */
33225cf1a30Sjl 	pcmu_p = pecc_p->pecc_pcmu_p;
33325cf1a30Sjl 	if (pcmu_pbm_err_handler(pcmu_p->pcmu_dip, &derr, (void *)pcmu_p,
33425cf1a30Sjl 	    PCI_ECC_CALL) == DDI_FM_FATAL)
33525cf1a30Sjl 		ret = DDI_FM_FATAL;
33625cf1a30Sjl 
33725cf1a30Sjl 	if (ret == DDI_FM_FATAL)
33825cf1a30Sjl 		return (DDI_FM_FATAL);
33925cf1a30Sjl 	else
34025cf1a30Sjl 		return (DDI_FM_NONFATAL);
34125cf1a30Sjl }
34225cf1a30Sjl 
34325cf1a30Sjl /*
34425cf1a30Sjl  * Function used to handle and log IO detected ECC errors, can be called by
34525cf1a30Sjl  * pcmu_ecc_intr and pcmu_err_callback(trap callback). Protected by
34625cf1a30Sjl  * pcmu_err_mutex.
34725cf1a30Sjl  */
34825cf1a30Sjl int
pcmu_ecc_err_handler(pcmu_ecc_errstate_t * ecc_err_p)34925cf1a30Sjl pcmu_ecc_err_handler(pcmu_ecc_errstate_t *ecc_err_p)
35025cf1a30Sjl {
35125cf1a30Sjl 	/* LINTED variable */
35225cf1a30Sjl 	uint64_t pri_err, sec_err;
35325cf1a30Sjl 	pcmu_ecc_intr_info_t *ecc_ii_p = &ecc_err_p->ecc_ii_p;
35425cf1a30Sjl 	pcmu_ecc_t *pecc_p = ecc_ii_p->pecc_p;
35525cf1a30Sjl 	/* LINTED variable */
35625cf1a30Sjl 	pcmu_t *pcmu_p;
35725cf1a30Sjl 	pcmu_cb_t *pcb_p;
35825cf1a30Sjl 	int fatal = 0;
35925cf1a30Sjl 	int nonfatal = 0;
36025cf1a30Sjl 
36125cf1a30Sjl 	ASSERT(MUTEX_HELD(&pecc_p->pecc_pcmu_p->pcmu_err_mutex));
36225cf1a30Sjl 
36325cf1a30Sjl 	pcmu_p = pecc_p->pecc_pcmu_p;
36425cf1a30Sjl 	pcb_p = pecc_p->pecc_pcmu_p->pcmu_cb_p;
36525cf1a30Sjl 
36625cf1a30Sjl 	pcmu_ecc_errstate_get(ecc_err_p);
36725cf1a30Sjl 	pri_err = (ecc_err_p->ecc_afsr >> PCMU_ECC_UE_AFSR_PE_SHIFT) &
36825cf1a30Sjl 		PCMU_ECC_UE_AFSR_E_MASK;
36925cf1a30Sjl 
37025cf1a30Sjl 	sec_err = (ecc_err_p->ecc_afsr >> PCMU_ECC_UE_AFSR_SE_SHIFT) &
37125cf1a30Sjl 		PCMU_ECC_UE_AFSR_E_MASK;
37225cf1a30Sjl 
37325cf1a30Sjl 	switch (ecc_ii_p->pecc_type) {
37425cf1a30Sjl 	case CBNINTR_UE:
37525cf1a30Sjl 		if (pri_err) {
37625cf1a30Sjl 			ecc_err_p->ecc_aflt.flt_synd = 0;
37725cf1a30Sjl 			ecc_err_p->pecc_pri = 1;
37825cf1a30Sjl 			pcmu_ecc_classify(pri_err, ecc_err_p);
37925cf1a30Sjl 			errorq_dispatch(pcmu_ecc_queue, (void *)ecc_err_p,
38025cf1a30Sjl 				sizeof (pcmu_ecc_errstate_t),
38125cf1a30Sjl 				ecc_err_p->ecc_aflt.flt_panic);
38225cf1a30Sjl 		}
38325cf1a30Sjl 		if (sec_err) {
38425cf1a30Sjl 			pcmu_ecc_errstate_t ecc_sec_err;
38525cf1a30Sjl 
38625cf1a30Sjl 			ecc_sec_err = *ecc_err_p;
38725cf1a30Sjl 			ecc_sec_err.pecc_pri = 0;
38825cf1a30Sjl 			pcmu_ecc_classify(sec_err, &ecc_sec_err);
38925cf1a30Sjl 			pcmu_ecc_ereport_post(pcmu_p->pcmu_dip,
39025cf1a30Sjl 					&ecc_sec_err);
39125cf1a30Sjl 		}
39225cf1a30Sjl 		/*
39325cf1a30Sjl 		 * Check for PCI bus errors that may have resulted from or
39425cf1a30Sjl 		 * caused this UE.
39525cf1a30Sjl 		 */
39625cf1a30Sjl 		if (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
39725cf1a30Sjl 		    pcmu_ecc_check(pecc_p, ecc_err_p->ecc_ena) == DDI_FM_FATAL)
39825cf1a30Sjl 			ecc_err_p->ecc_aflt.flt_panic = 1;
39925cf1a30Sjl 
40025cf1a30Sjl 		if (ecc_err_p->ecc_aflt.flt_panic) {
40125cf1a30Sjl 			/*
40225cf1a30Sjl 			 * Disable all further errors since this will be
40325cf1a30Sjl 			 * treated as a fatal error.
40425cf1a30Sjl 			 */
40525cf1a30Sjl 			(void) pcmu_ecc_disable_nowait(pecc_p);
40625cf1a30Sjl 			fatal++;
40725cf1a30Sjl 		}
40825cf1a30Sjl 		break;
40925cf1a30Sjl 
41025cf1a30Sjl 	default:
41125cf1a30Sjl 		return (DDI_FM_OK);
41225cf1a30Sjl 	}
41325cf1a30Sjl 	/* Clear the errors */
41425cf1a30Sjl 	stdphysio(ecc_ii_p->pecc_afsr_pa, ecc_err_p->ecc_afsr);
41525cf1a30Sjl 	/*
41625cf1a30Sjl 	 * Clear the interrupt if called by pcmu_ecc_intr and UE error
41725cf1a30Sjl 	 * or if called by pcmu_ecc_intr and CE error and delayed CE
41825cf1a30Sjl 	 * interrupt handling is turned off.
41925cf1a30Sjl 	 */
42025cf1a30Sjl 	if (ecc_err_p->ecc_caller == PCI_ECC_CALL &&
42125cf1a30Sjl 	    ecc_ii_p->pecc_type == CBNINTR_UE && !fatal)
42225cf1a30Sjl 		pcmu_cb_clear_nintr(pcb_p, ecc_ii_p->pecc_type);
42325cf1a30Sjl 	if (!fatal && !nonfatal)
42425cf1a30Sjl 		return (DDI_FM_OK);
42525cf1a30Sjl 	else if (fatal)
42625cf1a30Sjl 		return (DDI_FM_FATAL);
42725cf1a30Sjl 	return (DDI_FM_NONFATAL);
42825cf1a30Sjl }
42925cf1a30Sjl 
43025cf1a30Sjl /*
43125cf1a30Sjl  * Function used to drain pcmu_ecc_queue, either during panic or after softint
43225cf1a30Sjl  * is generated, to log IO detected ECC errors.
43325cf1a30Sjl  */
434*bfec485cSjimand /* ARGSUSED */
43525cf1a30Sjl void
pcmu_ecc_err_drain(void * not_used,pcmu_ecc_errstate_t * ecc_err)43625cf1a30Sjl pcmu_ecc_err_drain(void *not_used, pcmu_ecc_errstate_t *ecc_err)
43725cf1a30Sjl {
43825cf1a30Sjl 	struct async_flt *ecc = &ecc_err->ecc_aflt;
43925cf1a30Sjl 	pcmu_t *pcmu_p = ecc_err->pecc_p->pecc_pcmu_p;
44025cf1a30Sjl 
44125cf1a30Sjl 	ecc_cpu_call(ecc, ecc_err->ecc_unum, ECC_IO_UE);
44225cf1a30Sjl 	ecc_err->ecc_err_type = "U";
44325cf1a30Sjl 	pcmu_ecc_ereport_post(pcmu_p->pcmu_dip, ecc_err);
44425cf1a30Sjl }
44525cf1a30Sjl 
44625cf1a30Sjl /*
44725cf1a30Sjl  * Function used to post IO detected ECC ereports.
44825cf1a30Sjl  */
44925cf1a30Sjl static void
pcmu_ecc_ereport_post(dev_info_t * dip,pcmu_ecc_errstate_t * ecc_err)45025cf1a30Sjl pcmu_ecc_ereport_post(dev_info_t *dip, pcmu_ecc_errstate_t *ecc_err)
45125cf1a30Sjl {
45225cf1a30Sjl 	char *aux_msg;
45325cf1a30Sjl 	pcmu_t *pcmu_p;
45425cf1a30Sjl 	int instance = ddi_get_instance(dip);
45525cf1a30Sjl 
45625cf1a30Sjl 	pcmu_p = get_pcmu_soft_state(instance);
45725cf1a30Sjl 	if (ecc_err->pecc_pri) {
45825cf1a30Sjl 		aux_msg = "PIO primary uncorrectable error";
45925cf1a30Sjl 	} else {
46025cf1a30Sjl 		aux_msg = "PIO secondary uncorrectable error";
46125cf1a30Sjl 	}
46225cf1a30Sjl 	cmn_err(CE_WARN, "%s %s: %s %s=0x%lx, %s=0x%lx, %s=0x%x",
46325cf1a30Sjl 		(pcmu_p->pcmu_pcbm_p)->pcbm_nameinst_str,
46425cf1a30Sjl 		(pcmu_p->pcmu_pcbm_p)->pcbm_nameaddr_str,
46525cf1a30Sjl 		aux_msg, PCI_ECC_AFSR, ecc_err->ecc_afsr,
46625cf1a30Sjl 		PCI_ECC_AFAR, ecc_err->ecc_aflt.flt_addr,
46725cf1a30Sjl 		"portid", ecc_err->ecc_aflt.flt_bus_id);
46825cf1a30Sjl }
469