xref: /illumos-gate/usr/src/uts/sun4v/os/error.c (revision 9d0d62ad)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5a812d870Sayznaga  * Common Development and Distribution License (the "License").
6a812d870Sayznaga  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22a6a91161SJason Beloro  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
282ae0af4bSep #include <sys/sysmacros.h>
297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
307c478bd9Sstevel@tonic-gate #include <sys/async.h>
317c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
327c478bd9Sstevel@tonic-gate #include <sys/ddifm.h>
337c478bd9Sstevel@tonic-gate #include <sys/hypervisor_api.h>
347c478bd9Sstevel@tonic-gate #include <sys/errorq.h>
357c478bd9Sstevel@tonic-gate #include <sys/promif.h>
367c478bd9Sstevel@tonic-gate #include <sys/prom_plat.h>
377c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
387c478bd9Sstevel@tonic-gate #include <sys/error.h>
397c478bd9Sstevel@tonic-gate #include <sys/fm/util.h>
403cac8019Srf #include <sys/ivintr.h>
4144961713Sgirish #include <sys/archsystm.h>
427c478bd9Sstevel@tonic-gate 
#define	MAX_CE_FLTS		10
#define	MAX_ASYNC_FLTS		6

errorq_t *ue_queue;			/* queue of uncorrectable errors */
errorq_t *ce_queue;			/* queue of correctable errors */

/*
 * Being used by memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not console.
 * If the value is 2, the error is logged to the log file and console.
 */
int	ce_verbose_memory = 1;
int	ce_verbose_other = 1;

/* Additional debug/verbosity knobs (patchable via /etc/system or kmdb). */
int	ce_show_data = 0;
int	ce_debug = 0;
int	ue_debug = 0;
int	reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs). Setting
 * these to non-default values on a non-DEBUG kernel is NOT supported.
 */
int	aft_verbose = 0;	/* log AFT messages > 1 to log only */
int	aft_panic = 0;		/* panic (not reboot) on fatal usermode AFLT */
int	aft_testfatal = 0;	/* force all AFTs to panic immediately */

/*
 * Used for vbsc hostshutdown (power-off button)
 */
int	err_shutdown_triggered = 0;	/* only once */
uint64_t err_shutdown_inum = 0;	/* used to pull the trigger */

/*
 * Used to print NRE/RE via system variable or kmdb
 */
int		printerrh = 0;		/* see /etc/system */
static void	errh_er_print(errh_er_t *, const char *);
kmutex_t	errh_print_lock;	/* serializes errh_er_print() output */

/*
 * Defined in bus_func.c but initialised in error_init
 */
extern kmutex_t bfd_lock;

static uint32_t rq_overflow_count = 0;		/* counter for rq overflow */

/* Forward declarations for the local handlers defined below. */
static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_retire(errh_async_flt_t *, uchar_t);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
static void errh_handle_attr(errh_async_flt_t *);
static void errh_handle_asr(errh_async_flt_t *);
1037c478bd9Sstevel@tonic-gate 
/*
 * Drain the per-CPU resumable error queue from head_offset up to (but not
 * including) tail_offset.  Each entry is copied to a local buffer, handed to
 * the appropriate handler based on its descriptor, and then queued for
 * logging.  The hypervisor-visible slot is released (ehdl cleared) as each
 * entry is consumed.
 */
/*ARGSUSED*/
void
process_resumable_error(struct regs *rp, uint32_t head_offset,
    uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the resumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
		    CPU_RQ_SIZE);
		/* Copy the error report to local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));
		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Remember the last RE seen to aid post-mortem debugging. */
		mcpup->cpu_rq_lastre = head_va;
		if (printerrh)
			errh_er_print(&errh_flt.errh_er, "RQ");

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around (CPU_RQ_SIZE is assumed to be a power of two) */
		head_offset &= (CPU_RQ_SIZE - 1);

		/* set error handle to zero so it can hold new error report */
		head_va->ehdl = 0;

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_UCOR_RE:
			/*
			 * Check error attribute, handle individual error
			 * if it is needed.
			 */
			errh_handle_attr(&errh_flt);
			break;

		case ERRH_DESC_WARN_RE:
			/*
			 * Power-off requested, but handle it one time only.
			 * The softint pulls the vbsc host-shutdown trigger.
			 */
			if (!err_shutdown_triggered) {
				setsoftint(err_shutdown_inum);
				++err_shutdown_triggered;
			}
			continue;

		default:
			/* Unknown descriptor: warn and skip the entry. */
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    " invalid in resumable error handler",
			    (long long) errh_flt.errh_er.desc);
			continue;
		}

		/* Fill in the common async-fault fields for logging. */
		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_class = CPU_FAULT;
		aflt->flt_prot = AFLT_PROT_NONE;
		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);

		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
			/* If it is an error on other cpu */
			aflt->flt_panic = 1;
		else
			aflt->flt_panic = 0;

		/*
		 * Handle resumable queue full case.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
			(void) errh_rq_full(aflt);
		}

		/*
		 * Queue the error on ce or ue queue depend on flt_panic.
		 * Even if flt_panic is set, the code still keep processing
		 * the rest element on rq until the panic starts.
		 */
		(void) cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable error on another CPU");
		}
	}
}
2007c478bd9Sstevel@tonic-gate 
/*
 * Drain the per-CPU non-resumable error queue from head_offset up to (but
 * not including) tail_offset.  For each entry we decide whether the fault
 * was protected (on_trap/t_lofault), whether to panic, and whether to post
 * an AST so the queue is drained before returning to user mode.
 *
 * flags encodes the trap level in ERRH_TL_MASK and, for the first entry
 * only, the ERRH_U_SPILL_FILL bit indicating the error occurred in a user
 * window spill/fill trap.
 */
void
process_nonresumable_error(struct regs *rp, uint64_t flags,
    uint32_t head_offset, uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;
	int trampolined = 0;
	int expected = DDI_FM_ERR_UNEXPECTED;
	uint64_t exec_mode;
	uint8_t u_spill_fill;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the nonresumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
		    CPU_NRQ_SIZE);

		/* Copy the error report to local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));

		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Remember the last NRE seen to aid post-mortem debugging. */
		mcpup->cpu_nrq_lastnre = head_va;
		if (printerrh)
			errh_er_print(&errh_flt.errh_er, "NRQ");

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around (CPU_NRQ_SIZE is assumed a power of two) */
		head_offset &= (CPU_NRQ_SIZE - 1);

		/* set error handle to zero so it can hold new error report */
		head_va->ehdl = 0;

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);

		trampolined = 0;

		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
			aflt->flt_class = BUS_FAULT;
		else
			aflt->flt_class = CPU_FAULT;

		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_pc = (caddr_t)rp->r_pc;
		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT;
		/* Unknown execution mode is treated as privileged. */
		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
		    exec_mode == ERRH_MODE_UNKNOWN);
		aflt->flt_prot = AFLT_PROT_NONE;
		aflt->flt_tl = (uchar_t)(flags & ERRH_TL_MASK);
		/* Errors at TL > 0 (or forced test mode) are always fatal. */
		aflt->flt_panic = ((aflt->flt_tl != 0) ||
		    (aft_testfatal != 0));

		/*
		 * For the first error packet on the queue, check if it
		 * happened in user fill/spill trap.
		 */
		if (flags & ERRH_U_SPILL_FILL) {
			u_spill_fill = 1;
			/* clear the user fill/spill flag in flags */
			flags = (uint64_t)aflt->flt_tl;
		} else
			u_spill_fill = 0;

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_PR_NRE:
			if (u_spill_fill) {
				/* User spill/fill fault: no panic needed. */
				aflt->flt_panic = 0;
				break;
			}
			/*
			 * Fall through: a precise fault also needs to be
			 * checked to see if it was protected.
			 */
			/*FALLTHRU*/

		case ERRH_DESC_DEF_NRE:
			/*
			 * If the trap occurred in privileged mode at TL=0,
			 * we need to check to see if we were executing
			 * in kernel under on_trap() or t_lofault
			 * protection. If so, and if it was a PIO or MEM
			 * error, then modify the saved registers so that
			 * we return from the trap to the appropriate
			 * trampoline routine.
			 */
			if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
			    ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
			    (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
				trampolined =
				    errh_error_protected(rp, aflt, &expected);
			}

			/*
			 * Unprotected kernel faults (other than bus faults,
			 * which get a chance at the nexus handlers below)
			 * are fatal; user/copy faults panic only if the
			 * aft_panic tunable says so.
			 */
			if (!aflt->flt_priv || aflt->flt_prot ==
			    AFLT_PROT_COPY) {
				aflt->flt_panic |= aft_panic;
			} else if (!trampolined &&
			    (aflt->flt_class != BUS_FAULT)) {
				aflt->flt_panic = 1;
			}

			/*
			 * Check error attribute, handle individual error
			 * if it is needed.
			 */
			errh_handle_attr(&errh_flt);

			/*
			 * If PIO error, we need to query the bus nexus
			 * for fatal errors.
			 */
			if (aflt->flt_class == BUS_FAULT) {
				aflt->flt_addr = errh_flt.errh_er.ra;
				errh_cpu_run_bus_error_handlers(aflt,
				    expected);
			}

			break;

		case ERRH_DESC_USER_DCORE:
			/*
			 * User generated panic. Call panic directly
			 * since there are no FMA e-reports to
			 * display.
			 */

			panic("Panic - Generated at user request");

			break;

		default:
			cmn_err(CE_WARN, "Panic - Error Descriptor 0x%llx "
			    " invalid in non-resumable error handler",
			    (long long) errh_flt.errh_er.desc);
			aflt->flt_panic = 1;
			break;
		}

		/*
		 * Queue the error report for further processing. If
		 * flt_panic is set, code still process other errors
		 * in the queue until the panic routine stops the
		 * kernel.
		 */
		(void) cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable hardware error");
		}

		/*
		 * Call page_retire() to handle memory errors.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
			errh_page_retire(&errh_flt, PR_UE);

		/*
		 * If we queued an error and the it was in user mode, or
		 * protected by t_lofault, or user_spill_fill is set, we
		 * set AST flag so the queue will be drained before
		 * returning to user mode.
		 */
		if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY ||
		    u_spill_fill) {
			int pcb_flag = 0;

			if (aflt->flt_class == CPU_FAULT)
				pcb_flag |= ASYNC_HWERR;
			else if (aflt->flt_class == BUS_FAULT)
				pcb_flag |= ASYNC_BERR;

			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
			aston(curthread);
		}
	}
}
3877c478bd9Sstevel@tonic-gate 
/*
 * For PIO errors, this routine calls nexus driver's error
 * callback routines. If the callback routine returns fatal, and
 * we are in kernel or unknown mode without any error protection,
 * we need to turn on the panic flag.
 */
void
errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	int status;
	ddi_fm_error_t de;

	bzero(&de, sizeof (ddi_fm_error_t));

	/* Build the DDI FM error descriptor handed to the nexus drivers. */
	de.fme_version = DDI_FME_VERSION;
	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;
	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

	/*
	 * If the error is protected, control will jump to the proper
	 * routine to handle it; if it is in user level, we just
	 * kill the user process; if the driver thinks the error is
	 * not fatal, we can drive on. If none of above are true,
	 * we panic.
	 */
	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
	    (status == DDI_FM_FATAL))
		aflt->flt_panic = 1;
}
4197c478bd9Sstevel@tonic-gate 
4207c478bd9Sstevel@tonic-gate /*
4217c478bd9Sstevel@tonic-gate  * This routine checks to see if we are under any error protection when
4227c478bd9Sstevel@tonic-gate  * the error happens. If we are under error protection, we unwind to
4237c478bd9Sstevel@tonic-gate  * the protection and indicate fault.
4247c478bd9Sstevel@tonic-gate  */
4257c478bd9Sstevel@tonic-gate static int
4267c478bd9Sstevel@tonic-gate errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
4277c478bd9Sstevel@tonic-gate {
4287c478bd9Sstevel@tonic-gate 	int trampolined = 0;
4297c478bd9Sstevel@tonic-gate 	ddi_acc_hdl_t *hp;
4307c478bd9Sstevel@tonic-gate 
4317c478bd9Sstevel@tonic-gate 	if (curthread->t_ontrap != NULL) {
4327c478bd9Sstevel@tonic-gate 		on_trap_data_t *otp = curthread->t_ontrap;
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 		if (otp->ot_prot & OT_DATA_EC) {
4357c478bd9Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_EC;
4367c478bd9Sstevel@tonic-gate 			otp->ot_trap |= OT_DATA_EC;
4377c478bd9Sstevel@tonic-gate 			rp->r_pc = otp->ot_trampoline;
4387c478bd9Sstevel@tonic-gate 			rp->r_npc = rp->r_pc +4;
4397c478bd9Sstevel@tonic-gate 			trampolined = 1;
4407c478bd9Sstevel@tonic-gate 		}
4417c478bd9Sstevel@tonic-gate 
4427c478bd9Sstevel@tonic-gate 		if (otp->ot_prot & OT_DATA_ACCESS) {
4437c478bd9Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_ACCESS;
4447c478bd9Sstevel@tonic-gate 			otp->ot_trap |= OT_DATA_ACCESS;
4457c478bd9Sstevel@tonic-gate 			rp->r_pc = otp->ot_trampoline;
4467c478bd9Sstevel@tonic-gate 			rp->r_npc = rp->r_pc + 4;
4477c478bd9Sstevel@tonic-gate 			trampolined = 1;
4487c478bd9Sstevel@tonic-gate 			/*
4497c478bd9Sstevel@tonic-gate 			 * for peek and caut_gets
4507c478bd9Sstevel@tonic-gate 			 * errors are expected
4517c478bd9Sstevel@tonic-gate 			 */
4527c478bd9Sstevel@tonic-gate 			hp = (ddi_acc_hdl_t *)otp->ot_handle;
4537c478bd9Sstevel@tonic-gate 			if (!hp)
4547c478bd9Sstevel@tonic-gate 				*expected = DDI_FM_ERR_PEEK;
4557c478bd9Sstevel@tonic-gate 			else if (hp->ah_acc.devacc_attr_access ==
4567c478bd9Sstevel@tonic-gate 			    DDI_CAUTIOUS_ACC)
4577c478bd9Sstevel@tonic-gate 				*expected = DDI_FM_ERR_EXPECTED;
4587c478bd9Sstevel@tonic-gate 		}
4597c478bd9Sstevel@tonic-gate 	} else if (curthread->t_lofault) {
4607c478bd9Sstevel@tonic-gate 		aflt->flt_prot = AFLT_PROT_COPY;
4617c478bd9Sstevel@tonic-gate 		rp->r_g1 = EFAULT;
4627c478bd9Sstevel@tonic-gate 		rp->r_pc = curthread->t_lofault;
4637c478bd9Sstevel@tonic-gate 		rp->r_npc = rp->r_pc + 4;
4647c478bd9Sstevel@tonic-gate 		trampolined = 1;
4657c478bd9Sstevel@tonic-gate 	}
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 	return (trampolined);
4687c478bd9Sstevel@tonic-gate }
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate /*
4717c478bd9Sstevel@tonic-gate  * Queue one event.
4727c478bd9Sstevel@tonic-gate  */
4737c478bd9Sstevel@tonic-gate static void
4747c478bd9Sstevel@tonic-gate cpu_queue_one_event(errh_async_flt_t *errh_fltp)
4757c478bd9Sstevel@tonic-gate {
4767c478bd9Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)errh_fltp;
4777c478bd9Sstevel@tonic-gate 	errorq_t *eqp;
4787c478bd9Sstevel@tonic-gate 
4797c478bd9Sstevel@tonic-gate 	if (aflt->flt_panic)
4807c478bd9Sstevel@tonic-gate 		eqp = ue_queue;
4817c478bd9Sstevel@tonic-gate 	else
4827c478bd9Sstevel@tonic-gate 		eqp = ce_queue;
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate 	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
4857c478bd9Sstevel@tonic-gate 	    aflt->flt_panic);
4867c478bd9Sstevel@tonic-gate }
4877c478bd9Sstevel@tonic-gate 
/*
 * The cpu_async_log_err() function is called by the ce/ue_drain() function to
 * handle logging for CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and log appropriate messages.
 */
void
cpu_async_log_err(void *flt)
{
	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;

	switch (errh_erp->desc) {
	case ERRH_DESC_UCOR_RE:
		/* Correctable (resumable) error. */
		if (errh_erp->attr & ERRH_ATTR_MEM) {
			/*
			 * Turn on the PR_UE flag. The page will be
			 * scrubbed when it is freed.
			 */
			errh_page_retire(errh_fltp, PR_UE);
		}

		break;

	case ERRH_DESC_PR_NRE:
	case ERRH_DESC_DEF_NRE:
		/* Precise or deferred non-resumable error. */
		if (errh_erp->attr & ERRH_ATTR_MEM) {
			/*
			 * For non-resumable memory error, retire
			 * the page here.
			 */
			errh_page_retire(errh_fltp, PR_UE);

			/*
			 * If we are going to panic, scrub the page first
			 */
			if (errh_fltp->cmn_asyncflt.flt_panic)
				mem_scrub(errh_fltp->errh_er.ra,
				    errh_fltp->errh_er.sz);
		}
		break;

	default:
		/* Other descriptors require no CPU-specific logging. */
		break;
	}
}
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate /*
5367c478bd9Sstevel@tonic-gate  * Called from ce_drain().
5377c478bd9Sstevel@tonic-gate  */
5387c478bd9Sstevel@tonic-gate void
5397c478bd9Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt)
5407c478bd9Sstevel@tonic-gate {
5417c478bd9Sstevel@tonic-gate 	switch (aflt->flt_class) {
5427c478bd9Sstevel@tonic-gate 	case CPU_FAULT:
5437c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
5447c478bd9Sstevel@tonic-gate 		break;
5457c478bd9Sstevel@tonic-gate 
5467c478bd9Sstevel@tonic-gate 	case BUS_FAULT:
5477c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
5487c478bd9Sstevel@tonic-gate 		break;
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 	default:
5517c478bd9Sstevel@tonic-gate 		break;
5527c478bd9Sstevel@tonic-gate 	}
5537c478bd9Sstevel@tonic-gate }
5547c478bd9Sstevel@tonic-gate 
5557c478bd9Sstevel@tonic-gate /*
5567c478bd9Sstevel@tonic-gate  * Called from ue_drain().
5577c478bd9Sstevel@tonic-gate  */
5587c478bd9Sstevel@tonic-gate void
5597c478bd9Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
5607c478bd9Sstevel@tonic-gate {
5617c478bd9Sstevel@tonic-gate 	switch (aflt->flt_class) {
5627c478bd9Sstevel@tonic-gate 	case CPU_FAULT:
5637c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
5647c478bd9Sstevel@tonic-gate 		break;
5657c478bd9Sstevel@tonic-gate 
5667c478bd9Sstevel@tonic-gate 	case BUS_FAULT:
5677c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
5687c478bd9Sstevel@tonic-gate 		break;
5697c478bd9Sstevel@tonic-gate 
5707c478bd9Sstevel@tonic-gate 	default:
5717c478bd9Sstevel@tonic-gate 		break;
5727c478bd9Sstevel@tonic-gate 	}
5737c478bd9Sstevel@tonic-gate }
5747c478bd9Sstevel@tonic-gate 
/*
 * Turn on the given page_retire() flag for every page spanned by the
 * error report's real-address range.  A zero-sized report is ignored.
 */
static void
errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
{
	/* First and (inclusive) last real addresses of the faulty region. */
	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
	uint64_t flt_real_addr_end = flt_real_addr_start +
	    errh_fltp->errh_er.sz - 1;
	int64_t current_addr;

	if (errh_fltp->errh_er.sz == 0)
		return;

	/* Walk the region one MMU page at a time, retiring each page. */
	for (current_addr = flt_real_addr_start;
	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
		(void) page_retire(current_addr, flag);
	}
}
5947c478bd9Sstevel@tonic-gate 
/*
 * Scrub the memory region [paddr, paddr + len) via the hypervisor.
 * hv_mem_scrub() may scrub only part of the request, so loop until
 * the whole region is done or the hypervisor reports an error.
 */
void
mem_scrub(uint64_t paddr, uint64_t len)
{
	uint64_t pa, length, scrubbed_len;

	pa = paddr;
	length = len;
	scrubbed_len = 0;

	while (length > 0) {
		/* Give up silently on any hypervisor failure. */
		if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
			break;

		/* Advance past whatever the hypervisor actually scrubbed. */
		pa += scrubbed_len;
		length -= scrubbed_len;
	}
}
6127c478bd9Sstevel@tonic-gate 
613ad559ebfSwh /*
6142ae0af4bSep  * Call hypervisor to flush the memory region.
6152ae0af4bSep  * Both va and len must be MMU_PAGESIZE aligned.
6162ae0af4bSep  * Returns the total number of bytes flushed.
617ad559ebfSwh  */
6182ae0af4bSep uint64_t
6195ccb2ff8Swh mem_sync(caddr_t orig_va, size_t orig_len)
6207c478bd9Sstevel@tonic-gate {
6217c478bd9Sstevel@tonic-gate 	uint64_t pa, length, flushed;
6222ae0af4bSep 	uint64_t chunk_len = MMU_PAGESIZE;
6232ae0af4bSep 	uint64_t total_flushed = 0;
6245ccb2ff8Swh 	uint64_t va, len;
6257c478bd9Sstevel@tonic-gate 
6265ccb2ff8Swh 	if (orig_len == 0)
6272ae0af4bSep 		return (total_flushed);
6287c478bd9Sstevel@tonic-gate 
6295ccb2ff8Swh 	/* align va */
6305ccb2ff8Swh 	va = P2ALIGN_TYPED(orig_va, MMU_PAGESIZE, uint64_t);
6315ccb2ff8Swh 	/* round up len to MMU_PAGESIZE aligned */
6325ccb2ff8Swh 	len = P2ROUNDUP_TYPED(orig_va + orig_len, MMU_PAGESIZE, uint64_t) - va;
6335ccb2ff8Swh 
6342ae0af4bSep 	while (len > 0) {
6352ae0af4bSep 		pa = va_to_pa((caddr_t)va);
6362ae0af4bSep 		if (pa == (uint64_t)-1)
6372ae0af4bSep 			return (total_flushed);
6387c478bd9Sstevel@tonic-gate 
6392ae0af4bSep 		length = chunk_len;
6402ae0af4bSep 		flushed = 0;
641ad559ebfSwh 
6422ae0af4bSep 		while (length > 0) {
6432ae0af4bSep 			if (hv_mem_sync(pa, length, &flushed) != H_EOK)
6442ae0af4bSep 				return (total_flushed);
6457c478bd9Sstevel@tonic-gate 
6462ae0af4bSep 			pa += flushed;
6472ae0af4bSep 			length -= flushed;
6482ae0af4bSep 			total_flushed += flushed;
6492ae0af4bSep 		}
6507c478bd9Sstevel@tonic-gate 
6512ae0af4bSep 		va += chunk_len;
6522ae0af4bSep 		len -= chunk_len;
6537c478bd9Sstevel@tonic-gate 	}
6542ae0af4bSep 
6552ae0af4bSep 	return (total_flushed);
6567c478bd9Sstevel@tonic-gate }
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate /*
6597c478bd9Sstevel@tonic-gate  * If resumable queue is full, we need to check if any cpu is in
6607c478bd9Sstevel@tonic-gate  * error state. If not, we drive on. If yes, we need to panic. The
6617c478bd9Sstevel@tonic-gate  * hypervisor call hv_cpu_state() is being used for checking the
662367c34e9Srf  * cpu state.  And reset %tick_compr in case tick-compare was lost.
6637c478bd9Sstevel@tonic-gate  */
6647c478bd9Sstevel@tonic-gate static void
6657c478bd9Sstevel@tonic-gate errh_rq_full(struct async_flt *afltp)
6667c478bd9Sstevel@tonic-gate {
6677c478bd9Sstevel@tonic-gate 	processorid_t who;
6687c478bd9Sstevel@tonic-gate 	uint64_t cpu_state;
6697c478bd9Sstevel@tonic-gate 	uint64_t retval;
670367c34e9Srf 	uint64_t current_tick;
671367c34e9Srf 
672367c34e9Srf 	current_tick = (uint64_t)gettick();
673367c34e9Srf 	tickcmpr_set(current_tick);
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate 	for (who = 0; who < NCPU; who++)
6767c478bd9Sstevel@tonic-gate 		if (CPU_IN_SET(cpu_ready_set, who)) {
6777c478bd9Sstevel@tonic-gate 			retval = hv_cpu_state(who, &cpu_state);
6787c478bd9Sstevel@tonic-gate 			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
6797c478bd9Sstevel@tonic-gate 				afltp->flt_panic = 1;
6807c478bd9Sstevel@tonic-gate 				break;
6817c478bd9Sstevel@tonic-gate 			}
6827c478bd9Sstevel@tonic-gate 		}
6837c478bd9Sstevel@tonic-gate }
6847c478bd9Sstevel@tonic-gate 
/*
 * Return processor specific async error structure
 * size used.
 */
int
cpu_aflt_size(void)
{
	/* sun4v wraps struct async_flt in errh_async_flt_t. */
	return (sizeof (errh_async_flt_t));
}
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate #define	SZ_TO_ETRS_SHIFT	6
6967c478bd9Sstevel@tonic-gate 
/*
 * Called when the resumable queue is overflown.  No message is
 * actually printed; the event is only counted so it remains
 * visible to a debugger.
 */
/*ARGSUSED*/
void
rq_overflow(struct regs *rp, uint64_t head_offset,
    uint64_t tail_offset)
{
	rq_overflow_count++;
}
7077c478bd9Sstevel@tonic-gate 
/*
 * Handler to process a fatal error.  This routine can be called from a
 * softint, called from trap()'s AST handling, or called from the panic flow.
 */
/*ARGSUSED*/
static void
ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
	/* Hand the fault off to the CPU module's UE logging routine. */
	cpu_ue_log_err(aflt);
}
7187c478bd9Sstevel@tonic-gate 
/*
 * Handler to process a correctable error.  This routine can be called from a
 * softint.  We just call the CPU module's logging routine.
 */
/*ARGSUSED*/
static void
ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
	/* Hand the fault off to the CPU module's CE logging routine. */
	cpu_ce_log_err(aflt);
}
7297c478bd9Sstevel@tonic-gate 
7303cac8019Srf /*
7313cac8019Srf  * Handler to process vbsc hostshutdown (power-off button).
7323cac8019Srf  */
7333cac8019Srf static int
7343cac8019Srf err_shutdown_softintr()
7353cac8019Srf {
7363cac8019Srf 	cmn_err(CE_WARN, "Power-off requested, system will now shutdown.");
7373cac8019Srf 	do_shutdown();
7383cac8019Srf 
7393cac8019Srf 	/*
7403cac8019Srf 	 * just in case do_shutdown() fails
7413cac8019Srf 	 */
7423cac8019Srf 	(void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
7433cac8019Srf 	return (DDI_INTR_CLAIMED);
7443cac8019Srf }
7453cac8019Srf 
/*
 * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
 * after ncpunode has been determined.  ncpus is set in start_other_cpus
 * which is called after error_init() but may change dynamically.
 */
void
error_init(void)
{
	char tmp_name[MAXSYSNAME];
	pnode_t node;
	size_t size = cpu_aflt_size();

	/*
	 * Initialize the correctable and uncorrectable error queues.
	 */
	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);

	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);

	if (ue_queue == NULL || ce_queue == NULL)
		panic("failed to create required system error queue");

	/*
	 * Setup interrupt handler for power-off button.
	 */
	err_shutdown_inum = add_softintr(PIL_9,
	    (softintrfunc)err_shutdown_softintr, NULL, SOFTINT_ST);

	/*
	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
	 * because we will need to acquire it from cpu_async_error().
	 */
	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);

	/* Only allow one cpu at a time to dump errh errors. */
	mutex_init(&errh_print_lock, NULL, MUTEX_SPIN, (void *)PIL_15);

	node = prom_rootnode();
	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
		return;
	}

	/*
	 * Report the PROM-recorded reset reason, if any.
	 * NOTE(review): `size` (size_t) is reused here for
	 * prom_getproplen()'s signed result; the != -1 test relies on
	 * the usual arithmetic conversion of -1 to SIZE_MAX.
	 */
	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
	    (size <= MAXSYSNAME) &&
	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
		if (reset_debug) {
			cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
			cmn_err(CE_CONT,
			    "System booting after fatal error %s\n", tmp_name);
		}
	}
}
80218aea0b1Swh 
/*
 * Nonresumable queue is full, panic here
 */
/*ARGSUSED*/
void
nrq_overflow(struct regs *rp)
{
	/*
	 * Overflow means a pending nonresumable (potentially fatal)
	 * error was dropped; we cannot safely continue.
	 */
	fm_panic("Nonresumable queue full");
}
81244961713Sgirish 
81344961713Sgirish /*
81444961713Sgirish  * This is the place for special error handling for individual errors.
81544961713Sgirish  */
81644961713Sgirish static void
81744961713Sgirish errh_handle_attr(errh_async_flt_t *errh_fltp)
81844961713Sgirish {
81944961713Sgirish 	switch (errh_fltp->errh_er.attr & ~ERRH_MODE_MASK) {
82044961713Sgirish 	case ERRH_ATTR_CPU:
82144961713Sgirish 	case ERRH_ATTR_MEM:
82244961713Sgirish 	case ERRH_ATTR_PIO:
82344961713Sgirish 	case ERRH_ATTR_IRF:
82444961713Sgirish 	case ERRH_ATTR_FRF:
82544961713Sgirish 	case ERRH_ATTR_SHUT:
82644961713Sgirish 		break;
82744961713Sgirish 
82844961713Sgirish 	case ERRH_ATTR_ASR:
82944961713Sgirish 		errh_handle_asr(errh_fltp);
83044961713Sgirish 		break;
83144961713Sgirish 
83244961713Sgirish 	case ERRH_ATTR_ASI:
83344961713Sgirish 	case ERRH_ATTR_PREG:
83444961713Sgirish 	case ERRH_ATTR_RQF:
83544961713Sgirish 		break;
83644961713Sgirish 
83744961713Sgirish 	default:
83844961713Sgirish 		break;
83944961713Sgirish 	}
84044961713Sgirish }
84144961713Sgirish 
84244961713Sgirish /*
84344961713Sgirish  * Handle ASR bit set in ATTR
84444961713Sgirish  */
84544961713Sgirish static void
84644961713Sgirish errh_handle_asr(errh_async_flt_t *errh_fltp)
84744961713Sgirish {
84844961713Sgirish 	uint64_t current_tick;
84944961713Sgirish 
85044961713Sgirish 	switch (errh_fltp->errh_er.reg) {
85144961713Sgirish 	case ASR_REG_VALID | ASR_REG_TICK:
85244961713Sgirish 		/*
85344961713Sgirish 		 * For Tick Compare Register error, it only happens when
85444961713Sgirish 		 * the register is being read or compared with the %tick
85544961713Sgirish 		 * register. Since we lost the contents of the register,
85644961713Sgirish 		 * we set the %tick_compr in the future. An interrupt will
85744961713Sgirish 		 * happen when %tick matches the value field of %tick_compr.
85844961713Sgirish 		 */
85944961713Sgirish 		current_tick = (uint64_t)gettick();
86044961713Sgirish 		tickcmpr_set(current_tick);
86144961713Sgirish 		/* Do not panic */
86244961713Sgirish 		errh_fltp->cmn_asyncflt.flt_panic = 0;
86344961713Sgirish 		break;
86444961713Sgirish 
86544961713Sgirish 	default:
86644961713Sgirish 		break;
86744961713Sgirish 	}
86844961713Sgirish }
869a60fc142Srf 
870a60fc142Srf /*
871a60fc142Srf  * Dump the error packet
872a60fc142Srf  */
873a60fc142Srf /*ARGSUSED*/
874a60fc142Srf static void
875a60fc142Srf errh_er_print(errh_er_t *errh_erp, const char *queue)
876a60fc142Srf {
877a60fc142Srf 	typedef union {
878a60fc142Srf 		uint64_t w;
879a60fc142Srf 		uint16_t s[4];
880a60fc142Srf 	} errhp_t;
881a60fc142Srf 	errhp_t *p = (errhp_t *)errh_erp;
882a60fc142Srf 	int i;
883a60fc142Srf 
884a60fc142Srf 	mutex_enter(&errh_print_lock);
885a60fc142Srf 	switch (errh_erp->desc) {
886a60fc142Srf 	case ERRH_DESC_UCOR_RE:
887a60fc142Srf 		cmn_err(CE_CONT, "\nResumable Uncorrectable Error ");
888a60fc142Srf 		break;
889a60fc142Srf 	case ERRH_DESC_PR_NRE:
890a60fc142Srf 		cmn_err(CE_CONT, "\nNonresumable Precise Error ");
891a60fc142Srf 		break;
892a60fc142Srf 	case ERRH_DESC_DEF_NRE:
893a60fc142Srf 		cmn_err(CE_CONT, "\nNonresumable Deferred Error ");
894a60fc142Srf 		break;
895a60fc142Srf 	default:
896a60fc142Srf 		cmn_err(CE_CONT, "\nError packet ");
897a60fc142Srf 		break;
898a60fc142Srf 	}
899a60fc142Srf 	cmn_err(CE_CONT, "received on %s\n", queue);
900a60fc142Srf 
901a60fc142Srf 	/*
902a60fc142Srf 	 * Print Q_ENTRY_SIZE bytes of epacket with 8 bytes per line
903a60fc142Srf 	 */
904a60fc142Srf 	for (i = Q_ENTRY_SIZE; i > 0; i -= 8, ++p) {
905a60fc142Srf 		cmn_err(CE_CONT, "%016lx: %04x %04x %04x %04x\n", (uint64_t)p,
906a60fc142Srf 		    p->s[0], p->s[1], p->s[2], p->s[3]);
907a60fc142Srf 	}
908a60fc142Srf 	mutex_exit(&errh_print_lock);
909a60fc142Srf }
910