/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
282ae0af4bSep #include <sys/sysmacros.h>
297c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
307c478bd9Sstevel@tonic-gate #include <sys/async.h>
317c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
327c478bd9Sstevel@tonic-gate #include <sys/ddifm.h>
337c478bd9Sstevel@tonic-gate #include <sys/hypervisor_api.h>
347c478bd9Sstevel@tonic-gate #include <sys/errorq.h>
357c478bd9Sstevel@tonic-gate #include <sys/promif.h>
367c478bd9Sstevel@tonic-gate #include <sys/prom_plat.h>
377c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
387c478bd9Sstevel@tonic-gate #include <sys/error.h>
397c478bd9Sstevel@tonic-gate #include <sys/fm/util.h>
403cac8019Srf #include <sys/ivintr.h>
4144961713Sgirish #include <sys/archsystm.h>
427c478bd9Sstevel@tonic-gate
437c478bd9Sstevel@tonic-gate #define MAX_CE_FLTS 10
447c478bd9Sstevel@tonic-gate #define MAX_ASYNC_FLTS 6
457c478bd9Sstevel@tonic-gate
467c478bd9Sstevel@tonic-gate errorq_t *ue_queue; /* queue of uncorrectable errors */
477c478bd9Sstevel@tonic-gate errorq_t *ce_queue; /* queue of correctable errors */
48b7b0558aSAnthony Yznaga errorq_t *errh_queue; /* queue of sun4v error reports */
497c478bd9Sstevel@tonic-gate
/*
 * Correctable-error logging verbosity, used by the memory test driver:
 *
 *	ce_verbose_memory	CEs in DIMMs
 *	ce_verbose_other	all "other" CEs (ecache, IO, etc.)
 *
 * Meaning of the value:
 *
 *	0	nothing is logged
 *	1	logged to the log file, but not to the console
 *	2	logged to both the log file and the console
 */
int ce_verbose_memory = 1;
int ce_verbose_other = 1;

/* Debug switches, all off by default. */
int ce_show_data = 0;
int ce_debug = 0;
int ue_debug = 0;
int reset_debug = 0;
667c478bd9Sstevel@tonic-gate
/*
 * Asynchronous fault (AFT) handling tunables.  Running a non-DEBUG kernel
 * with any of these changed from its default is NOT supported.
 */
int aft_verbose = 0;	/* log AFT messages > 1 to log only */
int aft_panic = 0;	/* panic (rather than reboot) on fatal usermode AFLT */
int aft_testfatal = 0;	/* make every AFT panic immediately */
747c478bd9Sstevel@tonic-gate
753cac8019Srf /*
76a60fc142Srf * Used for vbsc hostshutdown (power-off button)
773cac8019Srf */
783cac8019Srf int err_shutdown_triggered = 0; /* only once */
79b0fc0e77Sgovinda uint64_t err_shutdown_inum = 0; /* used to pull the trigger */
803cac8019Srf
81a60fc142Srf /*
82a60fc142Srf * Used to print NRE/RE via system variable or kmdb
83a60fc142Srf */
84a60fc142Srf int printerrh = 0; /* see /etc/system */
85a60fc142Srf static void errh_er_print(errh_er_t *, const char *);
86a60fc142Srf kmutex_t errh_print_lock;
87a60fc142Srf
887c478bd9Sstevel@tonic-gate /*
897c478bd9Sstevel@tonic-gate * Defined in bus_func.c but initialised in error_init
907c478bd9Sstevel@tonic-gate */
917c478bd9Sstevel@tonic-gate extern kmutex_t bfd_lock;
927c478bd9Sstevel@tonic-gate
937c478bd9Sstevel@tonic-gate static uint32_t rq_overflow_count = 0; /* counter for rq overflow */
947c478bd9Sstevel@tonic-gate
957c478bd9Sstevel@tonic-gate static void cpu_queue_one_event(errh_async_flt_t *);
967c478bd9Sstevel@tonic-gate static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
97db874c57Selowe static void errh_page_retire(errh_async_flt_t *, uchar_t);
987c478bd9Sstevel@tonic-gate static int errh_error_protected(struct regs *, struct async_flt *, int *);
997c478bd9Sstevel@tonic-gate static void errh_rq_full(struct async_flt *);
1007c478bd9Sstevel@tonic-gate static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
1017c478bd9Sstevel@tonic-gate static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
102b7b0558aSAnthony Yznaga static void errh_drain(void *, errh_er_t *, errorq_elem_t *);
10344961713Sgirish static void errh_handle_attr(errh_async_flt_t *);
10444961713Sgirish static void errh_handle_asr(errh_async_flt_t *);
105b7b0558aSAnthony Yznaga static void errh_handle_sp(errh_er_t *);
1064df55fdeSJanie Lu static void sp_ereport_post(uint8_t);
1077c478bd9Sstevel@tonic-gate
1087c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1097c478bd9Sstevel@tonic-gate void
process_resumable_error(struct regs * rp,uint32_t head_offset,uint32_t tail_offset)1107c478bd9Sstevel@tonic-gate process_resumable_error(struct regs *rp, uint32_t head_offset,
1117c478bd9Sstevel@tonic-gate uint32_t tail_offset)
1127c478bd9Sstevel@tonic-gate {
1137c478bd9Sstevel@tonic-gate struct machcpu *mcpup;
1147c478bd9Sstevel@tonic-gate struct async_flt *aflt;
1157c478bd9Sstevel@tonic-gate errh_async_flt_t errh_flt;
1167c478bd9Sstevel@tonic-gate errh_er_t *head_va;
1177c478bd9Sstevel@tonic-gate
1187c478bd9Sstevel@tonic-gate mcpup = &(CPU->cpu_m);
1197c478bd9Sstevel@tonic-gate
1207c478bd9Sstevel@tonic-gate while (head_offset != tail_offset) {
1217c478bd9Sstevel@tonic-gate /* kernel buffer starts right after the resumable queue */
1227c478bd9Sstevel@tonic-gate head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
1237c478bd9Sstevel@tonic-gate CPU_RQ_SIZE);
1247c478bd9Sstevel@tonic-gate /* Copy the error report to local buffer */
1257c478bd9Sstevel@tonic-gate bzero(&errh_flt, sizeof (errh_async_flt_t));
1267c478bd9Sstevel@tonic-gate bcopy((char *)head_va, &(errh_flt.errh_er),
1277c478bd9Sstevel@tonic-gate sizeof (errh_er_t));
1287c478bd9Sstevel@tonic-gate
129a60fc142Srf mcpup->cpu_rq_lastre = head_va;
130a60fc142Srf if (printerrh)
131a60fc142Srf errh_er_print(&errh_flt.errh_er, "RQ");
132a60fc142Srf
1337c478bd9Sstevel@tonic-gate /* Increment the queue head */
1347c478bd9Sstevel@tonic-gate head_offset += Q_ENTRY_SIZE;
1357c478bd9Sstevel@tonic-gate /* Wrap around */
1367c478bd9Sstevel@tonic-gate head_offset &= (CPU_RQ_SIZE - 1);
1377c478bd9Sstevel@tonic-gate
1387c478bd9Sstevel@tonic-gate /* set error handle to zero so it can hold new error report */
1397c478bd9Sstevel@tonic-gate head_va->ehdl = 0;
1407c478bd9Sstevel@tonic-gate
1417c478bd9Sstevel@tonic-gate switch (errh_flt.errh_er.desc) {
1427c478bd9Sstevel@tonic-gate case ERRH_DESC_UCOR_RE:
14344961713Sgirish /*
14444961713Sgirish * Check error attribute, handle individual error
14544961713Sgirish * if it is needed.
14644961713Sgirish */
14744961713Sgirish errh_handle_attr(&errh_flt);
1487c478bd9Sstevel@tonic-gate break;
1497c478bd9Sstevel@tonic-gate
1503cac8019Srf case ERRH_DESC_WARN_RE:
1513cac8019Srf /*
1523cac8019Srf * Power-off requested, but handle it one time only.
1533cac8019Srf */
1543cac8019Srf if (!err_shutdown_triggered) {
1553cac8019Srf setsoftint(err_shutdown_inum);
1563cac8019Srf ++err_shutdown_triggered;
1573cac8019Srf }
1583cac8019Srf continue;
1593cac8019Srf
1604df55fdeSJanie Lu case ERRH_DESC_SP:
1614df55fdeSJanie Lu /*
1624df55fdeSJanie Lu * The state of the SP has changed.
1634df55fdeSJanie Lu */
164b7b0558aSAnthony Yznaga errorq_dispatch(errh_queue, &errh_flt.errh_er,
165b7b0558aSAnthony Yznaga sizeof (errh_er_t), ERRORQ_ASYNC);
1664df55fdeSJanie Lu continue;
1674df55fdeSJanie Lu
1687c478bd9Sstevel@tonic-gate default:
1697c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "Error Descriptor 0x%llx "
1707c478bd9Sstevel@tonic-gate " invalid in resumable error handler",
1717c478bd9Sstevel@tonic-gate (long long) errh_flt.errh_er.desc);
1727c478bd9Sstevel@tonic-gate continue;
1737c478bd9Sstevel@tonic-gate }
1747c478bd9Sstevel@tonic-gate
1757c478bd9Sstevel@tonic-gate aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
1767c478bd9Sstevel@tonic-gate aflt->flt_id = gethrtime();
1777c478bd9Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid();
1787c478bd9Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
1797c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE;
1807c478bd9Sstevel@tonic-gate aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
1817c478bd9Sstevel@tonic-gate >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);
1827c478bd9Sstevel@tonic-gate
1837c478bd9Sstevel@tonic-gate if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
1847c478bd9Sstevel@tonic-gate /* If it is an error on other cpu */
1857c478bd9Sstevel@tonic-gate aflt->flt_panic = 1;
1867c478bd9Sstevel@tonic-gate else
1877c478bd9Sstevel@tonic-gate aflt->flt_panic = 0;
1887c478bd9Sstevel@tonic-gate
1897c478bd9Sstevel@tonic-gate /*
1907c478bd9Sstevel@tonic-gate * Handle resumable queue full case.
1917c478bd9Sstevel@tonic-gate */
1927c478bd9Sstevel@tonic-gate if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
1937c478bd9Sstevel@tonic-gate (void) errh_rq_full(aflt);
1947c478bd9Sstevel@tonic-gate }
1957c478bd9Sstevel@tonic-gate
1967c478bd9Sstevel@tonic-gate /*
1977c478bd9Sstevel@tonic-gate * Queue the error on ce or ue queue depend on flt_panic.
1987c478bd9Sstevel@tonic-gate * Even if flt_panic is set, the code still keep processing
1997c478bd9Sstevel@tonic-gate * the rest element on rq until the panic starts.
2007c478bd9Sstevel@tonic-gate */
2017c478bd9Sstevel@tonic-gate (void) cpu_queue_one_event(&errh_flt);
2027c478bd9Sstevel@tonic-gate
2037c478bd9Sstevel@tonic-gate /*
2047c478bd9Sstevel@tonic-gate * Panic here if aflt->flt_panic has been set.
2057c478bd9Sstevel@tonic-gate * Enqueued errors will be logged as part of the panic flow.
2067c478bd9Sstevel@tonic-gate */
2077c478bd9Sstevel@tonic-gate if (aflt->flt_panic) {
2087c478bd9Sstevel@tonic-gate fm_panic("Unrecoverable error on another CPU");
2097c478bd9Sstevel@tonic-gate }
2107c478bd9Sstevel@tonic-gate }
2117c478bd9Sstevel@tonic-gate }
2127c478bd9Sstevel@tonic-gate
2137c478bd9Sstevel@tonic-gate void
process_nonresumable_error(struct regs * rp,uint64_t flags,uint32_t head_offset,uint32_t tail_offset)214ad559ebfSwh process_nonresumable_error(struct regs *rp, uint64_t flags,
2157c478bd9Sstevel@tonic-gate uint32_t head_offset, uint32_t tail_offset)
2167c478bd9Sstevel@tonic-gate {
2177c478bd9Sstevel@tonic-gate struct machcpu *mcpup;
2187c478bd9Sstevel@tonic-gate struct async_flt *aflt;
2197c478bd9Sstevel@tonic-gate errh_async_flt_t errh_flt;
2207c478bd9Sstevel@tonic-gate errh_er_t *head_va;
2217c478bd9Sstevel@tonic-gate int trampolined = 0;
2227c478bd9Sstevel@tonic-gate int expected = DDI_FM_ERR_UNEXPECTED;
2237c478bd9Sstevel@tonic-gate uint64_t exec_mode;
224ad559ebfSwh uint8_t u_spill_fill;
2257c478bd9Sstevel@tonic-gate
2267c478bd9Sstevel@tonic-gate mcpup = &(CPU->cpu_m);
2277c478bd9Sstevel@tonic-gate
2287c478bd9Sstevel@tonic-gate while (head_offset != tail_offset) {
2297c478bd9Sstevel@tonic-gate /* kernel buffer starts right after the nonresumable queue */
2307c478bd9Sstevel@tonic-gate head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
2317c478bd9Sstevel@tonic-gate CPU_NRQ_SIZE);
2327c478bd9Sstevel@tonic-gate
2337c478bd9Sstevel@tonic-gate /* Copy the error report to local buffer */
2347c478bd9Sstevel@tonic-gate bzero(&errh_flt, sizeof (errh_async_flt_t));
2357c478bd9Sstevel@tonic-gate
2367c478bd9Sstevel@tonic-gate bcopy((char *)head_va, &(errh_flt.errh_er),
2377c478bd9Sstevel@tonic-gate sizeof (errh_er_t));
2387c478bd9Sstevel@tonic-gate
239a60fc142Srf mcpup->cpu_nrq_lastnre = head_va;
240a60fc142Srf if (printerrh)
241a60fc142Srf errh_er_print(&errh_flt.errh_er, "NRQ");
242a60fc142Srf
2437c478bd9Sstevel@tonic-gate /* Increment the queue head */
2447c478bd9Sstevel@tonic-gate head_offset += Q_ENTRY_SIZE;
2457c478bd9Sstevel@tonic-gate /* Wrap around */
2467c478bd9Sstevel@tonic-gate head_offset &= (CPU_NRQ_SIZE - 1);
2477c478bd9Sstevel@tonic-gate
2487c478bd9Sstevel@tonic-gate /* set error handle to zero so it can hold new error report */
2497c478bd9Sstevel@tonic-gate head_va->ehdl = 0;
2507c478bd9Sstevel@tonic-gate
2517c478bd9Sstevel@tonic-gate aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
2527c478bd9Sstevel@tonic-gate
2537c478bd9Sstevel@tonic-gate trampolined = 0;
2547c478bd9Sstevel@tonic-gate
2557c478bd9Sstevel@tonic-gate if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
2567c478bd9Sstevel@tonic-gate aflt->flt_class = BUS_FAULT;
2577c478bd9Sstevel@tonic-gate else
2587c478bd9Sstevel@tonic-gate aflt->flt_class = CPU_FAULT;
2597c478bd9Sstevel@tonic-gate
2607c478bd9Sstevel@tonic-gate aflt->flt_id = gethrtime();
2617c478bd9Sstevel@tonic-gate aflt->flt_bus_id = getprocessorid();
2627c478bd9Sstevel@tonic-gate aflt->flt_pc = (caddr_t)rp->r_pc;
2637c478bd9Sstevel@tonic-gate exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
2647c478bd9Sstevel@tonic-gate >> ERRH_MODE_SHIFT;
2657c478bd9Sstevel@tonic-gate aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
2667c478bd9Sstevel@tonic-gate exec_mode == ERRH_MODE_UNKNOWN);
2677c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_NONE;
268ad559ebfSwh aflt->flt_tl = (uchar_t)(flags & ERRH_TL_MASK);
2697c478bd9Sstevel@tonic-gate aflt->flt_panic = ((aflt->flt_tl != 0) ||
2707c478bd9Sstevel@tonic-gate (aft_testfatal != 0));
2717c478bd9Sstevel@tonic-gate
272ad559ebfSwh /*
273ad559ebfSwh * For the first error packet on the queue, check if it
274ad559ebfSwh * happened in user fill/spill trap.
275ad559ebfSwh */
276ad559ebfSwh if (flags & ERRH_U_SPILL_FILL) {
277ad559ebfSwh u_spill_fill = 1;
278ad559ebfSwh /* clear the user fill/spill flag in flags */
279ad559ebfSwh flags = (uint64_t)aflt->flt_tl;
280ad559ebfSwh } else
281ad559ebfSwh u_spill_fill = 0;
282ad559ebfSwh
2837c478bd9Sstevel@tonic-gate switch (errh_flt.errh_er.desc) {
2847c478bd9Sstevel@tonic-gate case ERRH_DESC_PR_NRE:
285ad559ebfSwh if (u_spill_fill) {
286ad559ebfSwh aflt->flt_panic = 0;
287ad559ebfSwh break;
288ad559ebfSwh }
2897c478bd9Sstevel@tonic-gate /*
2909d0d62adSJason Beloro * Fall through, precise fault also need to check
2919d0d62adSJason Beloro * to see if it was protected.
2927c478bd9Sstevel@tonic-gate */
293ad559ebfSwh /*FALLTHRU*/
2947c478bd9Sstevel@tonic-gate
2957c478bd9Sstevel@tonic-gate case ERRH_DESC_DEF_NRE:
2967c478bd9Sstevel@tonic-gate /*
2977c478bd9Sstevel@tonic-gate * If the trap occurred in privileged mode at TL=0,
2987c478bd9Sstevel@tonic-gate * we need to check to see if we were executing
2997c478bd9Sstevel@tonic-gate * in kernel under on_trap() or t_lofault
3005a5604afSrf * protection. If so, and if it was a PIO or MEM
3015a5604afSrf * error, then modify the saved registers so that
3025a5604afSrf * we return from the trap to the appropriate
3035a5604afSrf * trampoline routine.
3047c478bd9Sstevel@tonic-gate */
3055a5604afSrf if (aflt->flt_priv == 1 && aflt->flt_tl == 0 &&
3065a5604afSrf ((errh_flt.errh_er.attr & ERRH_ATTR_PIO) ||
3075a5604afSrf (errh_flt.errh_er.attr & ERRH_ATTR_MEM))) {
3087c478bd9Sstevel@tonic-gate trampolined =
3097c478bd9Sstevel@tonic-gate errh_error_protected(rp, aflt, &expected);
3105a5604afSrf }
3117c478bd9Sstevel@tonic-gate
3127c478bd9Sstevel@tonic-gate if (!aflt->flt_priv || aflt->flt_prot ==
3137c478bd9Sstevel@tonic-gate AFLT_PROT_COPY) {
3147c478bd9Sstevel@tonic-gate aflt->flt_panic |= aft_panic;
3157c478bd9Sstevel@tonic-gate } else if (!trampolined &&
316c4b03495Srf (aflt->flt_class != BUS_FAULT)) {
3177c478bd9Sstevel@tonic-gate aflt->flt_panic = 1;
3187c478bd9Sstevel@tonic-gate }
3197c478bd9Sstevel@tonic-gate
32044961713Sgirish /*
32144961713Sgirish * Check error attribute, handle individual error
32244961713Sgirish * if it is needed.
32344961713Sgirish */
32444961713Sgirish errh_handle_attr(&errh_flt);
32544961713Sgirish
3267c478bd9Sstevel@tonic-gate /*
3277c478bd9Sstevel@tonic-gate * If PIO error, we need to query the bus nexus
3287c478bd9Sstevel@tonic-gate * for fatal errors.
3297c478bd9Sstevel@tonic-gate */
3307c478bd9Sstevel@tonic-gate if (aflt->flt_class == BUS_FAULT) {
3319d0d62adSJason Beloro aflt->flt_addr = errh_flt.errh_er.ra;
3327c478bd9Sstevel@tonic-gate errh_cpu_run_bus_error_handlers(aflt,
3337c478bd9Sstevel@tonic-gate expected);
3347c478bd9Sstevel@tonic-gate }
3357c478bd9Sstevel@tonic-gate
3367c478bd9Sstevel@tonic-gate break;
3377c478bd9Sstevel@tonic-gate
338b72ed0ffSiskreen case ERRH_DESC_USER_DCORE:
339b72ed0ffSiskreen /*
340b72ed0ffSiskreen * User generated panic. Call panic directly
341b72ed0ffSiskreen * since there are no FMA e-reports to
342b72ed0ffSiskreen * display.
343b72ed0ffSiskreen */
344b72ed0ffSiskreen
345b72ed0ffSiskreen panic("Panic - Generated at user request");
346b72ed0ffSiskreen
347b72ed0ffSiskreen break;
348b72ed0ffSiskreen
3497c478bd9Sstevel@tonic-gate default:
3504cacbdcfSwh cmn_err(CE_WARN, "Panic - Error Descriptor 0x%llx "
3514cacbdcfSwh " invalid in non-resumable error handler",
3527c478bd9Sstevel@tonic-gate (long long) errh_flt.errh_er.desc);
3534cacbdcfSwh aflt->flt_panic = 1;
3544cacbdcfSwh break;
3557c478bd9Sstevel@tonic-gate }
3567c478bd9Sstevel@tonic-gate
3577c478bd9Sstevel@tonic-gate /*
3587c478bd9Sstevel@tonic-gate * Queue the error report for further processing. If
3597c478bd9Sstevel@tonic-gate * flt_panic is set, code still process other errors
3607c478bd9Sstevel@tonic-gate * in the queue until the panic routine stops the
3617c478bd9Sstevel@tonic-gate * kernel.
3627c478bd9Sstevel@tonic-gate */
3637c478bd9Sstevel@tonic-gate (void) cpu_queue_one_event(&errh_flt);
3647c478bd9Sstevel@tonic-gate
3657c478bd9Sstevel@tonic-gate /*
3667c478bd9Sstevel@tonic-gate * Panic here if aflt->flt_panic has been set.
3677c478bd9Sstevel@tonic-gate * Enqueued errors will be logged as part of the panic flow.
3687c478bd9Sstevel@tonic-gate */
3697c478bd9Sstevel@tonic-gate if (aflt->flt_panic) {
3707c478bd9Sstevel@tonic-gate fm_panic("Unrecoverable hardware error");
3717c478bd9Sstevel@tonic-gate }
3727c478bd9Sstevel@tonic-gate
3737c478bd9Sstevel@tonic-gate /*
374db874c57Selowe * Call page_retire() to handle memory errors.
3757c478bd9Sstevel@tonic-gate */
3767c478bd9Sstevel@tonic-gate if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
377db874c57Selowe errh_page_retire(&errh_flt, PR_UE);
3787c478bd9Sstevel@tonic-gate
3797c478bd9Sstevel@tonic-gate /*
3809d0d62adSJason Beloro * If we queued an error and the it was in user mode, or
3819d0d62adSJason Beloro * protected by t_lofault, or user_spill_fill is set, we
3829d0d62adSJason Beloro * set AST flag so the queue will be drained before
3839d0d62adSJason Beloro * returning to user mode.
3847c478bd9Sstevel@tonic-gate */
3859d0d62adSJason Beloro if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY ||
3869d0d62adSJason Beloro u_spill_fill) {
3877c478bd9Sstevel@tonic-gate int pcb_flag = 0;
3887c478bd9Sstevel@tonic-gate
3897c478bd9Sstevel@tonic-gate if (aflt->flt_class == CPU_FAULT)
3907c478bd9Sstevel@tonic-gate pcb_flag |= ASYNC_HWERR;
3917c478bd9Sstevel@tonic-gate else if (aflt->flt_class == BUS_FAULT)
3927c478bd9Sstevel@tonic-gate pcb_flag |= ASYNC_BERR;
3937c478bd9Sstevel@tonic-gate
3947c478bd9Sstevel@tonic-gate ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
3957c478bd9Sstevel@tonic-gate aston(curthread);
3967c478bd9Sstevel@tonic-gate }
3977c478bd9Sstevel@tonic-gate }
3987c478bd9Sstevel@tonic-gate }
3997c478bd9Sstevel@tonic-gate
4007c478bd9Sstevel@tonic-gate /*
4017c478bd9Sstevel@tonic-gate * For PIO errors, this routine calls nexus driver's error
4027c478bd9Sstevel@tonic-gate * callback routines. If the callback routine returns fatal, and
4037c478bd9Sstevel@tonic-gate * we are in kernel or unknow mode without any error protection,
4047c478bd9Sstevel@tonic-gate * we need to turn on the panic flag.
4057c478bd9Sstevel@tonic-gate */
4067c478bd9Sstevel@tonic-gate void
errh_cpu_run_bus_error_handlers(struct async_flt * aflt,int expected)4077c478bd9Sstevel@tonic-gate errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4087c478bd9Sstevel@tonic-gate {
4097c478bd9Sstevel@tonic-gate int status;
4107c478bd9Sstevel@tonic-gate ddi_fm_error_t de;
4117c478bd9Sstevel@tonic-gate
4127c478bd9Sstevel@tonic-gate bzero(&de, sizeof (ddi_fm_error_t));
4137c478bd9Sstevel@tonic-gate
4147c478bd9Sstevel@tonic-gate de.fme_version = DDI_FME_VERSION;
4157c478bd9Sstevel@tonic-gate de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
4167c478bd9Sstevel@tonic-gate de.fme_flag = expected;
4177c478bd9Sstevel@tonic-gate de.fme_bus_specific = (void *)aflt->flt_addr;
4187c478bd9Sstevel@tonic-gate status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4197c478bd9Sstevel@tonic-gate
4207c478bd9Sstevel@tonic-gate /*
4217c478bd9Sstevel@tonic-gate * If error is protected, it will jump to proper routine
4227c478bd9Sstevel@tonic-gate * to handle the handle; if it is in user level, we just
4237c478bd9Sstevel@tonic-gate * kill the user process; if the driver thinks the error is
4247c478bd9Sstevel@tonic-gate * not fatal, we can drive on. If none of above are true,
4257c478bd9Sstevel@tonic-gate * we panic
4267c478bd9Sstevel@tonic-gate */
4277c478bd9Sstevel@tonic-gate if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
4287c478bd9Sstevel@tonic-gate (status == DDI_FM_FATAL))
4297c478bd9Sstevel@tonic-gate aflt->flt_panic = 1;
4307c478bd9Sstevel@tonic-gate }
4317c478bd9Sstevel@tonic-gate
4327c478bd9Sstevel@tonic-gate /*
4337c478bd9Sstevel@tonic-gate * This routine checks to see if we are under any error protection when
4347c478bd9Sstevel@tonic-gate * the error happens. If we are under error protection, we unwind to
4357c478bd9Sstevel@tonic-gate * the protection and indicate fault.
4367c478bd9Sstevel@tonic-gate */
4377c478bd9Sstevel@tonic-gate static int
errh_error_protected(struct regs * rp,struct async_flt * aflt,int * expected)4387c478bd9Sstevel@tonic-gate errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
4397c478bd9Sstevel@tonic-gate {
4407c478bd9Sstevel@tonic-gate int trampolined = 0;
4417c478bd9Sstevel@tonic-gate ddi_acc_hdl_t *hp;
4427c478bd9Sstevel@tonic-gate
4437c478bd9Sstevel@tonic-gate if (curthread->t_ontrap != NULL) {
4447c478bd9Sstevel@tonic-gate on_trap_data_t *otp = curthread->t_ontrap;
4457c478bd9Sstevel@tonic-gate
4467c478bd9Sstevel@tonic-gate if (otp->ot_prot & OT_DATA_EC) {
4477c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_EC;
4487c478bd9Sstevel@tonic-gate otp->ot_trap |= OT_DATA_EC;
4497c478bd9Sstevel@tonic-gate rp->r_pc = otp->ot_trampoline;
4507c478bd9Sstevel@tonic-gate rp->r_npc = rp->r_pc +4;
4517c478bd9Sstevel@tonic-gate trampolined = 1;
4527c478bd9Sstevel@tonic-gate }
4537c478bd9Sstevel@tonic-gate
4547c478bd9Sstevel@tonic-gate if (otp->ot_prot & OT_DATA_ACCESS) {
4557c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_ACCESS;
4567c478bd9Sstevel@tonic-gate otp->ot_trap |= OT_DATA_ACCESS;
4577c478bd9Sstevel@tonic-gate rp->r_pc = otp->ot_trampoline;
4587c478bd9Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4;
4597c478bd9Sstevel@tonic-gate trampolined = 1;
4607c478bd9Sstevel@tonic-gate /*
4617c478bd9Sstevel@tonic-gate * for peek and caut_gets
4627c478bd9Sstevel@tonic-gate * errors are expected
4637c478bd9Sstevel@tonic-gate */
4647c478bd9Sstevel@tonic-gate hp = (ddi_acc_hdl_t *)otp->ot_handle;
4657c478bd9Sstevel@tonic-gate if (!hp)
4667c478bd9Sstevel@tonic-gate *expected = DDI_FM_ERR_PEEK;
4677c478bd9Sstevel@tonic-gate else if (hp->ah_acc.devacc_attr_access ==
4687c478bd9Sstevel@tonic-gate DDI_CAUTIOUS_ACC)
4697c478bd9Sstevel@tonic-gate *expected = DDI_FM_ERR_EXPECTED;
4707c478bd9Sstevel@tonic-gate }
4717c478bd9Sstevel@tonic-gate } else if (curthread->t_lofault) {
4727c478bd9Sstevel@tonic-gate aflt->flt_prot = AFLT_PROT_COPY;
4737c478bd9Sstevel@tonic-gate rp->r_g1 = EFAULT;
4747c478bd9Sstevel@tonic-gate rp->r_pc = curthread->t_lofault;
4757c478bd9Sstevel@tonic-gate rp->r_npc = rp->r_pc + 4;
4767c478bd9Sstevel@tonic-gate trampolined = 1;
4777c478bd9Sstevel@tonic-gate }
4787c478bd9Sstevel@tonic-gate
4797c478bd9Sstevel@tonic-gate return (trampolined);
4807c478bd9Sstevel@tonic-gate }
4817c478bd9Sstevel@tonic-gate
4827c478bd9Sstevel@tonic-gate /*
4837c478bd9Sstevel@tonic-gate * Queue one event.
4847c478bd9Sstevel@tonic-gate */
4857c478bd9Sstevel@tonic-gate static void
cpu_queue_one_event(errh_async_flt_t * errh_fltp)4867c478bd9Sstevel@tonic-gate cpu_queue_one_event(errh_async_flt_t *errh_fltp)
4877c478bd9Sstevel@tonic-gate {
4887c478bd9Sstevel@tonic-gate struct async_flt *aflt = (struct async_flt *)errh_fltp;
4897c478bd9Sstevel@tonic-gate errorq_t *eqp;
4907c478bd9Sstevel@tonic-gate
4917c478bd9Sstevel@tonic-gate if (aflt->flt_panic)
4927c478bd9Sstevel@tonic-gate eqp = ue_queue;
4937c478bd9Sstevel@tonic-gate else
4947c478bd9Sstevel@tonic-gate eqp = ce_queue;
4957c478bd9Sstevel@tonic-gate
4967c478bd9Sstevel@tonic-gate errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
4977c478bd9Sstevel@tonic-gate aflt->flt_panic);
4987c478bd9Sstevel@tonic-gate }
4997c478bd9Sstevel@tonic-gate
5007c478bd9Sstevel@tonic-gate /*
5017c478bd9Sstevel@tonic-gate * The cpu_async_log_err() function is called by the ce/ue_drain() function to
5027c478bd9Sstevel@tonic-gate * handle logging for CPU events that are dequeued. As such, it can be invoked
5037c478bd9Sstevel@tonic-gate * from softint context, from AST processing in the trap() flow, or from the
5047c478bd9Sstevel@tonic-gate * panic flow. We decode the CPU-specific data, and log appropriate messages.
5057c478bd9Sstevel@tonic-gate */
5067c478bd9Sstevel@tonic-gate void
cpu_async_log_err(void * flt)5077c478bd9Sstevel@tonic-gate cpu_async_log_err(void *flt)
5087c478bd9Sstevel@tonic-gate {
5097c478bd9Sstevel@tonic-gate errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
5107c478bd9Sstevel@tonic-gate errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;
5117c478bd9Sstevel@tonic-gate
5127c478bd9Sstevel@tonic-gate switch (errh_erp->desc) {
5137c478bd9Sstevel@tonic-gate case ERRH_DESC_UCOR_RE:
5147c478bd9Sstevel@tonic-gate if (errh_erp->attr & ERRH_ATTR_MEM) {
5157c478bd9Sstevel@tonic-gate /*
516db874c57Selowe * Turn on the PR_UE flag. The page will be
5177c478bd9Sstevel@tonic-gate * scrubbed when it is freed.
5187c478bd9Sstevel@tonic-gate */
519db874c57Selowe errh_page_retire(errh_fltp, PR_UE);
5207c478bd9Sstevel@tonic-gate }
5217c478bd9Sstevel@tonic-gate
5227c478bd9Sstevel@tonic-gate break;
5237c478bd9Sstevel@tonic-gate
5247c478bd9Sstevel@tonic-gate case ERRH_DESC_PR_NRE:
5257c478bd9Sstevel@tonic-gate case ERRH_DESC_DEF_NRE:
5267c478bd9Sstevel@tonic-gate if (errh_erp->attr & ERRH_ATTR_MEM) {
5277c478bd9Sstevel@tonic-gate /*
5287c478bd9Sstevel@tonic-gate * For non-resumable memory error, retire
5297c478bd9Sstevel@tonic-gate * the page here.
5307c478bd9Sstevel@tonic-gate */
531db874c57Selowe errh_page_retire(errh_fltp, PR_UE);
5323461bce3Swh
5333461bce3Swh /*
5343461bce3Swh * If we are going to panic, scrub the page first
5353461bce3Swh */
5363461bce3Swh if (errh_fltp->cmn_asyncflt.flt_panic)
5379d0d62adSJason Beloro mem_scrub(errh_fltp->errh_er.ra,
5383461bce3Swh errh_fltp->errh_er.sz);
5397c478bd9Sstevel@tonic-gate }
5407c478bd9Sstevel@tonic-gate break;
5417c478bd9Sstevel@tonic-gate
5427c478bd9Sstevel@tonic-gate default:
5437c478bd9Sstevel@tonic-gate break;
5447c478bd9Sstevel@tonic-gate }
5457c478bd9Sstevel@tonic-gate }
5467c478bd9Sstevel@tonic-gate
5477c478bd9Sstevel@tonic-gate /*
5487c478bd9Sstevel@tonic-gate * Called from ce_drain().
5497c478bd9Sstevel@tonic-gate */
5507c478bd9Sstevel@tonic-gate void
cpu_ce_log_err(struct async_flt * aflt)5517c478bd9Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt)
5527c478bd9Sstevel@tonic-gate {
5537c478bd9Sstevel@tonic-gate switch (aflt->flt_class) {
5547c478bd9Sstevel@tonic-gate case CPU_FAULT:
5557c478bd9Sstevel@tonic-gate cpu_async_log_err(aflt);
5567c478bd9Sstevel@tonic-gate break;
5577c478bd9Sstevel@tonic-gate
5587c478bd9Sstevel@tonic-gate case BUS_FAULT:
5597c478bd9Sstevel@tonic-gate cpu_async_log_err(aflt);
5607c478bd9Sstevel@tonic-gate break;
5617c478bd9Sstevel@tonic-gate
5627c478bd9Sstevel@tonic-gate default:
5637c478bd9Sstevel@tonic-gate break;
5647c478bd9Sstevel@tonic-gate }
5657c478bd9Sstevel@tonic-gate }
5667c478bd9Sstevel@tonic-gate
5677c478bd9Sstevel@tonic-gate /*
5687c478bd9Sstevel@tonic-gate * Called from ue_drain().
5697c478bd9Sstevel@tonic-gate */
5707c478bd9Sstevel@tonic-gate void
cpu_ue_log_err(struct async_flt * aflt)5717c478bd9Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
5727c478bd9Sstevel@tonic-gate {
5737c478bd9Sstevel@tonic-gate switch (aflt->flt_class) {
5747c478bd9Sstevel@tonic-gate case CPU_FAULT:
5757c478bd9Sstevel@tonic-gate cpu_async_log_err(aflt);
5767c478bd9Sstevel@tonic-gate break;
5777c478bd9Sstevel@tonic-gate
5787c478bd9Sstevel@tonic-gate case BUS_FAULT:
5797c478bd9Sstevel@tonic-gate cpu_async_log_err(aflt);
5807c478bd9Sstevel@tonic-gate break;
5817c478bd9Sstevel@tonic-gate
5827c478bd9Sstevel@tonic-gate default:
5837c478bd9Sstevel@tonic-gate break;
5847c478bd9Sstevel@tonic-gate }
5857c478bd9Sstevel@tonic-gate }
5867c478bd9Sstevel@tonic-gate
5877c478bd9Sstevel@tonic-gate /*
5887c478bd9Sstevel@tonic-gate * Turn on flag on the error memory region.
5897c478bd9Sstevel@tonic-gate */
5907c478bd9Sstevel@tonic-gate static void
errh_page_retire(errh_async_flt_t * errh_fltp,uchar_t flag)591db874c57Selowe errh_page_retire(errh_async_flt_t *errh_fltp, uchar_t flag)
5927c478bd9Sstevel@tonic-gate {
5939d0d62adSJason Beloro uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
5947c478bd9Sstevel@tonic-gate uint64_t flt_real_addr_end = flt_real_addr_start +
5957c478bd9Sstevel@tonic-gate errh_fltp->errh_er.sz - 1;
5967c478bd9Sstevel@tonic-gate int64_t current_addr;
5977c478bd9Sstevel@tonic-gate
5987c478bd9Sstevel@tonic-gate if (errh_fltp->errh_er.sz == 0)
5997c478bd9Sstevel@tonic-gate return;
6007c478bd9Sstevel@tonic-gate
6017c478bd9Sstevel@tonic-gate for (current_addr = flt_real_addr_start;
6027c478bd9Sstevel@tonic-gate current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
603db874c57Selowe (void) page_retire(current_addr, flag);
6047c478bd9Sstevel@tonic-gate }
6057c478bd9Sstevel@tonic-gate }
6067c478bd9Sstevel@tonic-gate
6077c478bd9Sstevel@tonic-gate void
mem_scrub(uint64_t paddr,uint64_t len)6087c478bd9Sstevel@tonic-gate mem_scrub(uint64_t paddr, uint64_t len)
6097c478bd9Sstevel@tonic-gate {
6107c478bd9Sstevel@tonic-gate uint64_t pa, length, scrubbed_len;
6117c478bd9Sstevel@tonic-gate
6127c478bd9Sstevel@tonic-gate pa = paddr;
6137c478bd9Sstevel@tonic-gate length = len;
6147c478bd9Sstevel@tonic-gate scrubbed_len = 0;
6157c478bd9Sstevel@tonic-gate
6163461bce3Swh while (length > 0) {
6173461bce3Swh if (hv_mem_scrub(pa, length, &scrubbed_len) != H_EOK)
6187c478bd9Sstevel@tonic-gate break;
6197c478bd9Sstevel@tonic-gate
6207c478bd9Sstevel@tonic-gate pa += scrubbed_len;
6217c478bd9Sstevel@tonic-gate length -= scrubbed_len;
6227c478bd9Sstevel@tonic-gate }
6237c478bd9Sstevel@tonic-gate }
6247c478bd9Sstevel@tonic-gate
625ad559ebfSwh /*
6262ae0af4bSep * Call hypervisor to flush the memory region.
6272ae0af4bSep * Both va and len must be MMU_PAGESIZE aligned.
6282ae0af4bSep * Returns the total number of bytes flushed.
629ad559ebfSwh */
6302ae0af4bSep uint64_t
mem_sync(caddr_t orig_va,size_t orig_len)6315ccb2ff8Swh mem_sync(caddr_t orig_va, size_t orig_len)
6327c478bd9Sstevel@tonic-gate {
6337c478bd9Sstevel@tonic-gate uint64_t pa, length, flushed;
6342ae0af4bSep uint64_t chunk_len = MMU_PAGESIZE;
6352ae0af4bSep uint64_t total_flushed = 0;
6365ccb2ff8Swh uint64_t va, len;
6377c478bd9Sstevel@tonic-gate
6385ccb2ff8Swh if (orig_len == 0)
6392ae0af4bSep return (total_flushed);
6407c478bd9Sstevel@tonic-gate
6415ccb2ff8Swh /* align va */
6425ccb2ff8Swh va = P2ALIGN_TYPED(orig_va, MMU_PAGESIZE, uint64_t);
6435ccb2ff8Swh /* round up len to MMU_PAGESIZE aligned */
6445ccb2ff8Swh len = P2ROUNDUP_TYPED(orig_va + orig_len, MMU_PAGESIZE, uint64_t) - va;
6455ccb2ff8Swh
6462ae0af4bSep while (len > 0) {
6472ae0af4bSep pa = va_to_pa((caddr_t)va);
6482ae0af4bSep if (pa == (uint64_t)-1)
6492ae0af4bSep return (total_flushed);
6507c478bd9Sstevel@tonic-gate
6512ae0af4bSep length = chunk_len;
6522ae0af4bSep flushed = 0;
653ad559ebfSwh
6542ae0af4bSep while (length > 0) {
6552ae0af4bSep if (hv_mem_sync(pa, length, &flushed) != H_EOK)
6562ae0af4bSep return (total_flushed);
6577c478bd9Sstevel@tonic-gate
6582ae0af4bSep pa += flushed;
6592ae0af4bSep length -= flushed;
6602ae0af4bSep total_flushed += flushed;
6612ae0af4bSep }
6627c478bd9Sstevel@tonic-gate
6632ae0af4bSep va += chunk_len;
6642ae0af4bSep len -= chunk_len;
6657c478bd9Sstevel@tonic-gate }
6662ae0af4bSep
6672ae0af4bSep return (total_flushed);
6687c478bd9Sstevel@tonic-gate }
6697c478bd9Sstevel@tonic-gate
6707c478bd9Sstevel@tonic-gate /*
6717c478bd9Sstevel@tonic-gate * If resumable queue is full, we need to check if any cpu is in
6727c478bd9Sstevel@tonic-gate * error state. If not, we drive on. If yes, we need to panic. The
6737c478bd9Sstevel@tonic-gate * hypervisor call hv_cpu_state() is being used for checking the
674367c34e9Srf * cpu state. And reset %tick_compr in case tick-compare was lost.
6757c478bd9Sstevel@tonic-gate */
6767c478bd9Sstevel@tonic-gate static void
errh_rq_full(struct async_flt * afltp)6777c478bd9Sstevel@tonic-gate errh_rq_full(struct async_flt *afltp)
6787c478bd9Sstevel@tonic-gate {
6797c478bd9Sstevel@tonic-gate processorid_t who;
6807c478bd9Sstevel@tonic-gate uint64_t cpu_state;
6817c478bd9Sstevel@tonic-gate uint64_t retval;
682367c34e9Srf uint64_t current_tick;
683367c34e9Srf
684367c34e9Srf current_tick = (uint64_t)gettick();
685367c34e9Srf tickcmpr_set(current_tick);
6867c478bd9Sstevel@tonic-gate
6877c478bd9Sstevel@tonic-gate for (who = 0; who < NCPU; who++)
6887c478bd9Sstevel@tonic-gate if (CPU_IN_SET(cpu_ready_set, who)) {
6897c478bd9Sstevel@tonic-gate retval = hv_cpu_state(who, &cpu_state);
6907c478bd9Sstevel@tonic-gate if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
6917c478bd9Sstevel@tonic-gate afltp->flt_panic = 1;
6927c478bd9Sstevel@tonic-gate break;
6937c478bd9Sstevel@tonic-gate }
6947c478bd9Sstevel@tonic-gate }
6957c478bd9Sstevel@tonic-gate }
6967c478bd9Sstevel@tonic-gate
6977c478bd9Sstevel@tonic-gate /*
6987c478bd9Sstevel@tonic-gate * Return processor specific async error structure
6997c478bd9Sstevel@tonic-gate * size used.
7007c478bd9Sstevel@tonic-gate */
7017c478bd9Sstevel@tonic-gate int
cpu_aflt_size(void)7027c478bd9Sstevel@tonic-gate cpu_aflt_size(void)
7037c478bd9Sstevel@tonic-gate {
7047c478bd9Sstevel@tonic-gate return (sizeof (errh_async_flt_t));
7057c478bd9Sstevel@tonic-gate }
7067c478bd9Sstevel@tonic-gate
7077c478bd9Sstevel@tonic-gate #define SZ_TO_ETRS_SHIFT 6
7087c478bd9Sstevel@tonic-gate
7097c478bd9Sstevel@tonic-gate /*
7107c478bd9Sstevel@tonic-gate * Message print out when resumable queue is overflown
7117c478bd9Sstevel@tonic-gate */
7127c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7137c478bd9Sstevel@tonic-gate void
rq_overflow(struct regs * rp,uint64_t head_offset,uint64_t tail_offset)7147c478bd9Sstevel@tonic-gate rq_overflow(struct regs *rp, uint64_t head_offset,
7157c478bd9Sstevel@tonic-gate uint64_t tail_offset)
7167c478bd9Sstevel@tonic-gate {
7177c478bd9Sstevel@tonic-gate rq_overflow_count++;
7187c478bd9Sstevel@tonic-gate }
7197c478bd9Sstevel@tonic-gate
7207c478bd9Sstevel@tonic-gate /*
7217c478bd9Sstevel@tonic-gate * Handler to process a fatal error. This routine can be called from a
7227c478bd9Sstevel@tonic-gate * softint, called from trap()'s AST handling, or called from the panic flow.
7237c478bd9Sstevel@tonic-gate */
7247c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7257c478bd9Sstevel@tonic-gate static void
ue_drain(void * ignored,struct async_flt * aflt,errorq_elem_t * eqep)7267c478bd9Sstevel@tonic-gate ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
7277c478bd9Sstevel@tonic-gate {
7287c478bd9Sstevel@tonic-gate cpu_ue_log_err(aflt);
7297c478bd9Sstevel@tonic-gate }
7307c478bd9Sstevel@tonic-gate
7317c478bd9Sstevel@tonic-gate /*
7327c478bd9Sstevel@tonic-gate * Handler to process a correctable error. This routine can be called from a
7337c478bd9Sstevel@tonic-gate * softint. We just call the CPU module's logging routine.
7347c478bd9Sstevel@tonic-gate */
7357c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7367c478bd9Sstevel@tonic-gate static void
ce_drain(void * ignored,struct async_flt * aflt,errorq_elem_t * eqep)7377c478bd9Sstevel@tonic-gate ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
7387c478bd9Sstevel@tonic-gate {
7397c478bd9Sstevel@tonic-gate cpu_ce_log_err(aflt);
7407c478bd9Sstevel@tonic-gate }
7417c478bd9Sstevel@tonic-gate
742b7b0558aSAnthony Yznaga /*
743b7b0558aSAnthony Yznaga * Handler to process a sun4v errort report via an errorq_t. This routine
744b7b0558aSAnthony Yznaga * can be called from a softint.
745b7b0558aSAnthony Yznaga *
746b7b0558aSAnthony Yznaga * This is used for sun4v error reports that cannot be processed at high-level
747b7b0558aSAnthony Yznaga * interrupt time. Currently only error reports indicating an SP state change
748b7b0558aSAnthony Yznaga * are handled in this manner.
749b7b0558aSAnthony Yznaga */
750b7b0558aSAnthony Yznaga /*ARGSUSED*/
751b7b0558aSAnthony Yznaga static void
errh_drain(void * ignored,errh_er_t * errh_erp,errorq_elem_t * eqep)752b7b0558aSAnthony Yznaga errh_drain(void *ignored, errh_er_t *errh_erp, errorq_elem_t *eqep)
753b7b0558aSAnthony Yznaga {
754b7b0558aSAnthony Yznaga ASSERT(errh_erp->desc == ERRH_DESC_SP);
755b7b0558aSAnthony Yznaga
756b7b0558aSAnthony Yznaga errh_handle_sp(errh_erp);
757b7b0558aSAnthony Yznaga }
758b7b0558aSAnthony Yznaga
7593cac8019Srf /*
7603cac8019Srf * Handler to process vbsc hostshutdown (power-off button).
7613cac8019Srf */
7623cac8019Srf static int
err_shutdown_softintr()7633cac8019Srf err_shutdown_softintr()
7643cac8019Srf {
7653cac8019Srf cmn_err(CE_WARN, "Power-off requested, system will now shutdown.");
7663cac8019Srf do_shutdown();
7673cac8019Srf
7683cac8019Srf /*
7693cac8019Srf * just in case do_shutdown() fails
7703cac8019Srf */
7713cac8019Srf (void) timeout((void(*)(void *))power_down, NULL, 100 * hz);
7723cac8019Srf return (DDI_INTR_CLAIMED);
7733cac8019Srf }
7743cac8019Srf
7757c478bd9Sstevel@tonic-gate /*
7767c478bd9Sstevel@tonic-gate * Allocate error queue sizes based on max_ncpus. max_ncpus is set just
7777c478bd9Sstevel@tonic-gate * after ncpunode has been determined. ncpus is set in start_other_cpus
7787c478bd9Sstevel@tonic-gate * which is called after error_init() but may change dynamically.
7797c478bd9Sstevel@tonic-gate */
7807c478bd9Sstevel@tonic-gate void
error_init(void)7817c478bd9Sstevel@tonic-gate error_init(void)
7827c478bd9Sstevel@tonic-gate {
7837c478bd9Sstevel@tonic-gate char tmp_name[MAXSYSNAME];
784fa9e4066Sahrens pnode_t node;
7857c478bd9Sstevel@tonic-gate size_t size = cpu_aflt_size();
7867c478bd9Sstevel@tonic-gate
7877c478bd9Sstevel@tonic-gate /*
7887c478bd9Sstevel@tonic-gate * Initialize the correctable and uncorrectable error queues.
7897c478bd9Sstevel@tonic-gate */
7907c478bd9Sstevel@tonic-gate ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
7917c478bd9Sstevel@tonic-gate MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);
7927c478bd9Sstevel@tonic-gate
7937c478bd9Sstevel@tonic-gate ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
7947c478bd9Sstevel@tonic-gate MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);
7957c478bd9Sstevel@tonic-gate
796b7b0558aSAnthony Yznaga errh_queue = errorq_create("errh_queue", (errorq_func_t)errh_drain,
797b7b0558aSAnthony Yznaga NULL, CPU_RQ_ENTRIES, sizeof (errh_er_t), PIL_1, 0);
798b7b0558aSAnthony Yznaga
799b7b0558aSAnthony Yznaga if (ue_queue == NULL || ce_queue == NULL || errh_queue == NULL)
8007c478bd9Sstevel@tonic-gate panic("failed to create required system error queue");
8017c478bd9Sstevel@tonic-gate
8023cac8019Srf /*
8033cac8019Srf * Setup interrupt handler for power-off button.
8043cac8019Srf */
8053cac8019Srf err_shutdown_inum = add_softintr(PIL_9,
806b0fc0e77Sgovinda (softintrfunc)err_shutdown_softintr, NULL, SOFTINT_ST);
8073cac8019Srf
8087c478bd9Sstevel@tonic-gate /*
8097c478bd9Sstevel@tonic-gate * Initialize the busfunc list mutex. This must be a PIL_15 spin lock
8107c478bd9Sstevel@tonic-gate * because we will need to acquire it from cpu_async_error().
8117c478bd9Sstevel@tonic-gate */
8127c478bd9Sstevel@tonic-gate mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
8137c478bd9Sstevel@tonic-gate
814a60fc142Srf /* Only allow one cpu at a time to dump errh errors. */
815a60fc142Srf mutex_init(&errh_print_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
816a60fc142Srf
8177c478bd9Sstevel@tonic-gate node = prom_rootnode();
8187c478bd9Sstevel@tonic-gate if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
8197c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
8207c478bd9Sstevel@tonic-gate return;
8217c478bd9Sstevel@tonic-gate }
8227c478bd9Sstevel@tonic-gate
8237c478bd9Sstevel@tonic-gate if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
8247c478bd9Sstevel@tonic-gate (size <= MAXSYSNAME) &&
8257c478bd9Sstevel@tonic-gate (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
8267c478bd9Sstevel@tonic-gate if (reset_debug) {
8277c478bd9Sstevel@tonic-gate cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
8287c478bd9Sstevel@tonic-gate } else if (strncmp(tmp_name, "FATAL", 5) == 0) {
8297c478bd9Sstevel@tonic-gate cmn_err(CE_CONT,
8307c478bd9Sstevel@tonic-gate "System booting after fatal error %s\n", tmp_name);
8317c478bd9Sstevel@tonic-gate }
8327c478bd9Sstevel@tonic-gate }
8337c478bd9Sstevel@tonic-gate }
83418aea0b1Swh
/*
 * Called when the nonresumable queue is full.  There is no way to carry
 * on from here, so panic through fm_panic().  The argument is unused.
 */
/*ARGSUSED*/
void
nrq_overflow(struct regs *rp)
{
	fm_panic("Nonresumable queue full");
}
84444961713Sgirish
84544961713Sgirish /*
84644961713Sgirish * This is the place for special error handling for individual errors.
84744961713Sgirish */
84844961713Sgirish static void
errh_handle_attr(errh_async_flt_t * errh_fltp)84944961713Sgirish errh_handle_attr(errh_async_flt_t *errh_fltp)
85044961713Sgirish {
85144961713Sgirish switch (errh_fltp->errh_er.attr & ~ERRH_MODE_MASK) {
85244961713Sgirish case ERRH_ATTR_CPU:
85344961713Sgirish case ERRH_ATTR_MEM:
85444961713Sgirish case ERRH_ATTR_PIO:
85544961713Sgirish case ERRH_ATTR_IRF:
85644961713Sgirish case ERRH_ATTR_FRF:
85744961713Sgirish case ERRH_ATTR_SHUT:
85844961713Sgirish break;
85944961713Sgirish
86044961713Sgirish case ERRH_ATTR_ASR:
86144961713Sgirish errh_handle_asr(errh_fltp);
86244961713Sgirish break;
86344961713Sgirish
86444961713Sgirish case ERRH_ATTR_ASI:
86544961713Sgirish case ERRH_ATTR_PREG:
86644961713Sgirish case ERRH_ATTR_RQF:
86744961713Sgirish break;
86844961713Sgirish
86944961713Sgirish default:
87044961713Sgirish break;
87144961713Sgirish }
87244961713Sgirish }
87344961713Sgirish
87444961713Sgirish /*
87544961713Sgirish * Handle ASR bit set in ATTR
87644961713Sgirish */
87744961713Sgirish static void
errh_handle_asr(errh_async_flt_t * errh_fltp)87844961713Sgirish errh_handle_asr(errh_async_flt_t *errh_fltp)
87944961713Sgirish {
88044961713Sgirish uint64_t current_tick;
88144961713Sgirish
88244961713Sgirish switch (errh_fltp->errh_er.reg) {
88344961713Sgirish case ASR_REG_VALID | ASR_REG_TICK:
88444961713Sgirish /*
88544961713Sgirish * For Tick Compare Register error, it only happens when
88644961713Sgirish * the register is being read or compared with the %tick
88744961713Sgirish * register. Since we lost the contents of the register,
88844961713Sgirish * we set the %tick_compr in the future. An interrupt will
88944961713Sgirish * happen when %tick matches the value field of %tick_compr.
89044961713Sgirish */
89144961713Sgirish current_tick = (uint64_t)gettick();
89244961713Sgirish tickcmpr_set(current_tick);
89344961713Sgirish /* Do not panic */
89444961713Sgirish errh_fltp->cmn_asyncflt.flt_panic = 0;
89544961713Sgirish break;
89644961713Sgirish
89744961713Sgirish default:
89844961713Sgirish break;
89944961713Sgirish }
90044961713Sgirish }
901a60fc142Srf
9024df55fdeSJanie Lu /*
9034df55fdeSJanie Lu * Handle a SP state change.
9044df55fdeSJanie Lu */
9054df55fdeSJanie Lu static void
errh_handle_sp(errh_er_t * errh_erp)906b7b0558aSAnthony Yznaga errh_handle_sp(errh_er_t *errh_erp)
9074df55fdeSJanie Lu {
9084df55fdeSJanie Lu uint8_t sp_state;
9094df55fdeSJanie Lu
910b7b0558aSAnthony Yznaga sp_state = (errh_erp->attr & ERRH_SP_MASK) >> ERRH_SP_SHIFT;
9114df55fdeSJanie Lu
912*9d468d1aSAnthony Yznaga sp_ereport_post(sp_state);
9134df55fdeSJanie Lu }
9144df55fdeSJanie Lu
915a60fc142Srf /*
916a60fc142Srf * Dump the error packet
917a60fc142Srf */
918a60fc142Srf /*ARGSUSED*/
919a60fc142Srf static void
errh_er_print(errh_er_t * errh_erp,const char * queue)920a60fc142Srf errh_er_print(errh_er_t *errh_erp, const char *queue)
921a60fc142Srf {
922a60fc142Srf typedef union {
923a60fc142Srf uint64_t w;
924a60fc142Srf uint16_t s[4];
925a60fc142Srf } errhp_t;
926a60fc142Srf errhp_t *p = (errhp_t *)errh_erp;
927a60fc142Srf int i;
928a60fc142Srf
929a60fc142Srf mutex_enter(&errh_print_lock);
930a60fc142Srf switch (errh_erp->desc) {
931a60fc142Srf case ERRH_DESC_UCOR_RE:
932a60fc142Srf cmn_err(CE_CONT, "\nResumable Uncorrectable Error ");
933a60fc142Srf break;
934a60fc142Srf case ERRH_DESC_PR_NRE:
935a60fc142Srf cmn_err(CE_CONT, "\nNonresumable Precise Error ");
936a60fc142Srf break;
937a60fc142Srf case ERRH_DESC_DEF_NRE:
938a60fc142Srf cmn_err(CE_CONT, "\nNonresumable Deferred Error ");
939a60fc142Srf break;
940a60fc142Srf default:
941a60fc142Srf cmn_err(CE_CONT, "\nError packet ");
942a60fc142Srf break;
943a60fc142Srf }
944a60fc142Srf cmn_err(CE_CONT, "received on %s\n", queue);
945a60fc142Srf
946a60fc142Srf /*
947a60fc142Srf * Print Q_ENTRY_SIZE bytes of epacket with 8 bytes per line
948a60fc142Srf */
949a60fc142Srf for (i = Q_ENTRY_SIZE; i > 0; i -= 8, ++p) {
950a60fc142Srf cmn_err(CE_CONT, "%016lx: %04x %04x %04x %04x\n", (uint64_t)p,
951a60fc142Srf p->s[0], p->s[1], p->s[2], p->s[3]);
952a60fc142Srf }
953a60fc142Srf mutex_exit(&errh_print_lock);
954a60fc142Srf }
9554df55fdeSJanie Lu
9564df55fdeSJanie Lu static void
sp_ereport_post(uint8_t sp_state)9574df55fdeSJanie Lu sp_ereport_post(uint8_t sp_state)
9584df55fdeSJanie Lu {
9594df55fdeSJanie Lu nvlist_t *ereport, *detector;
960*9d468d1aSAnthony Yznaga char *str = NULL;
9614df55fdeSJanie Lu
962*9d468d1aSAnthony Yznaga switch (sp_state) {
963*9d468d1aSAnthony Yznaga case ERRH_SP_FAULTED:
964*9d468d1aSAnthony Yznaga str = "chassis.sp.unavailable";
965*9d468d1aSAnthony Yznaga break;
966*9d468d1aSAnthony Yznaga
967*9d468d1aSAnthony Yznaga case ERRH_SP_NOT_PRESENT:
968*9d468d1aSAnthony Yznaga /*
969*9d468d1aSAnthony Yznaga * It is expected that removal of the SP will be undertaken
970*9d468d1aSAnthony Yznaga * in response to an existing service action. Diagnosing
971*9d468d1aSAnthony Yznaga * a fault in response to notification that the SP is
972*9d468d1aSAnthony Yznaga * missing is therefore undesired. In the future the fault
973*9d468d1aSAnthony Yznaga * management architecture may be updated to support more
974*9d468d1aSAnthony Yznaga * appropriate alert events. When that happens this code
975*9d468d1aSAnthony Yznaga * should be revisited.
976*9d468d1aSAnthony Yznaga */
977*9d468d1aSAnthony Yznaga return;
978*9d468d1aSAnthony Yznaga
979*9d468d1aSAnthony Yznaga case ERRH_SP_AVAILABLE:
980*9d468d1aSAnthony Yznaga /*
981*9d468d1aSAnthony Yznaga * Hypervisor does not send an epkt for this case
982*9d468d1aSAnthony Yznaga * so this should never happen.
983*9d468d1aSAnthony Yznaga */
984*9d468d1aSAnthony Yznaga cmn_err(CE_WARN, "Received unexpected notification "
985*9d468d1aSAnthony Yznaga "that the SP is available.");
986*9d468d1aSAnthony Yznaga return;
987*9d468d1aSAnthony Yznaga
988*9d468d1aSAnthony Yznaga default:
989*9d468d1aSAnthony Yznaga cmn_err(CE_WARN, "Invalid SP state 0x%x. No ereport posted.\n",
990*9d468d1aSAnthony Yznaga sp_state);
991*9d468d1aSAnthony Yznaga return;
992*9d468d1aSAnthony Yznaga }
9934df55fdeSJanie Lu
9944df55fdeSJanie Lu ereport = fm_nvlist_create(NULL);
9954df55fdeSJanie Lu detector = fm_nvlist_create(NULL);
9964df55fdeSJanie Lu
9974df55fdeSJanie Lu /*
9984df55fdeSJanie Lu * Create an HC-scheme detector FMRI.
9994df55fdeSJanie Lu */
10004df55fdeSJanie Lu fm_fmri_hc_set(detector, FM_HC_SCHEME_VERSION, NULL, NULL, 1,
10014df55fdeSJanie Lu "chassis", 0);
10024df55fdeSJanie Lu
1003*9d468d1aSAnthony Yznaga fm_ereport_set(ereport, FM_EREPORT_VERSION, str,
10044df55fdeSJanie Lu fm_ena_generate(0, FM_ENA_FMT1), detector, NULL);
10054df55fdeSJanie Lu
10064df55fdeSJanie Lu (void) fm_ereport_post(ereport, EVCH_TRYHARD);
10074df55fdeSJanie Lu
10084df55fdeSJanie Lu fm_nvlist_destroy(ereport, FM_NVA_FREE);
10094df55fdeSJanie Lu fm_nvlist_destroy(detector, FM_NVA_FREE);
10104df55fdeSJanie Lu }
1011