xref: /illumos-gate/usr/src/uts/sun4v/os/error.c (revision 7c478bd9)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/async.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/ontrap.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/ddifm.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/hypervisor_api.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/errorq.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/promif.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/prom_plat.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/error.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/fm/util.h>
42*7c478bd9Sstevel@tonic-gate 
43*7c478bd9Sstevel@tonic-gate #define	MAX_CE_FLTS		10
44*7c478bd9Sstevel@tonic-gate #define	MAX_ASYNC_FLTS		6
45*7c478bd9Sstevel@tonic-gate 
46*7c478bd9Sstevel@tonic-gate errorq_t *ue_queue;			/* queue of uncorrectable errors */
47*7c478bd9Sstevel@tonic-gate errorq_t *ce_queue;			/* queue of correctable errors */
48*7c478bd9Sstevel@tonic-gate 
49*7c478bd9Sstevel@tonic-gate /*
50*7c478bd9Sstevel@tonic-gate  * Being used by memory test driver.
51*7c478bd9Sstevel@tonic-gate  * ce_verbose_memory - covers CEs in DIMMs
52*7c478bd9Sstevel@tonic-gate  * ce_verbose_other - covers "others" (ecache, IO, etc.)
53*7c478bd9Sstevel@tonic-gate  *
54*7c478bd9Sstevel@tonic-gate  * If the value is 0, nothing is logged.
55*7c478bd9Sstevel@tonic-gate  * If the value is 1, the error is logged to the log file, but not console.
56*7c478bd9Sstevel@tonic-gate  * If the value is 2, the error is logged to the log file and console.
57*7c478bd9Sstevel@tonic-gate  */
58*7c478bd9Sstevel@tonic-gate int	ce_verbose_memory = 1;
59*7c478bd9Sstevel@tonic-gate int	ce_verbose_other = 1;
60*7c478bd9Sstevel@tonic-gate 
61*7c478bd9Sstevel@tonic-gate int	ce_show_data = 0;
62*7c478bd9Sstevel@tonic-gate int	ce_debug = 0;
63*7c478bd9Sstevel@tonic-gate int	ue_debug = 0;
64*7c478bd9Sstevel@tonic-gate int	reset_debug = 0;
65*7c478bd9Sstevel@tonic-gate 
66*7c478bd9Sstevel@tonic-gate /*
67*7c478bd9Sstevel@tonic-gate  * Tunables for controlling the handling of asynchronous faults (AFTs). Setting
68*7c478bd9Sstevel@tonic-gate  * these to non-default values on a non-DEBUG kernel is NOT supported.
69*7c478bd9Sstevel@tonic-gate  */
70*7c478bd9Sstevel@tonic-gate int	aft_verbose = 0;	/* log AFT messages > 1 to log only */
71*7c478bd9Sstevel@tonic-gate int	aft_panic = 0;		/* panic (not reboot) on fatal usermode AFLT */
72*7c478bd9Sstevel@tonic-gate int	aft_testfatal = 0;	/* force all AFTs to panic immediately */
73*7c478bd9Sstevel@tonic-gate 
74*7c478bd9Sstevel@tonic-gate /*
75*7c478bd9Sstevel@tonic-gate  * Defined in bus_func.c but initialised in error_init
76*7c478bd9Sstevel@tonic-gate  */
77*7c478bd9Sstevel@tonic-gate extern kmutex_t bfd_lock;
78*7c478bd9Sstevel@tonic-gate 
79*7c478bd9Sstevel@tonic-gate static uint32_t rq_overflow_count = 0;		/* counter for rq overflow */
80*7c478bd9Sstevel@tonic-gate 
81*7c478bd9Sstevel@tonic-gate static void cpu_queue_one_event(errh_async_flt_t *);
82*7c478bd9Sstevel@tonic-gate static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
83*7c478bd9Sstevel@tonic-gate static void errh_page_settoxic(errh_async_flt_t *, uchar_t);
84*7c478bd9Sstevel@tonic-gate static void errh_page_retire(errh_async_flt_t *);
85*7c478bd9Sstevel@tonic-gate static int errh_error_protected(struct regs *, struct async_flt *, int *);
86*7c478bd9Sstevel@tonic-gate static void errh_rq_full(struct async_flt *);
87*7c478bd9Sstevel@tonic-gate static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
88*7c478bd9Sstevel@tonic-gate static void ce_drain(void *, struct async_flt *, errorq_elem_t *);
89*7c478bd9Sstevel@tonic-gate 
/*
 * Drain the per-CPU resumable error queue (RQ).  Called from the
 * resumable error trap handler with the hypervisor-reported head and
 * tail offsets of the queue.  Each pending error report is copied to a
 * local buffer, validated against the expected descriptor, converted
 * into an async_flt and dispatched to an error queue for logging.
 */
/*ARGSUSED*/
void
process_resumable_error(struct regs *rp, uint32_t head_offset,
    uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the resumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
		    CPU_RQ_SIZE);
		/* Copy the error report to local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));
		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around */
		head_offset &= (CPU_RQ_SIZE - 1);

		/* set error handle to zero so it can hold new error report */
		head_va->ehdl = 0;

		/*
		 * Only the uncorrectable-but-resumable descriptor is
		 * expected on this queue; warn and skip anything else.
		 */
		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_UCOR_RE:
			break;

		default:
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    " invalid in resumable error handler",
			    (long long) errh_flt.errh_er.desc);
			continue;
		}

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_class = CPU_FAULT;
		aflt->flt_prot = AFLT_PROT_NONE;
		/* Privileged iff the report's mode field says so */
		aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);

		if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
			/* If it is an error on other cpu */
			aflt->flt_panic = 1;
		else
			aflt->flt_panic = 0;

		/*
		 * Handle resumable queue full case.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
			(void) errh_rq_full(aflt);
		}

		/*
		 * Queue the error on ce or ue queue depend on flt_panic.
		 * Even if flt_panic is set, the code still keep processing
		 * the rest element on rq until the panic starts.
		 */
		(void) cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable error on another CPU");
		}
	}
}
167*7c478bd9Sstevel@tonic-gate 
/*
 * Drain the per-CPU non-resumable error queue (NRQ).  Called from the
 * non-resumable error trap handler with the trap level (tl) at the
 * time of the error and the hypervisor-reported queue head and tail
 * offsets.  For each report we decide whether the error was covered by
 * kernel error protection (on_trap()/t_lofault), whether to panic, and
 * queue the report for logging.  Memory errors have their pages marked
 * toxic; user-mode or copy-protected errors arm an AST so the queue is
 * drained before returning to userland.
 */
void
process_nonresumable_error(struct regs *rp, uint64_t tl,
    uint32_t head_offset, uint32_t tail_offset)
{
	struct machcpu *mcpup;
	struct async_flt *aflt;
	errh_async_flt_t errh_flt;
	errh_er_t *head_va;
	int trampolined = 0;
	int expected = DDI_FM_ERR_UNEXPECTED;
	uint64_t exec_mode;

	mcpup = &(CPU->cpu_m);

	while (head_offset != tail_offset) {
		/* kernel buffer starts right after the nonresumable queue */
		head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
		    CPU_NRQ_SIZE);

		/* Copy the error report to local buffer */
		bzero(&errh_flt, sizeof (errh_async_flt_t));

		bcopy((char *)head_va, &(errh_flt.errh_er),
		    sizeof (errh_er_t));

		/* Increment the queue head */
		head_offset += Q_ENTRY_SIZE;
		/* Wrap around */
		head_offset &= (CPU_NRQ_SIZE - 1);

		/* set error handle to zero so it can hold new error report */
		head_va->ehdl = 0;

		aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);

		/* Reset per-report state from any previous iteration */
		trampolined = 0;

		/* PIO (programmed I/O) errors are classified as bus faults */
		if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
			aflt->flt_class = BUS_FAULT;
		else
			aflt->flt_class = CPU_FAULT;

		aflt->flt_id = gethrtime();
		aflt->flt_bus_id = getprocessorid();
		aflt->flt_pc = (caddr_t)rp->r_pc;
		exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
		    >> ERRH_MODE_SHIFT;
		/* Unknown execution mode is conservatively treated as priv */
		aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
		    exec_mode == ERRH_MODE_UNKNOWN);
		aflt->flt_tl = (uchar_t)tl;
		aflt->flt_prot = AFLT_PROT_NONE;
		/* Errors taken at TL>0, or forced test fatality, must panic */
		aflt->flt_panic = ((aflt->flt_tl != 0) ||
		    (aft_testfatal != 0));

		switch (errh_flt.errh_er.desc) {
		case ERRH_DESC_PR_NRE:
			/*
			 * Fall through, precise fault also need to check
			 * to see if it was protected.
			 */

		case ERRH_DESC_DEF_NRE:
			/*
			 * If the trap occurred in privileged mode at TL=0,
			 * we need to check to see if we were executing
			 * in kernel under on_trap() or t_lofault
			 * protection. If so, modify the saved registers
			 * so that we return from the trap to the
			 * appropriate trampoline routine.
			 */
			if (aflt->flt_priv == 1 && aflt->flt_tl == 0)
				trampolined =
				    errh_error_protected(rp, aflt, &expected);

			if (!aflt->flt_priv || aflt->flt_prot ==
			    AFLT_PROT_COPY) {
				aflt->flt_panic |= aft_panic;
			} else if (!trampolined &&
			    aflt->flt_class != BUS_FAULT) {
				aflt->flt_panic = 1;
			}

			/*
			 * If PIO error, we need to query the bus nexus
			 * for fatal errors.
			 */
			if (aflt->flt_class == BUS_FAULT) {
				aflt->flt_addr = errh_flt.errh_er.ra;
				errh_cpu_run_bus_error_handlers(aflt,
				    expected);
			}

			break;

		default:
			cmn_err(CE_WARN, "Error Descriptor 0x%llx "
			    " invalid in nonresumable error handler",
			    (long long) errh_flt.errh_er.desc);
			continue;
		}

		/*
		 * Queue the error report for further processing. If
		 * flt_panic is set, code still process other errors
		 * in the queue until the panic routine stops the
		 * kernel.
		 */
		(void) cpu_queue_one_event(&errh_flt);

		/*
		 * Panic here if aflt->flt_panic has been set.
		 * Enqueued errors will be logged as part of the panic flow.
		 */
		if (aflt->flt_panic) {
			fm_panic("Unrecoverable hardware error");
		}

		/*
		 * If it is a memory error, we turn on the PAGE_IS_TOXIC
		 * flag. The page will be retired later and scrubbed when
		 * it is freed.
		 */
		if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
			(void) errh_page_settoxic(&errh_flt, PAGE_IS_TOXIC);

		/*
		 * If we queued an error and it was in user mode or
		 * protected by t_lofault,
		 * set AST flag so the queue will be drained before
		 * returning to user mode.
		 */
		if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
			int pcb_flag = 0;

			if (aflt->flt_class == CPU_FAULT)
				pcb_flag |= ASYNC_HWERR;
			else if (aflt->flt_class == BUS_FAULT)
				pcb_flag |= ASYNC_BERR;

			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
			aston(curthread);
		}
	}
}
312*7c478bd9Sstevel@tonic-gate 
313*7c478bd9Sstevel@tonic-gate /*
314*7c478bd9Sstevel@tonic-gate  * For PIO errors, this routine calls nexus driver's error
315*7c478bd9Sstevel@tonic-gate  * callback routines. If the callback routine returns fatal, and
 * we are in kernel or unknown mode without any error protection,
317*7c478bd9Sstevel@tonic-gate  * we need to turn on the panic flag.
318*7c478bd9Sstevel@tonic-gate  */
319*7c478bd9Sstevel@tonic-gate void
320*7c478bd9Sstevel@tonic-gate errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
321*7c478bd9Sstevel@tonic-gate {
322*7c478bd9Sstevel@tonic-gate 	int status;
323*7c478bd9Sstevel@tonic-gate 	ddi_fm_error_t de;
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate 	bzero(&de, sizeof (ddi_fm_error_t));
326*7c478bd9Sstevel@tonic-gate 
327*7c478bd9Sstevel@tonic-gate 	de.fme_version = DDI_FME_VERSION;
328*7c478bd9Sstevel@tonic-gate 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
329*7c478bd9Sstevel@tonic-gate 	de.fme_flag = expected;
330*7c478bd9Sstevel@tonic-gate 	de.fme_bus_specific = (void *)aflt->flt_addr;
331*7c478bd9Sstevel@tonic-gate 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
332*7c478bd9Sstevel@tonic-gate 
333*7c478bd9Sstevel@tonic-gate 	/*
334*7c478bd9Sstevel@tonic-gate 	 * If error is protected, it will jump to proper routine
335*7c478bd9Sstevel@tonic-gate 	 * to handle the handle; if it is in user level, we just
336*7c478bd9Sstevel@tonic-gate 	 * kill the user process; if the driver thinks the error is
337*7c478bd9Sstevel@tonic-gate 	 * not fatal, we can drive on. If none of above are true,
338*7c478bd9Sstevel@tonic-gate 	 * we panic
339*7c478bd9Sstevel@tonic-gate 	 */
340*7c478bd9Sstevel@tonic-gate 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
341*7c478bd9Sstevel@tonic-gate 	    (status == DDI_FM_FATAL))
342*7c478bd9Sstevel@tonic-gate 		aflt->flt_panic = 1;
343*7c478bd9Sstevel@tonic-gate }
344*7c478bd9Sstevel@tonic-gate 
345*7c478bd9Sstevel@tonic-gate /*
346*7c478bd9Sstevel@tonic-gate  * This routine checks to see if we are under any error protection when
347*7c478bd9Sstevel@tonic-gate  * the error happens. If we are under error protection, we unwind to
348*7c478bd9Sstevel@tonic-gate  * the protection and indicate fault.
349*7c478bd9Sstevel@tonic-gate  */
350*7c478bd9Sstevel@tonic-gate static int
351*7c478bd9Sstevel@tonic-gate errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
352*7c478bd9Sstevel@tonic-gate {
353*7c478bd9Sstevel@tonic-gate 	int trampolined = 0;
354*7c478bd9Sstevel@tonic-gate 	ddi_acc_hdl_t *hp;
355*7c478bd9Sstevel@tonic-gate 
356*7c478bd9Sstevel@tonic-gate 	if (curthread->t_ontrap != NULL) {
357*7c478bd9Sstevel@tonic-gate 		on_trap_data_t *otp = curthread->t_ontrap;
358*7c478bd9Sstevel@tonic-gate 
359*7c478bd9Sstevel@tonic-gate 		if (otp->ot_prot & OT_DATA_EC) {
360*7c478bd9Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_EC;
361*7c478bd9Sstevel@tonic-gate 			otp->ot_trap |= OT_DATA_EC;
362*7c478bd9Sstevel@tonic-gate 			rp->r_pc = otp->ot_trampoline;
363*7c478bd9Sstevel@tonic-gate 			rp->r_npc = rp->r_pc +4;
364*7c478bd9Sstevel@tonic-gate 			trampolined = 1;
365*7c478bd9Sstevel@tonic-gate 		}
366*7c478bd9Sstevel@tonic-gate 
367*7c478bd9Sstevel@tonic-gate 		if (otp->ot_prot & OT_DATA_ACCESS) {
368*7c478bd9Sstevel@tonic-gate 			aflt->flt_prot = AFLT_PROT_ACCESS;
369*7c478bd9Sstevel@tonic-gate 			otp->ot_trap |= OT_DATA_ACCESS;
370*7c478bd9Sstevel@tonic-gate 			rp->r_pc = otp->ot_trampoline;
371*7c478bd9Sstevel@tonic-gate 			rp->r_npc = rp->r_pc + 4;
372*7c478bd9Sstevel@tonic-gate 			trampolined = 1;
373*7c478bd9Sstevel@tonic-gate 			/*
374*7c478bd9Sstevel@tonic-gate 			 * for peek and caut_gets
375*7c478bd9Sstevel@tonic-gate 			 * errors are expected
376*7c478bd9Sstevel@tonic-gate 			 */
377*7c478bd9Sstevel@tonic-gate 			hp = (ddi_acc_hdl_t *)otp->ot_handle;
378*7c478bd9Sstevel@tonic-gate 			if (!hp)
379*7c478bd9Sstevel@tonic-gate 				*expected = DDI_FM_ERR_PEEK;
380*7c478bd9Sstevel@tonic-gate 			else if (hp->ah_acc.devacc_attr_access ==
381*7c478bd9Sstevel@tonic-gate 			    DDI_CAUTIOUS_ACC)
382*7c478bd9Sstevel@tonic-gate 				*expected = DDI_FM_ERR_EXPECTED;
383*7c478bd9Sstevel@tonic-gate 		}
384*7c478bd9Sstevel@tonic-gate 	} else if (curthread->t_lofault) {
385*7c478bd9Sstevel@tonic-gate 		aflt->flt_prot = AFLT_PROT_COPY;
386*7c478bd9Sstevel@tonic-gate 		rp->r_g1 = EFAULT;
387*7c478bd9Sstevel@tonic-gate 		rp->r_pc = curthread->t_lofault;
388*7c478bd9Sstevel@tonic-gate 		rp->r_npc = rp->r_pc + 4;
389*7c478bd9Sstevel@tonic-gate 		trampolined = 1;
390*7c478bd9Sstevel@tonic-gate 	}
391*7c478bd9Sstevel@tonic-gate 
392*7c478bd9Sstevel@tonic-gate 	return (trampolined);
393*7c478bd9Sstevel@tonic-gate }
394*7c478bd9Sstevel@tonic-gate 
395*7c478bd9Sstevel@tonic-gate /*
396*7c478bd9Sstevel@tonic-gate  * Queue one event.
397*7c478bd9Sstevel@tonic-gate  */
398*7c478bd9Sstevel@tonic-gate static void
399*7c478bd9Sstevel@tonic-gate cpu_queue_one_event(errh_async_flt_t *errh_fltp)
400*7c478bd9Sstevel@tonic-gate {
401*7c478bd9Sstevel@tonic-gate 	struct async_flt *aflt = (struct async_flt *)errh_fltp;
402*7c478bd9Sstevel@tonic-gate 	errorq_t *eqp;
403*7c478bd9Sstevel@tonic-gate 
404*7c478bd9Sstevel@tonic-gate 	if (aflt->flt_panic)
405*7c478bd9Sstevel@tonic-gate 		eqp = ue_queue;
406*7c478bd9Sstevel@tonic-gate 	else
407*7c478bd9Sstevel@tonic-gate 		eqp = ce_queue;
408*7c478bd9Sstevel@tonic-gate 
409*7c478bd9Sstevel@tonic-gate 	errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
410*7c478bd9Sstevel@tonic-gate 	    aflt->flt_panic);
411*7c478bd9Sstevel@tonic-gate }
412*7c478bd9Sstevel@tonic-gate 
413*7c478bd9Sstevel@tonic-gate /*
414*7c478bd9Sstevel@tonic-gate  * The cpu_async_log_err() function is called by the ce/ue_drain() function to
415*7c478bd9Sstevel@tonic-gate  * handle logging for CPU events that are dequeued.  As such, it can be invoked
416*7c478bd9Sstevel@tonic-gate  * from softint context, from AST processing in the trap() flow, or from the
417*7c478bd9Sstevel@tonic-gate  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
418*7c478bd9Sstevel@tonic-gate  */
419*7c478bd9Sstevel@tonic-gate void
420*7c478bd9Sstevel@tonic-gate cpu_async_log_err(void *flt)
421*7c478bd9Sstevel@tonic-gate {
422*7c478bd9Sstevel@tonic-gate 	errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
423*7c478bd9Sstevel@tonic-gate 	errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate 	switch (errh_erp->desc) {
426*7c478bd9Sstevel@tonic-gate 	case ERRH_DESC_UCOR_RE:
427*7c478bd9Sstevel@tonic-gate 		if (errh_erp->attr & ERRH_ATTR_MEM) {
428*7c478bd9Sstevel@tonic-gate 			/*
429*7c478bd9Sstevel@tonic-gate 			 * Turn on the PAGE_IS_TOXIC flag. The page will be
430*7c478bd9Sstevel@tonic-gate 			 * scrubbed when it is freed.
431*7c478bd9Sstevel@tonic-gate 			 */
432*7c478bd9Sstevel@tonic-gate 			(void) errh_page_settoxic(errh_fltp, PAGE_IS_TOXIC);
433*7c478bd9Sstevel@tonic-gate 		}
434*7c478bd9Sstevel@tonic-gate 
435*7c478bd9Sstevel@tonic-gate 		break;
436*7c478bd9Sstevel@tonic-gate 
437*7c478bd9Sstevel@tonic-gate 	case ERRH_DESC_PR_NRE:
438*7c478bd9Sstevel@tonic-gate 	case ERRH_DESC_DEF_NRE:
439*7c478bd9Sstevel@tonic-gate 		if (errh_erp->attr & ERRH_ATTR_MEM) {
440*7c478bd9Sstevel@tonic-gate 			/*
441*7c478bd9Sstevel@tonic-gate 			 * For non-resumable memory error, retire
442*7c478bd9Sstevel@tonic-gate 			 * the page here.
443*7c478bd9Sstevel@tonic-gate 			 */
444*7c478bd9Sstevel@tonic-gate 			errh_page_retire(errh_fltp);
445*7c478bd9Sstevel@tonic-gate 		}
446*7c478bd9Sstevel@tonic-gate 		break;
447*7c478bd9Sstevel@tonic-gate 
448*7c478bd9Sstevel@tonic-gate 	default:
449*7c478bd9Sstevel@tonic-gate 		break;
450*7c478bd9Sstevel@tonic-gate 	}
451*7c478bd9Sstevel@tonic-gate }
452*7c478bd9Sstevel@tonic-gate 
453*7c478bd9Sstevel@tonic-gate /*
454*7c478bd9Sstevel@tonic-gate  * Called from ce_drain().
455*7c478bd9Sstevel@tonic-gate  */
456*7c478bd9Sstevel@tonic-gate void
457*7c478bd9Sstevel@tonic-gate cpu_ce_log_err(struct async_flt *aflt)
458*7c478bd9Sstevel@tonic-gate {
459*7c478bd9Sstevel@tonic-gate 	switch (aflt->flt_class) {
460*7c478bd9Sstevel@tonic-gate 	case CPU_FAULT:
461*7c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
462*7c478bd9Sstevel@tonic-gate 		break;
463*7c478bd9Sstevel@tonic-gate 
464*7c478bd9Sstevel@tonic-gate 	case BUS_FAULT:
465*7c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
466*7c478bd9Sstevel@tonic-gate 		break;
467*7c478bd9Sstevel@tonic-gate 
468*7c478bd9Sstevel@tonic-gate 	default:
469*7c478bd9Sstevel@tonic-gate 		break;
470*7c478bd9Sstevel@tonic-gate 	}
471*7c478bd9Sstevel@tonic-gate }
472*7c478bd9Sstevel@tonic-gate 
473*7c478bd9Sstevel@tonic-gate /*
474*7c478bd9Sstevel@tonic-gate  * Called from ue_drain().
475*7c478bd9Sstevel@tonic-gate  */
476*7c478bd9Sstevel@tonic-gate void
477*7c478bd9Sstevel@tonic-gate cpu_ue_log_err(struct async_flt *aflt)
478*7c478bd9Sstevel@tonic-gate {
479*7c478bd9Sstevel@tonic-gate 	switch (aflt->flt_class) {
480*7c478bd9Sstevel@tonic-gate 	case CPU_FAULT:
481*7c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
482*7c478bd9Sstevel@tonic-gate 		break;
483*7c478bd9Sstevel@tonic-gate 
484*7c478bd9Sstevel@tonic-gate 	case BUS_FAULT:
485*7c478bd9Sstevel@tonic-gate 		cpu_async_log_err(aflt);
486*7c478bd9Sstevel@tonic-gate 		break;
487*7c478bd9Sstevel@tonic-gate 
488*7c478bd9Sstevel@tonic-gate 	default:
489*7c478bd9Sstevel@tonic-gate 		break;
490*7c478bd9Sstevel@tonic-gate 	}
491*7c478bd9Sstevel@tonic-gate }
492*7c478bd9Sstevel@tonic-gate 
493*7c478bd9Sstevel@tonic-gate /*
494*7c478bd9Sstevel@tonic-gate  * Turn on flag on the error memory region.
495*7c478bd9Sstevel@tonic-gate  */
496*7c478bd9Sstevel@tonic-gate static void
497*7c478bd9Sstevel@tonic-gate errh_page_settoxic(errh_async_flt_t *errh_fltp, uchar_t flag)
498*7c478bd9Sstevel@tonic-gate {
499*7c478bd9Sstevel@tonic-gate 	page_t *pp;
500*7c478bd9Sstevel@tonic-gate 	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
501*7c478bd9Sstevel@tonic-gate 	uint64_t flt_real_addr_end = flt_real_addr_start +
502*7c478bd9Sstevel@tonic-gate 	    errh_fltp->errh_er.sz - 1;
503*7c478bd9Sstevel@tonic-gate 	int64_t current_addr;
504*7c478bd9Sstevel@tonic-gate 
505*7c478bd9Sstevel@tonic-gate 	if (errh_fltp->errh_er.sz == 0)
506*7c478bd9Sstevel@tonic-gate 		return;
507*7c478bd9Sstevel@tonic-gate 
508*7c478bd9Sstevel@tonic-gate 	for (current_addr = flt_real_addr_start;
509*7c478bd9Sstevel@tonic-gate 	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
510*7c478bd9Sstevel@tonic-gate 		pp = page_numtopp_nolock((pfn_t)
511*7c478bd9Sstevel@tonic-gate 		    (current_addr >> MMU_PAGESHIFT));
512*7c478bd9Sstevel@tonic-gate 
513*7c478bd9Sstevel@tonic-gate 		if (pp != NULL) {
514*7c478bd9Sstevel@tonic-gate 			page_settoxic(pp, flag);
515*7c478bd9Sstevel@tonic-gate 		}
516*7c478bd9Sstevel@tonic-gate 	}
517*7c478bd9Sstevel@tonic-gate }
518*7c478bd9Sstevel@tonic-gate 
519*7c478bd9Sstevel@tonic-gate /*
520*7c478bd9Sstevel@tonic-gate  * Retire the page(s) indicated in the error report.
521*7c478bd9Sstevel@tonic-gate  */
522*7c478bd9Sstevel@tonic-gate static void
523*7c478bd9Sstevel@tonic-gate errh_page_retire(errh_async_flt_t *errh_fltp)
524*7c478bd9Sstevel@tonic-gate {
525*7c478bd9Sstevel@tonic-gate 	page_t *pp;
526*7c478bd9Sstevel@tonic-gate 	uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
527*7c478bd9Sstevel@tonic-gate 	uint64_t flt_real_addr_end = flt_real_addr_start +
528*7c478bd9Sstevel@tonic-gate 	    errh_fltp->errh_er.sz - 1;
529*7c478bd9Sstevel@tonic-gate 	int64_t current_addr;
530*7c478bd9Sstevel@tonic-gate 
531*7c478bd9Sstevel@tonic-gate 	if (errh_fltp->errh_er.sz == 0)
532*7c478bd9Sstevel@tonic-gate 		return;
533*7c478bd9Sstevel@tonic-gate 
534*7c478bd9Sstevel@tonic-gate 	for (current_addr = flt_real_addr_start;
535*7c478bd9Sstevel@tonic-gate 	    current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
536*7c478bd9Sstevel@tonic-gate 		pp = page_numtopp_nolock((pfn_t)
537*7c478bd9Sstevel@tonic-gate 		    (current_addr >> MMU_PAGESHIFT));
538*7c478bd9Sstevel@tonic-gate 
539*7c478bd9Sstevel@tonic-gate 		if (pp != NULL) {
540*7c478bd9Sstevel@tonic-gate 			(void) page_retire(pp, PAGE_IS_TOXIC);
541*7c478bd9Sstevel@tonic-gate 		}
542*7c478bd9Sstevel@tonic-gate 	}
543*7c478bd9Sstevel@tonic-gate }
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate void
546*7c478bd9Sstevel@tonic-gate mem_scrub(uint64_t paddr, uint64_t len)
547*7c478bd9Sstevel@tonic-gate {
548*7c478bd9Sstevel@tonic-gate 	uint64_t pa, length, scrubbed_len;
549*7c478bd9Sstevel@tonic-gate 	uint64_t ret = H_EOK;
550*7c478bd9Sstevel@tonic-gate 
551*7c478bd9Sstevel@tonic-gate 	pa = paddr;
552*7c478bd9Sstevel@tonic-gate 	length = len;
553*7c478bd9Sstevel@tonic-gate 	scrubbed_len = 0;
554*7c478bd9Sstevel@tonic-gate 
555*7c478bd9Sstevel@tonic-gate 	while (ret == H_EOK) {
556*7c478bd9Sstevel@tonic-gate 		ret = hv_mem_scrub(pa, length, &scrubbed_len);
557*7c478bd9Sstevel@tonic-gate 
558*7c478bd9Sstevel@tonic-gate 		if (ret == H_EOK || scrubbed_len >= length) {
559*7c478bd9Sstevel@tonic-gate 			break;
560*7c478bd9Sstevel@tonic-gate 		}
561*7c478bd9Sstevel@tonic-gate 
562*7c478bd9Sstevel@tonic-gate 		pa += scrubbed_len;
563*7c478bd9Sstevel@tonic-gate 		length -= scrubbed_len;
564*7c478bd9Sstevel@tonic-gate 	}
565*7c478bd9Sstevel@tonic-gate }
566*7c478bd9Sstevel@tonic-gate 
567*7c478bd9Sstevel@tonic-gate void
568*7c478bd9Sstevel@tonic-gate mem_sync(caddr_t va, size_t len)
569*7c478bd9Sstevel@tonic-gate {
570*7c478bd9Sstevel@tonic-gate 	uint64_t pa, length, flushed;
571*7c478bd9Sstevel@tonic-gate 	uint64_t ret = H_EOK;
572*7c478bd9Sstevel@tonic-gate 
573*7c478bd9Sstevel@tonic-gate 	pa = va_to_pa((caddr_t)va);
574*7c478bd9Sstevel@tonic-gate 
575*7c478bd9Sstevel@tonic-gate 	if (pa == (uint64_t)-1)
576*7c478bd9Sstevel@tonic-gate 		return;
577*7c478bd9Sstevel@tonic-gate 
578*7c478bd9Sstevel@tonic-gate 	length = len;
579*7c478bd9Sstevel@tonic-gate 	flushed = 0;
580*7c478bd9Sstevel@tonic-gate 
581*7c478bd9Sstevel@tonic-gate 	while (ret == H_EOK) {
582*7c478bd9Sstevel@tonic-gate 		ret = hv_mem_sync(pa, length, &flushed);
583*7c478bd9Sstevel@tonic-gate 
584*7c478bd9Sstevel@tonic-gate 		if (ret == H_EOK || flushed >= length) {
585*7c478bd9Sstevel@tonic-gate 			break;
586*7c478bd9Sstevel@tonic-gate 		}
587*7c478bd9Sstevel@tonic-gate 
588*7c478bd9Sstevel@tonic-gate 		pa += flushed;
589*7c478bd9Sstevel@tonic-gate 		length -= flushed;
590*7c478bd9Sstevel@tonic-gate 	}
591*7c478bd9Sstevel@tonic-gate }
592*7c478bd9Sstevel@tonic-gate 
593*7c478bd9Sstevel@tonic-gate /*
594*7c478bd9Sstevel@tonic-gate  * If resumable queue is full, we need to check if any cpu is in
595*7c478bd9Sstevel@tonic-gate  * error state. If not, we drive on. If yes, we need to panic. The
596*7c478bd9Sstevel@tonic-gate  * hypervisor call hv_cpu_state() is being used for checking the
597*7c478bd9Sstevel@tonic-gate  * cpu state.
598*7c478bd9Sstevel@tonic-gate  */
599*7c478bd9Sstevel@tonic-gate static void
600*7c478bd9Sstevel@tonic-gate errh_rq_full(struct async_flt *afltp)
601*7c478bd9Sstevel@tonic-gate {
602*7c478bd9Sstevel@tonic-gate 	processorid_t who;
603*7c478bd9Sstevel@tonic-gate 	uint64_t cpu_state;
604*7c478bd9Sstevel@tonic-gate 	uint64_t retval;
605*7c478bd9Sstevel@tonic-gate 
606*7c478bd9Sstevel@tonic-gate 	for (who = 0; who < NCPU; who++)
607*7c478bd9Sstevel@tonic-gate 		if (CPU_IN_SET(cpu_ready_set, who)) {
608*7c478bd9Sstevel@tonic-gate 			retval = hv_cpu_state(who, &cpu_state);
609*7c478bd9Sstevel@tonic-gate 			if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
610*7c478bd9Sstevel@tonic-gate 				afltp->flt_panic = 1;
611*7c478bd9Sstevel@tonic-gate 				break;
612*7c478bd9Sstevel@tonic-gate 			}
613*7c478bd9Sstevel@tonic-gate 		}
614*7c478bd9Sstevel@tonic-gate }
615*7c478bd9Sstevel@tonic-gate 
616*7c478bd9Sstevel@tonic-gate /*
617*7c478bd9Sstevel@tonic-gate  * Return processor specific async error structure
618*7c478bd9Sstevel@tonic-gate  * size used.
619*7c478bd9Sstevel@tonic-gate  */
620*7c478bd9Sstevel@tonic-gate int
621*7c478bd9Sstevel@tonic-gate cpu_aflt_size(void)
622*7c478bd9Sstevel@tonic-gate {
623*7c478bd9Sstevel@tonic-gate 	return (sizeof (errh_async_flt_t));
624*7c478bd9Sstevel@tonic-gate }
625*7c478bd9Sstevel@tonic-gate 
626*7c478bd9Sstevel@tonic-gate #define	SZ_TO_ETRS_SHIFT	6
627*7c478bd9Sstevel@tonic-gate 
628*7c478bd9Sstevel@tonic-gate /*
629*7c478bd9Sstevel@tonic-gate  * Message print out when resumable queue is overflown
630*7c478bd9Sstevel@tonic-gate  */
631*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
632*7c478bd9Sstevel@tonic-gate void
633*7c478bd9Sstevel@tonic-gate rq_overflow(struct regs *rp, uint64_t head_offset,
634*7c478bd9Sstevel@tonic-gate     uint64_t tail_offset)
635*7c478bd9Sstevel@tonic-gate {
636*7c478bd9Sstevel@tonic-gate 	rq_overflow_count++;
637*7c478bd9Sstevel@tonic-gate }
638*7c478bd9Sstevel@tonic-gate 
639*7c478bd9Sstevel@tonic-gate /*
640*7c478bd9Sstevel@tonic-gate  * Handler to process a fatal error.  This routine can be called from a
641*7c478bd9Sstevel@tonic-gate  * softint, called from trap()'s AST handling, or called from the panic flow.
642*7c478bd9Sstevel@tonic-gate  */
643*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
644*7c478bd9Sstevel@tonic-gate static void
645*7c478bd9Sstevel@tonic-gate ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
646*7c478bd9Sstevel@tonic-gate {
647*7c478bd9Sstevel@tonic-gate 	cpu_ue_log_err(aflt);
648*7c478bd9Sstevel@tonic-gate }
649*7c478bd9Sstevel@tonic-gate 
650*7c478bd9Sstevel@tonic-gate /*
651*7c478bd9Sstevel@tonic-gate  * Handler to process a correctable error.  This routine can be called from a
652*7c478bd9Sstevel@tonic-gate  * softint.  We just call the CPU module's logging routine.
653*7c478bd9Sstevel@tonic-gate  */
654*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
655*7c478bd9Sstevel@tonic-gate static void
656*7c478bd9Sstevel@tonic-gate ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
657*7c478bd9Sstevel@tonic-gate {
658*7c478bd9Sstevel@tonic-gate 	cpu_ce_log_err(aflt);
659*7c478bd9Sstevel@tonic-gate }
660*7c478bd9Sstevel@tonic-gate 
661*7c478bd9Sstevel@tonic-gate /*
662*7c478bd9Sstevel@tonic-gate  * Allocate error queue sizes based on max_ncpus.  max_ncpus is set just
663*7c478bd9Sstevel@tonic-gate  * after ncpunode has been determined.  ncpus is set in start_other_cpus
664*7c478bd9Sstevel@tonic-gate  * which is called after error_init() but may change dynamically.
665*7c478bd9Sstevel@tonic-gate  */
666*7c478bd9Sstevel@tonic-gate void
667*7c478bd9Sstevel@tonic-gate error_init(void)
668*7c478bd9Sstevel@tonic-gate {
669*7c478bd9Sstevel@tonic-gate 	char tmp_name[MAXSYSNAME];
670*7c478bd9Sstevel@tonic-gate 	dnode_t node;
671*7c478bd9Sstevel@tonic-gate 	size_t size = cpu_aflt_size();
672*7c478bd9Sstevel@tonic-gate 
673*7c478bd9Sstevel@tonic-gate 	/*
674*7c478bd9Sstevel@tonic-gate 	 * Initialize the correctable and uncorrectable error queues.
675*7c478bd9Sstevel@tonic-gate 	 */
676*7c478bd9Sstevel@tonic-gate 	ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
677*7c478bd9Sstevel@tonic-gate 	    MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);
678*7c478bd9Sstevel@tonic-gate 
679*7c478bd9Sstevel@tonic-gate 	ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
680*7c478bd9Sstevel@tonic-gate 	    MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);
681*7c478bd9Sstevel@tonic-gate 
682*7c478bd9Sstevel@tonic-gate 	if (ue_queue == NULL || ce_queue == NULL)
683*7c478bd9Sstevel@tonic-gate 		panic("failed to create required system error queue");
684*7c478bd9Sstevel@tonic-gate 
685*7c478bd9Sstevel@tonic-gate 	/*
686*7c478bd9Sstevel@tonic-gate 	 * Initialize the busfunc list mutex.  This must be a PIL_15 spin lock
687*7c478bd9Sstevel@tonic-gate 	 * because we will need to acquire it from cpu_async_error().
688*7c478bd9Sstevel@tonic-gate 	 */
689*7c478bd9Sstevel@tonic-gate 	mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);
690*7c478bd9Sstevel@tonic-gate 
691*7c478bd9Sstevel@tonic-gate 	node = prom_rootnode();
692*7c478bd9Sstevel@tonic-gate 	if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
693*7c478bd9Sstevel@tonic-gate 		cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
694*7c478bd9Sstevel@tonic-gate 		return;
695*7c478bd9Sstevel@tonic-gate 	}
696*7c478bd9Sstevel@tonic-gate 
697*7c478bd9Sstevel@tonic-gate 	if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
698*7c478bd9Sstevel@tonic-gate 	    (size <= MAXSYSNAME) &&
699*7c478bd9Sstevel@tonic-gate 	    (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
700*7c478bd9Sstevel@tonic-gate 		if (reset_debug) {
701*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_CONT, "System booting after %s\n", tmp_name);
702*7c478bd9Sstevel@tonic-gate 		} else if (strncmp(tmp_name, "FATAL", 5) == 0) {
703*7c478bd9Sstevel@tonic-gate 			cmn_err(CE_CONT,
704*7c478bd9Sstevel@tonic-gate 			    "System booting after fatal error %s\n", tmp_name);
705*7c478bd9Sstevel@tonic-gate 		}
706*7c478bd9Sstevel@tonic-gate 	}
707*7c478bd9Sstevel@tonic-gate }
708