1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 1990-2002 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26/*
27 * Copyright 2019 Peter Tribble.
28 */
29
30#include <sys/types.h>
31#include <sys/conf.h>
32#include <sys/ddi.h>
33#include <sys/sunddi.h>
34#include <sys/ddi_impldefs.h>
35#include <sys/cmn_err.h>
36#include <sys/async.h>
37#include <sys/sysiosbus.h>
38#include <sys/sysioerr.h>
39#include <sys/x_call.h>
40#include <sys/machsystm.h>
41#include <sys/sysmacros.h>
42#include <sys/vmsystm.h>
43#include <sys/cpu_module.h>
44
45/*
46 * Set the following variable in /etc/system to tell the kernel
47 * not to shutdown the machine if the temperature reaches
48 * the Thermal Warning limit.
49 */
50int oven_test = 0;
51
52/*
53 * To indicate if the prom has the property of "thermal-interrupt".
54 */
55static int thermal_interrupt_enabled = 0;
56
57/*
58 * adb debug_sysio_errs to 1 if you don't want your system to panic on
59 * sbus ue errors. adb sysio_err_flag to 0 if you don't want your system
60 * to check for sysio errors at all.
61 */
62int sysio_err_flag = 1;
63uint_t debug_sysio_errs = 0;
64
65/*
66 * bto_cnt = number of bus errors and timeouts allowed within bto_secs
67 * use /etc/system to change the bto_cnt to a very large number if
68 * it's a problem!
69 */
70int bto_secs = 10;
71int bto_cnt = 10;
72
73static uint_t
74sysio_ue_intr(struct sbus_soft_state *softsp);
75
76static uint_t
77sysio_ce_intr(struct sbus_soft_state *softsp);
78
79static uint_t
80sbus_err_intr(struct sbus_soft_state *softsp);
81
82static void
83sysio_log_ce_err(struct async_flt *ecc, char *unum);
84
85static void
86sysio_log_ue_err(struct async_flt *ecc, char *unum);
87
88static void
89sbus_clear_intr(struct sbus_soft_state *softsp, uint64_t *pafsr);
90
91static void
92sbus_log_error(struct sbus_soft_state *softsp, uint64_t *pafsr, uint64_t *pafar,
93    ushort_t id, ushort_t inst, int cleared,
94    on_trap_data_t *ontrap_data);
95
96static int
97sbus_check_bto(struct sbus_soft_state *softsp);
98
99static void
100sbus_log_csr_error(struct async_flt *aflt, char *unum);
101
102static uint_t
103sbus_ctrl_ecc_err(struct sbus_soft_state *softsp);
104
105static uint_t
106sysio_dis_err(struct sbus_soft_state *softsp);
107
108static uint_t
109sysio_init_err(struct sbus_soft_state *softsp);
110
111static uint_t
112sysio_thermal_warn_intr(struct sbus_soft_state *softsp);
113
114static int sbus_pil[] = {SBUS_UE_PIL, SBUS_CE_PIL, SBUS_ERR_PIL, SBUS_PF_PIL,
115	SBUS_THERMAL_PIL, SBUS_PM_PIL};
116int
117sysio_err_init(struct sbus_soft_state *softsp, caddr_t address)
118{
119	if (sysio_err_flag == 0) {
120		cmn_err(CE_CONT, "Warning: sysio errors not initialized\n");
121		return (DDI_SUCCESS);
122	}
123
124	/*
125	 * Get the address of the already mapped-in sysio/sbus error registers.
126	 * Simply add each registers offset to the already mapped in address
127	 * that was retrieved from the device node's "address" property,
128	 * and passed as an argument to this function.
129	 *
130	 * Define a macro for the pointer arithmetic ...
131	 */
132
133#define	REG_ADDR(b, o)	(uint64_t *)((caddr_t)(b) + (o))
134
135	softsp->sysio_ecc_reg = REG_ADDR(address, OFF_SYSIO_ECC_REGS);
136	softsp->sysio_ue_reg = REG_ADDR(address, OFF_SYSIO_UE_REGS);
137	softsp->sysio_ce_reg = REG_ADDR(address, OFF_SYSIO_CE_REGS);
138	softsp->sbus_err_reg = REG_ADDR(address, OFF_SBUS_ERR_REGS);
139
140#undef	REG_ADDR
141
142	/*
143	 * create the interrupt-priorities property if it doesn't
144	 * already exist to provide a hint as to the PIL level for
145	 * our interrupt.
146	 */
147	{
148		int len;
149
150		if (ddi_getproplen(DDI_DEV_T_ANY, softsp->dip,
151		    DDI_PROP_DONTPASS, "interrupt-priorities",
152		    &len) != DDI_PROP_SUCCESS) {
153				/* Create the interrupt-priorities property. */
154			(void) ddi_prop_update_int_array(DDI_DEV_T_NONE,
155			    softsp->dip, "interrupt-priorities",
156			    (int *)sbus_pil, sizeof (sbus_pil) / sizeof (int));
157		}
158	}
159
160	(void) ddi_add_intr(softsp->dip, 0, NULL, NULL,
161	    (uint_t (*)())sysio_ue_intr, (caddr_t)softsp);
162	(void) ddi_add_intr(softsp->dip, 1, NULL, NULL,
163	    (uint_t (*)())sysio_ce_intr, (caddr_t)softsp);
164	(void) ddi_add_intr(softsp->dip, 2, NULL, NULL,
165	    (uint_t (*)())sbus_err_intr, (caddr_t)softsp);
166	/*
167	 * If the thermal-interrupt property is in place,
168	 * then register the thermal warning interrupt handler and
169	 * program its mapping register
170	 */
171	thermal_interrupt_enabled = ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
172		DDI_PROP_DONTPASS, "thermal-interrupt", -1);
173
174	if (thermal_interrupt_enabled == 1) {
175		(void) ddi_add_intr(softsp->dip, 4, NULL, NULL,
176		    (uint_t (*)())sysio_thermal_warn_intr, (caddr_t)softsp);
177	}
178
179	bus_func_register(BF_TYPE_UE, (busfunc_t)sbus_ctrl_ecc_err, softsp);
180	bus_func_register(BF_TYPE_ERRDIS, (busfunc_t)sysio_dis_err, softsp);
181
182	(void) sysio_init_err(softsp);
183
184	return (DDI_SUCCESS);
185}
186
187int
188sysio_err_resume_init(struct sbus_soft_state *softsp)
189{
190	(void) sysio_init_err(softsp);
191	return (DDI_SUCCESS);
192}
193
194int
195sysio_err_uninit(struct sbus_soft_state *softsp)
196{
197	/* remove the interrupts from the interrupt list */
198	(void) sysio_dis_err(softsp);
199
200	ddi_remove_intr(softsp->dip, 0, NULL);
201	ddi_remove_intr(softsp->dip, 1, NULL);
202	ddi_remove_intr(softsp->dip, 2, NULL);
203
204	if (thermal_interrupt_enabled == 1) {
205		ddi_remove_intr(softsp->dip, 4, NULL);
206	}
207
208	bus_func_unregister(BF_TYPE_UE, (busfunc_t)sbus_ctrl_ecc_err, softsp);
209	bus_func_unregister(BF_TYPE_ERRDIS, (busfunc_t)sysio_dis_err, softsp);
210
211	return (DDI_SUCCESS);
212}
213
214static uint_t
215sysio_init_err(struct sbus_soft_state *softsp)
216{
217	volatile uint64_t tmp_mondo_vec, tmpreg;
218	volatile uint64_t *mondo_vec_reg;
219	uint_t cpu_id, acpu_id;
220
221	acpu_id = intr_dist_cpuid();
222	/*
223	 * Program the mondo vector accordingly.  This MUST be the
224	 * last thing we do.  Once we program the mondo, the device
225	 * may begin to interrupt. Store it in the hardware reg.
226	 */
227	mondo_vec_reg = (uint64_t *)(softsp->intr_mapping_reg + UE_ECC_MAPREG);
228	cpu_id = acpu_id;
229	tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID;
230	*mondo_vec_reg = tmp_mondo_vec;
231
232	mondo_vec_reg = (uint64_t *)(softsp->intr_mapping_reg + CE_ECC_MAPREG);
233	cpu_id = acpu_id;
234	tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID;
235	*mondo_vec_reg = tmp_mondo_vec;
236
237	mondo_vec_reg =
238	    (uint64_t *)(softsp->intr_mapping_reg + SBUS_ERR_MAPREG);
239	cpu_id = acpu_id;
240
241	tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) | INTERRUPT_VALID;
242	*mondo_vec_reg = tmp_mondo_vec;
243
244	if (thermal_interrupt_enabled == 1) {
245		mondo_vec_reg = (softsp->intr_mapping_reg + THERMAL_MAPREG);
246		cpu_id = acpu_id;
247		tmp_mondo_vec = (cpu_id << INTERRUPT_CPU_FIELD) |
248			INTERRUPT_VALID;
249		*mondo_vec_reg = tmp_mondo_vec;
250	}
251
252	/* Flush store buffers */
253	tmpreg = *softsp->sbus_ctrl_reg;
254
255	/*
256	 * XXX - This may already be set by the OBP.
257	 */
258	tmpreg = SYSIO_APCKEN;
259	*softsp->sysio_ctrl_reg |= tmpreg;
260	tmpreg = (SECR_ECC_EN | SECR_UE_INTEN | SECR_CE_INTEN);
261	*softsp->sysio_ecc_reg = tmpreg;
262	tmpreg = SB_CSR_ERRINT_EN;
263	*softsp->sbus_err_reg |= tmpreg;
264
265	/* Initialize timeout/bus error counter */
266	softsp->bto_timestamp = 0;
267	softsp->bto_ctr = 0;
268
269	return (0);
270}
271
272static uint_t
273sysio_dis_err(struct sbus_soft_state *softsp)
274{
275	volatile uint64_t tmpreg;
276	volatile uint64_t *mondo_vec_reg, *clear_vec_reg;
277
278	*softsp->sysio_ctrl_reg &= ~SYSIO_APCKEN;
279	*softsp->sysio_ecc_reg = 0;
280	*softsp->sbus_err_reg &= ~SB_CSR_ERRINT_EN;
281
282	/* Flush store buffers */
283	tmpreg = *softsp->sbus_ctrl_reg;
284#ifdef lint
285	tmpreg = tmpreg;
286#endif
287
288	/* Unmap mapping registers */
289	mondo_vec_reg = (softsp->intr_mapping_reg + UE_ECC_MAPREG);
290	clear_vec_reg = (softsp->clr_intr_reg + UE_ECC_CLEAR);
291
292	*mondo_vec_reg = 0;
293
294	*clear_vec_reg = 0;
295
296	mondo_vec_reg = (softsp->intr_mapping_reg + CE_ECC_MAPREG);
297	clear_vec_reg = (softsp->clr_intr_reg + CE_ECC_CLEAR);
298
299	*mondo_vec_reg = 0;
300
301	*clear_vec_reg = 0;
302
303	mondo_vec_reg = (softsp->intr_mapping_reg + SBUS_ERR_MAPREG);
304	clear_vec_reg = (softsp->clr_intr_reg + SBUS_ERR_CLEAR);
305
306	*mondo_vec_reg = 0;
307
308	*clear_vec_reg = 0;
309
310	/* Flush store buffers */
311	tmpreg = *softsp->sbus_ctrl_reg;
312
313	return (BF_NONE);
314}
315
/*
 * Uncorrectable ECC error interrupt handler.
 *
 * Gather information about the error into an async_flt structure, and then
 * enqueue the error for reporting and processing and panic.
 *
 * The fault is treated as fatal unless debug_sysio_errs is set; even then
 * the handler panics if another registered BF_TYPE_UE bus function reports
 * BF_FATAL.  Returns DDI_INTR_CLAIMED when it does not panic.
 */
static uint_t
sysio_ue_intr(struct sbus_soft_state *softsp)
{
	volatile uint64_t t_afsr;
	volatile uint64_t t_afar;
	volatile uint64_t *ue_reg, *afar_reg, *clear_reg;
	struct async_flt ecc;
	uint64_t offset;

	/*
	 * Disable all further sbus errors, for this sbus instance, for
	 * what is normally a fatal error.
	 */
	(void) sysio_dis_err(softsp);		/* disabled sysio errors */

	/*
	 * Then read and clear the afsr/afar and clear interrupt regs.
	 * The AFAR is the 64-bit word immediately following the AFSR, and
	 * the AFSR is cleared by writing back the value just read.
	 */
	ue_reg = (uint64_t *)softsp->sysio_ue_reg;
	t_afsr = *ue_reg;
	afar_reg = (uint64_t *)ue_reg + 1;
	t_afar = *afar_reg;
	*ue_reg = t_afsr;

	clear_reg = (softsp->clr_intr_reg + UE_ECC_CLEAR);
	*clear_reg = 0;

	/*
	 * The AFSR DW_OFFSET field contains the offset of the doubleword with
	 * the ECC error relative to the 64-byte aligned PA.  We multiply by 8
	 * to convert to a byte offset, and then add this to flt_addr.
	 */
	offset = ((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT) * 8;

	/* Describe the fault; sysio_log_ue_err() will print it later. */
	bzero(&ecc, sizeof (ecc));
	ecc.flt_id = gethrtime();
	ecc.flt_stat = t_afsr;
	ecc.flt_addr = P2ALIGN(t_afar, 64) + offset;
	ecc.flt_func = sysio_log_ue_err;
	ecc.flt_bus_id = softsp->upa_id;
	ecc.flt_inst = ddi_get_instance(softsp->dip);
	ecc.flt_status = ECC_IOBUS;
	ecc.flt_in_memory = (pf_is_memory(t_afar >> MMU_PAGESHIFT)) ? 1: 0;
	ecc.flt_class = BUS_FAULT;
	/* Fatal unless the debug_sysio_errs tunable has been set. */
	ecc.flt_panic = (debug_sysio_errs == 0);

	errorq_dispatch(ue_queue, &ecc, sizeof (ecc), ecc.flt_panic);

	/*
	 * If the UE is in memory and fatal, save the fault info so the
	 * panic code will know to check for copyback errors.
	 */
	if (ecc.flt_panic && ecc.flt_in_memory)
		panic_aflt = ecc;

	/*
	 * We must also check for other bus UE errors, and panic if
	 * any fatal ones are detected at this point.
	 */
	if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
		ecc.flt_panic = 1;

	if (ecc.flt_panic)
		cmn_err(CE_PANIC, "Fatal Sbus%d UE Error", ecc.flt_inst);

	return (DDI_INTR_CLAIMED);
}
387
388/*
389 * callback logging function from the common error handling code
390 */
391static void
392sysio_log_ue_err(struct async_flt *ecc, char *unum)
393{
394	uint64_t t_afsr = ecc->flt_stat;
395	uint64_t t_afar = ecc->flt_addr;
396
397	ushort_t id = ecc->flt_bus_id;
398	ushort_t inst = ecc->flt_inst;
399
400	if (t_afsr & SB_UE_AFSR_P_PIO) {
401		cmn_err(CE_WARN, "SBus%d UE Primary Error from PIO: "
402			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
403			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
404			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
405	}
406	if (t_afsr & SB_UE_AFSR_P_DRD) {
407		cmn_err(CE_WARN, "SBus%d UE Primary Error DMA read: "
408			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
409			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
410			(uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
411	}
412	if (t_afsr & SB_UE_AFSR_P_DWR) {
413		cmn_err(CE_WARN, "SBus%d UE Primary Error DVMA write: "
414			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
415			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
416			(uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
417	}
418	/*
419	 * We should never hit the secondary error panics.
420	 */
421	if (t_afsr & SB_UE_AFSR_S_PIO) {
422		cmn_err(CE_WARN, "SBus%d UE Secondary Error from PIO: "
423			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
424			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
425			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
426	}
427	if (t_afsr & SB_UE_AFSR_S_DRD) {
428		cmn_err(CE_WARN, "SBus%d UE Secondary Error DMA read: "
429			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
430			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
431			(uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
432	}
433	if (t_afsr & SB_UE_AFSR_S_DWR) {
434		cmn_err(CE_WARN, "SBus%d UE Secondary  Error DMA write: "
435			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d",
436			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
437			(uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
438	}
439
440	if ((debug_sysio_errs) || (aft_verbose)) {
441		(void) read_ecc_data(ecc, 1, 0);
442		cmn_err(CE_CONT, "\tOffset 0x%x, Size %d, UPA MID 0x%x\n",
443		    (uint32_t)((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT),
444		    (uint32_t)((t_afsr & SB_UE_AFSR_SIZE) >> SB_UE_SIZE_SHIFT),
445		    (uint32_t)((t_afsr & SB_UE_AFSR_MID) >> SB_UE_MID_SHIFT));
446	}
447}
448
449/*
450 * gather the information about the error, plus a pointer to
451 * the callback logging function, and call the generic ce_error handler.
452 */
453static uint_t
454sysio_ce_intr(struct sbus_soft_state *softsp)
455{
456	volatile uint64_t t_afsr;
457	volatile uint64_t t_afar;
458	volatile uint64_t *afar_reg, *clear_reg, *ce_reg;
459	struct async_flt ecc;
460	uint64_t offset;
461
462	ce_reg = (uint64_t *)softsp->sysio_ce_reg;
463	t_afsr = *ce_reg;
464	afar_reg = (uint64_t *)ce_reg + 1;
465	t_afar = *afar_reg;
466	*ce_reg = t_afsr;
467
468	clear_reg = (softsp->clr_intr_reg + CE_ECC_CLEAR);
469	*clear_reg = 0;
470
471	/*
472	 * The AFSR DW_OFFSET field contains the offset of the doubleword with
473	 * the ECC error relative to the 64-byte aligned PA.  We multiply by 8
474	 * to convert to a byte offset, and then add this to flt_addr.
475	 */
476	offset = ((t_afsr & SB_UE_AFSR_OFF) >> SB_UE_DW_SHIFT) * 8;
477
478	bzero(&ecc, sizeof (ecc));
479	ecc.flt_id = gethrtime();
480	ecc.flt_stat = t_afsr;
481	ecc.flt_addr = P2ALIGN(t_afar, 64) + offset;
482	ecc.flt_func = sysio_log_ce_err;
483	ecc.flt_bus_id = softsp->upa_id;
484	ecc.flt_inst = ddi_get_instance(softsp->dip);
485	ecc.flt_status = ECC_IOBUS;
486
487	ecc.flt_synd = (ushort_t)((t_afsr & SB_CE_AFSR_SYND) >>
488	    SB_CE_SYND_SHIFT);
489
490	ecc.flt_in_memory = (pf_is_memory(t_afar >> MMU_PAGESHIFT)) ? 1: 0;
491	ecc.flt_class = BUS_FAULT;
492
493	ce_scrub(&ecc);
494	errorq_dispatch(ce_queue, &ecc, sizeof (ecc), ERRORQ_ASYNC);
495
496	return (DDI_INTR_CLAIMED);
497}
498
499/*
500 * callback logging function from the common error handling code
501 */
502static void
503sysio_log_ce_err(struct async_flt *ecc, char *unum)
504{
505	uint64_t t_afsr = ecc->flt_stat;
506	uint64_t t_afar = ecc->flt_addr;
507	ushort_t id = ecc->flt_bus_id;
508	ushort_t inst = ecc->flt_inst;
509	int ce_verbose = ce_verbose_memory;
510	char *syndrome_str = "!\tSyndrome 0x%x, Offset 0x%x, Size %d, "
511	    "UPA MID 0x%x\n";
512
513	if ((!ce_verbose_memory) && (!debug_sysio_errs))
514		return;
515
516	if (t_afsr & SB_CE_AFSR_P_PIO) {
517		char *fmtstr = "!SBus%d CE Primary Error from PIO: "
518		    "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n";
519
520		if ((debug_sysio_errs) || (ce_verbose > 1))
521			fmtstr++;
522
523		cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
524		    (uint32_t)t_afsr, (uint32_t)(t_afar>>32),
525		    (uint32_t)t_afar, id);
526	}
527	if (t_afsr & SB_CE_AFSR_P_DRD) {
528		char *fmtstr = "!SBus%d CE Primary Error DMA read: "
529		    "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s "
530		    "Id %d\n";
531
532		if ((debug_sysio_errs) || (ce_verbose > 1))
533			fmtstr++;
534
535		cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
536		    (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
537		    unum, id);
538	}
539	if (t_afsr & SB_CE_AFSR_P_DWR) {
540		char *fmtstr = "!SBus%d CE Primary Error DMA write: "
541		    "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s Id %d\n";
542
543		if ((debug_sysio_errs) || (ce_verbose > 1))
544			fmtstr++;
545
546		cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
547		    (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
548		    unum, id);
549	}
550
551	if (t_afsr & SB_CE_AFSR_S_PIO) {
552		char *fmtstr = "!SBus%d CE Secondary Error from PIO: "
553		    "AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n";
554
555		if ((debug_sysio_errs) || (ce_verbose > 1))
556			fmtstr++;
557
558		cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
559		    (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
560		    id);
561	}
562	if (t_afsr & SB_CE_AFSR_S_DRD) {
563		char *fmtstr = "!SBus%d CE Secondary Error DMA read: "
564		    "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s "
565		    "Id %d\n";
566
567		if ((debug_sysio_errs) || (ce_verbose > 1))
568			fmtstr++;
569
570		cmn_err(CE_CONT, fmtstr, inst, (uint32_t)(t_afsr>>32),
571		    (uint32_t)t_afsr, (uint32_t)(t_afar>>32), (uint32_t)t_afar,
572		    unum, id);
573	}
574	if (t_afsr & SB_CE_AFSR_S_DWR) {
575		char *fmtstr = "!SBus%d CE Secondary Error DMA write: "
576		    "AFSR 0x%08x.%08x AFAR 0x%08x.%08x MemMod %s "
577		    "Id %d\n";
578
579		if ((debug_sysio_errs) || (ce_verbose > 1))
580			fmtstr++;
581
582		cmn_err(CE_CONT, fmtstr,
583		    inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
584		    (uint32_t)(t_afar>>32), (uint32_t)t_afar, unum, id);
585	}
586
587	if ((debug_sysio_errs) || (ce_verbose > 1))
588		syndrome_str++;
589
590	cmn_err(CE_CONT, syndrome_str,
591	    (uint32_t)((t_afsr & SB_CE_AFSR_SYND) >> SB_CE_SYND_SHIFT),
592	    (uint32_t)((t_afsr & SB_CE_AFSR_OFF) >> SB_CE_OFFSET_SHIFT),
593	    (uint32_t)((t_afsr & SB_CE_AFSR_SIZE) >> SB_CE_SIZE_SHIFT),
594	    (uint32_t)((t_afsr & SB_CE_AFSR_MID) >> SB_CE_MID_SHIFT));
595}
596
597static uint_t
598sbus_err_intr(struct sbus_soft_state *softsp)
599{
600	volatile uint64_t t_afsr;
601	volatile uint64_t t_afar;
602	ushort_t id, inst;
603	int cleared = 0;
604	volatile uint64_t *afar_reg;
605	on_trap_data_t *otp = softsp->ontrap_data;
606
607	t_afsr = *softsp->sbus_err_reg;
608	afar_reg = (uint64_t *)softsp->sbus_err_reg + 1;
609	t_afar = *afar_reg;
610
611	if (otp == NULL || !(otp->ot_prot & OT_DATA_ACCESS)) {
612		sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
613		cleared = 1;
614	}
615
616	id = (ushort_t)softsp->upa_id;
617	inst = (ushort_t)ddi_get_instance(softsp->dip);
618
619	if (debug_sysio_errs) {
620		if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS))
621			otp->ot_trap |= OT_DATA_ACCESS;
622		if (!cleared)
623			sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
624
625		cmn_err(CE_CONT, "SBus%d Error: AFSR 0x%08x.%08x "
626			"AFAR 0x%08x.%08x Id %d\n",
627			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
628			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
629
630		debug_enter("sbus_err_intr");
631	} else {
632		sbus_log_error(softsp, (uint64_t *)&t_afsr,
633		    (uint64_t *)&t_afar, id, inst, cleared, otp);
634	}
635	if (!cleared) {
636		sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
637	}
638
639	return (DDI_INTR_CLAIMED);
640}
641
642static void
643sbus_clear_intr(struct sbus_soft_state *softsp, uint64_t *pafsr)
644{
645	volatile uint64_t *clear_reg;
646
647	*softsp->sbus_err_reg = *pafsr;
648	clear_reg = (softsp->clr_intr_reg + SBUS_ERR_CLEAR);
649	*clear_reg = 0;
650}
651
652static void
653sbus_log_error(struct sbus_soft_state *softsp, uint64_t *pafsr, uint64_t *pafar,
654    ushort_t id, ushort_t inst, int cleared, on_trap_data_t *otp)
655{
656	uint64_t t_afsr;
657	uint64_t t_afar;
658	int level = CE_WARN;
659
660	t_afsr = *pafsr;
661	t_afar = *pafar;
662	if (t_afsr & SB_AFSR_P_LE) {
663		if (!cleared)
664			sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
665		cmn_err(CE_PANIC, "SBus%d Primary Error Late PIO: "
666			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
667			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
668			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
669	}
670	if (t_afsr & SB_AFSR_P_TO) {
671		if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) {
672			otp->ot_trap |= OT_DATA_ACCESS;
673			return;
674		}
675		if (sbus_check_bto(softsp)) {
676			if (!cleared)
677				sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
678			level = CE_PANIC;
679		}
680		cmn_err(level, "SBus%d Primary Error Timeout: "
681			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
682			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
683			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
684	}
685	if (t_afsr & SB_AFSR_P_BERR) {
686		if (otp != NULL && (otp->ot_prot & OT_DATA_ACCESS)) {
687			otp->ot_trap |= OT_DATA_ACCESS;
688			return;
689		}
690		if (sbus_check_bto(softsp)) {
691			if (!cleared)
692				sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
693			level = CE_PANIC;
694		}
695		cmn_err(level, "SBus%d Primary Error Bus Error: "
696			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d\n",
697			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
698			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
699	}
700
701	if (t_afsr & SB_AFSR_S_LE) {
702		if (!cleared)
703			sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
704		cmn_err(CE_PANIC, "SBus%d Secondary Late PIO Error: "
705			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
706			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
707			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
708	}
709	if (t_afsr & SB_AFSR_S_TO) {
710		if (sbus_check_bto(softsp)) {
711			if (!cleared)
712				sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
713			level = CE_PANIC;
714		}
715		cmn_err(level, "SBus%d Secondary Timeout Error: "
716			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
717			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
718			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
719	}
720	if (t_afsr & SB_AFSR_S_BERR) {
721		if (sbus_check_bto(softsp)) {
722			if (!cleared)
723				sbus_clear_intr(softsp, (uint64_t *)&t_afsr);
724			level = CE_PANIC;
725		}
726		cmn_err(level, "SBus%d Secondary Bus Error: "
727			"AFSR 0x%08x.%08x AFAR 0x%08x.%08x Id %d",
728			inst, (uint32_t)(t_afsr>>32), (uint32_t)t_afsr,
729			(uint32_t)(t_afar>>32), (uint32_t)t_afar, id);
730	}
731}
732
733
734static int
735sbus_check_bto(struct sbus_soft_state *softsp)
736{
737	hrtime_t now = gethrtime();		/* high PIL safe */
738	hrtime_t diff = now - softsp->bto_timestamp;
739
740	if (diff > ((hrtime_t)bto_secs * NANOSEC) || diff < 0LL) {
741		/*
742		 * Reset error counter as this bus error has occurred
743		 * after more than bto_secs duration.
744		 */
745		softsp->bto_timestamp = now;
746		softsp->bto_ctr = 0;
747	}
748	if (softsp->bto_ctr++ >= bto_cnt)
749		return (1);
750	return (0);
751}
752
753static uint_t
754sbus_ctrl_ecc_err(struct sbus_soft_state *softsp)
755{
756	uint64_t t_sb_csr;
757	ushort_t id, inst;
758
759	t_sb_csr = *softsp->sbus_ctrl_reg;
760	id = (ushort_t)softsp->upa_id;
761	inst = (ushort_t)ddi_get_instance(softsp->dip);
762
763	if (debug_sysio_errs) {
764		cmn_err(CE_CONT, "sbus_ctrl_ecc_error: SBus%d Control Reg "
765		    "0x%016llx Id %d\n", inst, (u_longlong_t)t_sb_csr, id);
766	}
767
768	if (t_sb_csr & (SB_CSR_DPERR_S14|SB_CSR_DPERR_S13|SB_CSR_DPERR_S3|
769	    SB_CSR_DPERR_S2|SB_CSR_DPERR_S1|SB_CSR_DPERR_S0|SB_CSR_PIO_PERRS)) {
770		struct async_flt aflt;
771
772		*softsp->sbus_ctrl_reg = t_sb_csr; /* clear error bits */
773
774		bzero(&aflt, sizeof (aflt));
775		aflt.flt_id = gethrtime();
776		aflt.flt_stat = t_sb_csr;
777		aflt.flt_func = sbus_log_csr_error;
778		aflt.flt_bus_id = id;
779		aflt.flt_inst = inst;
780		aflt.flt_status = ECC_IOBUS;
781		aflt.flt_class = BUS_FAULT;
782		aflt.flt_panic = 1;
783
784		errorq_dispatch(ue_queue, &aflt, sizeof (aflt), aflt.flt_panic);
785		return (BF_FATAL);
786	}
787
788	return (BF_NONE);
789}
790
791/*ARGSUSED*/
792static void
793sbus_log_csr_error(struct async_flt *aflt, char *unum)
794{
795	uint64_t t_sb_csr = aflt->flt_stat;
796	uint_t id = aflt->flt_bus_id;
797	uint_t inst = aflt->flt_inst;
798
799	/*
800	 * Print out SBus error information.
801	 */
802	if (t_sb_csr & SB_CSR_DPERR_S14) {
803		cmn_err(CE_WARN,
804		"SBus%d Slot 14 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
805			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
806	}
807	if (t_sb_csr & SB_CSR_DPERR_S13) {
808		cmn_err(CE_WARN,
809		"SBus%d Slot 13 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
810			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
811	}
812	if (t_sb_csr & SB_CSR_DPERR_S3) {
813		cmn_err(CE_WARN,
814		"SBus%d Slot 3 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
815			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
816	}
817	if (t_sb_csr & SB_CSR_DPERR_S2) {
818		cmn_err(CE_WARN,
819		"SBus%d Slot 2 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
820			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
821	}
822	if (t_sb_csr & SB_CSR_DPERR_S1) {
823		cmn_err(CE_WARN,
824		"SBus%d Slot 1 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
825			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
826	}
827	if (t_sb_csr & SB_CSR_DPERR_S0) {
828		cmn_err(CE_WARN,
829		"SBus%d Slot 0 DVMA Parity Error: AFSR 0x%08x.%08x Id %d",
830			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
831	}
832	if (t_sb_csr & SB_CSR_PPERR_S15) {
833		cmn_err(CE_WARN,
834		"SBus%d Slot 15 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
835			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
836	}
837	if (t_sb_csr & SB_CSR_PPERR_S14) {
838		cmn_err(CE_WARN,
839		"SBus%d Slot 14 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
840			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
841	}
842	if (t_sb_csr & SB_CSR_PPERR_S13) {
843		cmn_err(CE_WARN,
844		"SBus%d Slot 13 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
845			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
846	}
847	if (t_sb_csr & SB_CSR_PPERR_S3) {
848		cmn_err(CE_WARN,
849		"SBus%d Slot 3 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
850			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
851	}
852	if (t_sb_csr & SB_CSR_PPERR_S2) {
853		cmn_err(CE_WARN,
854		"SBus%d Slot 2 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
855			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
856	}
857	if (t_sb_csr & SB_CSR_PPERR_S1) {
858		cmn_err(CE_WARN,
859		"SBus%d Slot 1 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
860			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
861	}
862	if (t_sb_csr & SB_CSR_PPERR_S0) {
863		cmn_err(CE_WARN,
864		"SBus%d Slot 0 PIO Parity Error: AFSR 0x%08x.%08x Id %d",
865			inst, (uint32_t)(t_sb_csr>>32), (uint32_t)t_sb_csr, id);
866	}
867}
868
869/*
870 * Sysio Thermal Warning interrupt handler
871 */
872static uint_t
873sysio_thermal_warn_intr(struct sbus_soft_state *softsp)
874{
875	volatile uint64_t *clear_reg;
876	volatile uint64_t tmp_mondo_vec;
877	volatile uint64_t *mondo_vec_reg;
878	const char thermal_warn_msg[] =
879	    "Severe over-temperature condition detected!";
880
881	/*
882	 * Take off the Thermal Warning interrupt and
883	 * remove its interrupt handler.
884	 */
885	mondo_vec_reg = (softsp->intr_mapping_reg + THERMAL_MAPREG);
886	tmp_mondo_vec = *mondo_vec_reg;
887	tmp_mondo_vec &= ~INTERRUPT_VALID;
888	*mondo_vec_reg = tmp_mondo_vec;
889
890	ddi_remove_intr(softsp->dip, 4, NULL);
891
892	clear_reg = (softsp->clr_intr_reg + THERMAL_CLEAR);
893	*clear_reg = 0;
894
895	if (oven_test) {
896		cmn_err(CE_NOTE, "OVEN TEST: %s", thermal_warn_msg);
897		return (DDI_INTR_CLAIMED);
898	}
899
900	cmn_err(CE_WARN, "%s", thermal_warn_msg);
901	cmn_err(CE_WARN, "Powering down...");
902
903	do_shutdown();
904
905	/*
906	 * just in case do_shutdown() fails
907	 */
908	(void) timeout((void(*)(void *))power_down, NULL,
909	    thermal_powerdown_delay * hz);
910
911	return (DDI_INTR_CLAIMED);
912}
913