xref: /illumos-gate/usr/src/uts/sun4/sys/async.h (revision d00f0155)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26 
27 #ifndef	_SYS_ASYNC_H
28 #define	_SYS_ASYNC_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/privregs.h>
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
38 #ifndef	_ASM
39 
40 #include <sys/errorq.h>
41 
/*
 * The async_flt structure is used to record all pertinent information about
 * an asynchronous CPU or bus-related memory error.  Typically, the structure
 * is initialized by a high-level interrupt or trap handler, and then enqueued
 * for later processing.  Separate queues are maintained for correctable and
 * uncorrectable errors.  The current CPU module determines the size of the
 * queue elements, so that it may declare a CPU-specific fault structure
 * which contains a struct async_flt as its first member.  Each async_flt also
 * contains a callback function (flt_func) that is invoked by the processing
 * code in order to actually log messages when the event is dequeued.  This
 * function may be called from a softint, from trap() as part of AST handling
 * before the victim thread returns to userland, or as part of panic().  As
 * such, the flt_func should basically only be calling cmn_err (but NOT with
 * the CE_PANIC flag).  It must not call panic(), acquire locks, or block.
 * The owner of the event is responsible for determining whether the event is
 * fatal; if so, the owner should set flt_panic and panic() after enqueuing
 * the event.  The event will then be dequeued and logged as part of panic
 * processing.  If flt_panic is not set, the queue function will schedule a
 * soft interrupt to process the event.
 */
62 
/*
 * Forward declaration so that the callback type below can name the fault
 * record before struct async_flt is fully defined.
 */
struct async_flt;

/*
 * Type of the logging callback stored in async_flt.flt_func.  It receives a
 * pointer to a fault record and a string.
 * NOTE(review): this header does not describe the char * argument; confirm
 * its meaning against the callers.
 */
typedef void (*async_func_t)(struct async_flt *, char *);
65 
66 struct async_flt {
67 	uint64_t	flt_id;		/* gethrtime() at time of fault */
68 	uint64_t	flt_stat;	/* async fault status register */
69 	uint64_t	flt_addr;	/* async fault address register */
70 	caddr_t		flt_pc;		/* program counter from error trap */
71 	async_func_t	flt_func;	/* logging function */
72 	uint_t		flt_bus_id;	/* hardware bus id# of cpu/sbus/pci */
73 	uint_t		flt_inst;	/* software instance of cpu/sbus/pci */
74 	ushort_t	flt_status;	/* error information */
75 	ushort_t	flt_synd;	/* ECC syndrome */
76 	uchar_t		flt_in_memory;	/* fault occurred in memory if != 0 */
77 	uchar_t		flt_class;	/* fault class (cpu or bus) */
78 	uchar_t		flt_prot;	/* type of fault protection (if any) */
79 	uchar_t		flt_priv;	/* fault occurred in kernel if != 0 */
80 	uchar_t		flt_panic;	/* fault caused owner to panic() */
81 	uchar_t		flt_tl;		/* fault occurred at TL > 0 */
82 	uchar_t		flt_core;	/* fault occurred during core() dump */
83 	uchar_t		flt_pad;	/* reserved for future use */
84 	uint64_t	flt_disp;	/* error disposition information */
85 	uint64_t	flt_payload;	/* ereport payload information */
86 	char		*flt_erpt_class; /* ereport class string */
87 };
88 
/*
 * Bus nexus drivers can use the bus_func_register() interface to register
 * callback functions for error handling and panic handling.  The handler
 * functions should be registered and unregistered from driver attach and
 * detach context, where it is safe to perform a sleeping allocation.  The
 * callbacks themselves can be invoked from panic, or from the CPU module's
 * asynchronous trap handler at high PIL.  As such, these routines may only
 * test for errors and enqueue async_flt events.  They may not grab adaptive
 * locks, call panic(), or invoke bus_func_register() or bus_func_unregister().
 * Each callback function should return one of the BF_* return status values
 * below.  The bus_func_invoke() function calls all the registered handlers of
 * the specified type, and returns the maximum of their return values (e.g.
 * BF_FATAL if any callback returned BF_FATAL).  If any callback returns
 * BF_FATAL, the system will panic at the end of callback processing.
 */
104 
105 typedef	uint_t (*busfunc_t)(void *);
106 
/* Callback types: which kind of work a registered bus callback performs. */
#define	BF_TYPE_UE		1	/* check for uncorrectable errors */
#define	BF_TYPE_ERRDIS		2	/* disable error detection */
#define	BF_TYPE_RESINTR		3	/* reset interrupts */

/*
 * Callback return status.  The values increase with severity because
 * bus_func_invoke() returns the maximum status reported by any callback.
 */
#define	BF_NONE			0	/* no errors were detected */
#define	BF_NONFATAL		1	/* one or more non-fatal errors found */
#define	BF_FATAL		2	/* one or more fatal errors found */
114 
115 typedef struct bus_func_desc {
116 	int bf_type;			/* type of function (see above) */
117 	busfunc_t bf_func;		/* function to call */
118 	void *bf_arg;			/* function argument */
119 	struct bus_func_desc *bf_next;	/* pointer to next registered desc */
120 } bus_func_desc_t;
121 
122 extern void bus_func_register(int, busfunc_t, void *);
123 extern void bus_func_unregister(int, busfunc_t, void *);
124 extern void bus_async_log_err(struct async_flt *);
125 extern uint_t bus_func_invoke(int);
126 
127 extern void ecc_cpu_call(struct async_flt *, char *, int);
128 
129 extern void ce_scrub(struct async_flt *);
130 extern void ecc_page_zero(void *);
131 
132 extern void error_init(void);
133 
/*
 * Integer variables defined elsewhere.
 * NOTE(review): the ce_, ue_ and aft_ prefixes presumably stand for
 * correctable errors, uncorrectable errors and async fault handling; this
 * header does not say what each variable controls, so confirm against the
 * definitions.
 */
extern	int	ce_verbose_memory;
extern	int	ce_verbose_other;
extern	int	ce_show_data;
extern	int	ce_debug;
extern	int	ue_debug;

extern	int	aft_verbose;
extern	int	aft_panic;
extern	int	aft_testfatal;
143 
144 extern struct async_flt panic_aflt;
145 
146 extern errorq_t *ce_queue;
147 extern errorq_t *ue_queue;
148 
149 #endif	/* !_ASM */
150 
/*
 * ECC or parity error status for async_flt.flt_status.  Each value is a
 * distinct single bit, and all of them fit in the 16-bit flt_status member.
 */
#define	ECC_C_TRAP		0x0001	/* Trap 0x63 Corrected ECC Error */
#define	ECC_I_TRAP		0x0002	/* Trap 0x0A Instr Access Error */
#define	ECC_ECACHE		0x0004	/* Ecache ECC Error */
#define	ECC_IOBUS		0x0008	/* PCI or sysio ECC Error */
#define	ECC_INTERMITTENT	0x0010	/* Intermittent ECC Error */
#define	ECC_PERSISTENT		0x0020	/* Persistent ECC Error */
#define	ECC_STICKY		0x0040	/* Sticky ECC Error */
#define	ECC_D_TRAP		0x0080	/* Trap 0x32 Data Access Error */
#define	ECC_F_TRAP		0x0100	/* Cheetah Trap 0x70 Fast ECC Error */
#define	ECC_DP_TRAP		0x0200	/* Cheetah+ Trap 0x71 D$ Parity Error */
#define	ECC_IP_TRAP		0x0400	/* Cheetah+ Trap 0x72 I$ Parity Error */
#define	ECC_ITLB_TRAP		0x0800	/* Panther ITLB Parity Error */
#define	ECC_DTLB_TRAP		0x1000	/* Panther DTLB Parity Error */
#define	ECC_IO_CE		0x2000	/* PCI or sysio CE */
#define	ECC_IO_UE		0x4000	/* PCI or sysio UE */
169 
/*
 * Trap type numbers corresponding to the fault types defined above.
 * TRAP_TYPE_UNKNOWN (0) is used when no trap type applies.
 */
#define	TRAP_TYPE_ECC_I		0x0A
#define	TRAP_TYPE_ECC_D		0x32
#define	TRAP_TYPE_ECC_F		0x70
#define	TRAP_TYPE_ECC_C		0x63
#define	TRAP_TYPE_ECC_DP	0x71
#define	TRAP_TYPE_ECC_IP	0x72
#define	TRAP_TYPE_ECC_ITLB	0x08
#define	TRAP_TYPE_ECC_DTLB	0x30
#define	TRAP_TYPE_UNKNOWN	0
182 
/*
 * Fault classes for async_flt.flt_class.
 */
#define	BUS_FAULT		0	/* originating from bus drivers */
#define	CPU_FAULT		1	/* originating from CPUs */
#define	RECIRC_BUS_FAULT	2	/* scheduled diagnostic */
#define	RECIRC_CPU_FAULT	3	/* scheduled diagnostic */
190 
/*
 * Invalid or unknown physical address for async_flt.flt_addr.  Negating an
 * unsigned long long 1 yields the all-ones 64-bit value.
 */
#define	AFLT_INV_ADDR	(-1ULL)
195 
/*
 * Fault protection values for async_flt.flt_prot.  The async error handling
 * code may be able to recover from errors when kernel code has explicitly
 * protected itself using one of the mechanisms specified here.
 */
#define	AFLT_PROT_NONE		0	/* no protection active */
#define	AFLT_PROT_ACCESS	1	/* on_trap OT_DATA_ACCESS protection */
#define	AFLT_PROT_EC		2	/* on_trap OT_DATA_EC protection */
#define	AFLT_PROT_COPY		3	/* t_lofault protection (ucopy, etc.) */
205 
/*
 * These flags are used to indicate the validity of certain data based on
 * the various overwrite priority features of the AFSR/AFAR:
 * AFAR, ESYND and MSYND, each of which has a different overwrite priority.
 *
 * Given a specific afsr error bit and the entire afsr, there are three cases:
 *   INVALID:	The specified bit is lower overwrite priority than some other
 *		error bit which is on in the afsr (or IVU/IVC).
 *   VALID:	The specified bit is higher priority than all other error bits
 *		which are on in the afsr.
 *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
 *		bit is on in the afsr.
 *
 * NB: The domain-to-SC communications depend on these values. If they are
 * changed, plat_ecc_unum.[ch] must be updated to match.
 */
#define	AFLT_STAT_INVALID	0	/* higher priority afsr bit is on */
#define	AFLT_STAT_VALID		1	/* this is highest priority afsr bit */
#define	AFLT_STAT_AMBIGUOUS	2	/* two afsr bits of equal priority */
225 
/*
 * Maximum length of unum string.
 */
#define	UNUM_NAMLEN	60
230 
/*
 * Maximum length of a DIMM serial id string, including the terminating
 * null byte.
 */
#define	DIMM_SERIAL_ID_LEN	16
235 
236 #ifdef	__cplusplus
237 }
238 #endif
239 
240 #endif	/* _SYS_ASYNC_H */
241