1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/systm.h>
28#include <sys/archsystm.h>
29#include <sys/machparam.h>
30#include <sys/machsystm.h>
31#include <sys/cpu.h>
32#include <sys/elf_SPARC.h>
33#include <vm/hat_sfmmu.h>
34#include <vm/seg_kpm.h>
35#include <vm/page.h>
36#include <vm/vm_dep.h>
37#include <sys/cpuvar.h>
38#include <sys/spitregs.h>
39#include <sys/async.h>
40#include <sys/cmn_err.h>
41#include <sys/debug.h>
42#include <sys/dditypes.h>
43#include <sys/sunddi.h>
44#include <sys/cpu_module.h>
45#include <sys/prom_debug.h>
46#include <sys/vmsystm.h>
47#include <sys/prom_plat.h>
48#include <sys/sysmacros.h>
49#include <sys/intreg.h>
50#include <sys/machtrap.h>
51#include <sys/ontrap.h>
52#include <sys/ivintr.h>
53#include <sys/atomic.h>
54#include <sys/panic.h>
55#include <sys/ndifm.h>
56#include <sys/fm/protocol.h>
57#include <sys/fm/util.h>
58#include <sys/fm/cpu/UltraSPARC-II.h>
59#include <sys/ddi.h>
60#include <sys/ecc_kstat.h>
61#include <sys/watchpoint.h>
62#include <sys/dtrace.h>
63#include <sys/errclassify.h>
64
65uint_t	cpu_impl_dual_pgsz = 0;
66
67/*
68 * Structure for the 8 byte ecache data dump and the associated AFSR state.
69 * There will be 8 of these structures used to dump an ecache line (64 bytes).
70 */
71typedef struct sf_ec_data_elm {
72	uint64_t ec_d8;
73	uint64_t ec_afsr;
74} ec_data_t;
75
76/*
77 * Define spitfire (Ultra I/II) specific asynchronous error structure
78 */
79typedef struct spitfire_async_flt {
80	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
81	ushort_t flt_type;		/* types of faults - cpu specific */
82	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
83	uint64_t flt_ec_tag;		/* E$ tag info */
84	int flt_ec_lcnt;		/* number of bad E$ lines */
85	ushort_t flt_sdbh;		/* UDBH reg */
86	ushort_t flt_sdbl;		/* UDBL reg */
87} spitf_async_flt;
88
89/*
90 * Prototypes for support routines in spitfire_asm.s:
91 */
92extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
93extern uint64_t get_lsu(void);
94extern void set_lsu(uint64_t ncc);
95extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
96				uint64_t *oafsr, uint64_t *acc_afsr);
97extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
98extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
99				uint64_t *acc_afsr);
100extern uint64_t read_and_clear_afsr();
101extern void write_ec_tag_parity(uint32_t id);
102extern void write_hb_ec_tag_parity(uint32_t id);
103
104/*
105 * Spitfire module routines:
106 */
107static void cpu_async_log_err(void *flt);
108/*PRINTFLIKE6*/
109static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
110    uint_t logflags, const char *endstr, const char *fmt, ...);
111
112static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
113static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
114static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
115
116static void log_ce_err(struct async_flt *aflt, char *unum);
117static void log_ue_err(struct async_flt *aflt, char *unum);
118static void check_misc_err(spitf_async_flt *spf_flt);
119static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
120static int check_ecc(struct async_flt *aflt);
121static uint_t get_cpu_status(uint64_t arg);
122static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
123static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
124		int *m, uint64_t *afsr);
125static void ecache_kstat_init(struct cpu *cp);
126static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
127		uint64_t paddr, int mpb, uint64_t);
128static uint64_t ecache_scrub_misc_err(int, uint64_t);
129static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
130static void ecache_page_retire(void *);
131static int ecc_kstat_update(kstat_t *ksp, int rw);
132static int ce_count_unum(int status, int len, char *unum);
133static void add_leaky_bucket_timeout(void);
134static int synd_to_synd_code(int synd_status, ushort_t synd);
135
136extern uint_t read_all_memscrub;
137extern void memscrub_run(void);
138
139static uchar_t	isus2i;			/* set if sabre */
140static uchar_t	isus2e;			/* set if hummingbird */
141
142/*
143 * Default ecache mask and shift settings for Spitfire.  If we detect a
144 * different CPU implementation, we will modify these values at boot time.
145 */
146static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
147static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
148static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
149static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
150static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
151static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
152static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
153static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
154static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
155static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
156
157/*
158 * Default ecache state bits for Spitfire.  These individual bits indicate if
159 * the given line is in any of the valid or modified states, respectively.
160 * Again, we modify these at boot if we detect a different CPU.
161 */
162static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
163static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
164static uchar_t cpu_ec_parity		= S_EC_PARITY;
165static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
166
167/*
168 * This table is used to determine which bit(s) are bad when an ECC
169 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
170 * of this array have the following semantics:
171 *
172 *      00-63   The number of the bad bit, when only one bit is bad.
173 *      64      ECC bit C0 is bad.
174 *      65      ECC bit C1 is bad.
175 *      66      ECC bit C2 is bad.
176 *      67      ECC bit C3 is bad.
177 *      68      ECC bit C4 is bad.
178 *      69      ECC bit C5 is bad.
179 *      70      ECC bit C6 is bad.
180 *      71      ECC bit C7 is bad.
181 *      72      Two bits are bad.
182 *      73      Three bits are bad.
183 *      74      Four bits are bad.
184 *      75      More than Four bits are bad.
185 *      76      NO bits are bad.
186 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
187 */
188
189#define	C0	64
190#define	C1	65
191#define	C2	66
192#define	C3	67
193#define	C4	68
194#define	C5	69
195#define	C6	70
196#define	C7	71
197#define	M2	72
198#define	M3	73
199#define	M4	74
200#define	MX	75
201#define	NA	76
202
203#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
204						    (synd_code < C0))
205#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
206						    (synd_code <= C7))
207
208static char ecc_syndrome_tab[] =
209{
210	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
211	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
212	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
213	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
214	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
215	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
216	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
217	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
218	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
219	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
220	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
221	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
222	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
223	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
224	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
225	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
226};
227
228#define	SYND_TBL_SIZE 256
229
230/*
231 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
232 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
233 */
234#define	UDBL_REG	0x8000
235#define	UDBL(synd)	((synd & UDBL_REG) >> 15)
236#define	SYND(synd)	(synd & 0x7FFF)
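
/*
 * Illustrative sketch, not part of the driver: given a combined syndrome of
 * the form stored in flt_synd by the handlers below, the macros above split
 * off the UDB half, and ecc_syndrome_tab[] maps the low 8 bits of the ECC
 * syndrome to one of the codes defined above.  The direct table lookup shown
 * here is an assumption for illustration only; the handlers below obtain the
 * code via synd_to_synd_code().
 *
 *	int from_udbl = UDBL(flt_synd);		(1 if reported by UDBL)
 *	int code = ecc_syndrome_tab[SYND(flt_synd) & 0xFF];
 *
 *	if (SYND_IS_SINGLE_BIT_DATA(code))
 *		data bit "code" was in error
 *	else if (SYND_IS_SINGLE_BIT_CHK(code))
 *		check bit (code - C0) was in error
 */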
237
238/*
239 * These error types are specific to Spitfire and are used internally for the
240 * spitfire fault structure flt_type field.
241 */
242#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
243#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
244#define	CPU_WP_ERR		2	/* WP parity error */
245#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
246#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
247#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
248#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
249#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
250#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
251#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
252#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
253#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
254#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
255#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
256#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
257#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
258
259/*
260 * Macro to access the "Spitfire cpu private" data structure.
261 */
262#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
263
264/*
265 * set to 0 to disable automatic retiring of pages on
266 * DIMMs that have excessive soft errors
267 */
268int automatic_page_removal = 1;
269
270/*
271 * Heuristic for figuring out which module to replace.
272 * Relative likelihood that this P_SYND indicates that this module is bad.
273 * We call it a "score", though, not a relative likelihood.
274 *
275 * Step 1.
276 * Assign a score to each byte of P_SYND according to the following rules:
277 * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
278 * If one bit on, give it a 95.
279 * If seven bits on, give it a 10.
280 * If two bits on:
281 *   in different nybbles, a 90
282 *   in same nybble, but unaligned, 85
283 *   in same nybble and as an aligned pair, 80
284 * If six bits on, look at the bits that are off:
285 *   in same nybble and as an aligned pair, 15
286 *   in same nybble, but unaligned, 20
287 *   in different nybbles, a 25
288 * If three bits on:
289 *   in different nybbles, no aligned pairs, 75
290 *   in different nybbles, one aligned pair, 70
291 *   in the same nybble, 65
292 * If five bits on, look at the bits that are off:
293 *   in the same nybble, 30
294 *   in different nybbles, one aligned pair, 35
295 *   in different nybbles, no aligned pairs, 40
296 * If four bits on:
297 *   all in one nybble, 45
298 *   as two aligned pairs, 50
299 *   one aligned pair, 55
300 *   no aligned pairs, 60
301 *
302 * Step 2:
303 * Take the higher of the two scores (one for each byte) as the score
304 * for the module.
305 *
306 * Print the score for each module, and field service should replace the
307 * module with the highest score.
308 */
309
310/*
311 * In the table below, the first row/column comment indicates the
312 * number of bits on in that nybble; the second row/column comment is
313 * the hex digit.
314 */
315
316static int
317p_synd_score_table[256] = {
318	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
319	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
320/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
321/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
322/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
323/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
324/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
325/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
326/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
327/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
328/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
329/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
330/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
331/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
332/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
333/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
334/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
335/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
336};
337
338int
339ecc_psynd_score(ushort_t p_synd)
340{
341	int i, j, a, b;
342
343	i = p_synd & 0xFF;
344	j = (p_synd >> 8) & 0xFF;
345
346	a = p_synd_score_table[i];
347	b = p_synd_score_table[j];
348
349	return (a > b ? a : b);
350}
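
/*
 * Worked example (illustrative only): a P_SYND of 0x0103 has a low byte of
 * 0x03 (two bits on, in the same nybble as an aligned pair: score 80) and a
 * high byte of 0x01 (one bit on: score 95).  ecc_psynd_score() returns the
 * larger of the two byte scores, 95, as the score for the module.
 */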
351
352/*
353 * Async Fault Logging
354 *
355 * To ease identifying, reading, and filtering async fault log messages, the
356 * label [AFT#] is now prepended to each async fault message.  These messages
357 * and the logging rules are implemented by cpu_aflt_log(), below.
358 *
359 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
360 *          This includes both corrected ECC memory and ecache faults.
361 *
362 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
363 *          else except CE errors) with a priority of 1 (highest).  This tag
364 *          is also used for panic messages that result from an async fault.
365 *
366 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
367 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
368 *          of the E-$ data and tags.
369 *
370 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
371 * printed on the console.  To send all AFT logs to both the log and the
372 * console, set aft_verbose = 1.
373 */
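
/*
 * For example, to see the lower priority [AFT2]/[AFT3] diagnostics on the
 * console of a non-DEBUG kernel as well, an administrator could set the
 * tunable from /etc/system (illustrative; standard "set" syntax assumed):
 *
 *	set aft_verbose = 1
 */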
374
375#define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
376#define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
377#define	CPU_ERRID		0x0004	/* print flt_id */
378#define	CPU_TL			0x0008	/* print flt_tl */
379#define	CPU_ERRID_FIRST		0x0010	/* print flt_id first in message */
380#define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
381#define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
382#define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
383#define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
384#define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
385#define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
386#define	CPU_FAULTPC		0x0800	/* print flt_pc */
387#define	CPU_SYND		0x1000	/* print flt_synd and unum */
388
389#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
390				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
391				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
392				CPU_FAULTPC)
393#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
394#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
395				~CPU_SPACE)
396#define	PARERR_LFLAGS	(CMN_LFLAGS)
397#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
398#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
399				~CPU_FLTCPU & ~CPU_FAULTPC)
400#define	BERRTO_LFLAGS	(CMN_LFLAGS)
401#define	NO_LFLAGS	(0)
402
403#define	AFSR_FMTSTR0	"\020\1ME"
404#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
405			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
406#define	UDB_FMTSTR	"\020\012UE\011CE"
407
408/*
409 * Save the cache bootup state for use when internal
410 * caches are to be re-enabled after an error occurs.
411 */
412uint64_t	cache_boot_state = 0;
413
414/*
415 * PA[31:0] represent Displacement in UPA configuration space.
416 */
417uint_t	root_phys_addr_lo_mask = 0xffffffff;
418
419/*
420 * Spitfire legacy globals
421 */
422int	itlb_entries;
423int	dtlb_entries;
424
425void
426cpu_setup(void)
427{
428	extern int page_retire_messages;
429	extern int page_retire_first_ue;
430	extern int at_flags;
431#if defined(SF_ERRATA_57)
432	extern caddr_t errata57_limit;
433#endif
434	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
435
436	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
437
438	/*
439	 * Spitfire isn't currently FMA-aware, so we have to enable the
440	 * page retirement messages. We also change the default policy
441	 * for UE retirement to allow clearing of transient errors.
442	 */
443	page_retire_messages = 1;
444	page_retire_first_ue = 0;
445
446	/*
447	 * save the cache bootup state.
448	 */
449	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
450
451	if (use_page_coloring) {
452		do_pg_coloring = 1;
453	}
454
455	/*
456	 * Tune pp_slots to use up to 1/8th of the tlb entries.
457	 */
458	pp_slots = MIN(8, MAXPP_SLOTS);
459
460	/*
461	 * Block stores invalidate all pages of the d$ so pagecopy
462	 * et al. do not need virtual translations with virtual
463	 * coloring taken into consideration.
464	 */
465	pp_consistent_coloring = 0;
466
467	isa_list =
468	    "sparcv9+vis sparcv9 "
469	    "sparcv8plus+vis sparcv8plus "
470	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
471
472	cpu_hwcap_flags = AV_SPARC_VIS;
473
474	/*
475	 * On Spitfire, there's a hole in the address space
476	 * that we must never map (the hardware only supports 44 bits of
477	 * virtual address).  Later CPUs are expected to have wider
478	 * supported address ranges.
479	 *
480	 * See address map on p23 of the UltraSPARC 1 user's manual.
481	 */
482	hole_start = (caddr_t)0x80000000000ull;
483	hole_end = (caddr_t)0xfffff80000000000ull;
484
485	/*
486	 * A Spitfire call bug requires us to firewall a further 4 Gbytes
487	 * beyond the hole defined by the spec.
488	 *
489	 * See Spitfire Errata #21
490	 */
491	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
492	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
493
494	/*
495	 * The kpm mapping window.
496	 * kpm_size:
497	 *	The size of a single kpm range.
498	 *	The overall size will be: kpm_size * vac_colors.
499	 * kpm_vbase:
500	 *	The virtual start address of the kpm range within the kernel
501	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
502	 */
503	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
504	kpm_size_shift = 41;
505	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
506
507	/*
508	 * All UltraSPARC platforms should use small kpm pages by default, as
509	 * the KPM large page VAC conflict code has no value to maintain.  The
510	 * newer generations of SPARC no longer have VAC conflict issues.
511	 */
512	kpm_smallpages = 1;
513
514#if defined(SF_ERRATA_57)
515	errata57_limit = (caddr_t)0x80000000ul;
516#endif
517
518	/*
519	 * Disable large pages for text by default.
520	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
521	 */
522	max_utext_lpsize = MMU_PAGESIZE;
523}
524
525static int
526getintprop(pnode_t node, char *name, int deflt)
527{
528	int	value;
529
530	switch (prom_getproplen(node, name)) {
531	case 0:
532		value = 1;	/* boolean properties */
533		break;
534
535	case sizeof (int):
536		(void) prom_getprop(node, name, (caddr_t)&value);
537		break;
538
539	default:
540		value = deflt;
541		break;
542	}
543
544	return (value);
545}
546
547/*
548 * Set the magic constants of the implementation.
549 */
550void
551cpu_fiximp(pnode_t dnode)
552{
553	extern int vac_size, vac_shift;
554	extern uint_t vac_mask;
555	extern int dcache_line_mask;
556	int i, a;
557	static struct {
558		char	*name;
559		int	*var;
560	} prop[] = {
561		"dcache-size",		&dcache_size,
562		"dcache-line-size",	&dcache_linesize,
563		"icache-size",		&icache_size,
564		"icache-line-size",	&icache_linesize,
565		"ecache-size",		&ecache_size,
566		"ecache-line-size",	&ecache_alignsize,
567		"ecache-associativity", &ecache_associativity,
568		"#itlb-entries",	&itlb_entries,
569		"#dtlb-entries",	&dtlb_entries,
570		};
571
572	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
573		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
574			*prop[i].var = a;
575		}
576	}
577
578	ecache_setsize = ecache_size / ecache_associativity;
579
580	vac_size = S_VAC_SIZE;
581	vac_mask = MMU_PAGEMASK & (vac_size - 1);
582	i = 0; a = vac_size;
583	while (a >>= 1)
584		++i;
585	vac_shift = i;
586	shm_alignment = vac_size;
587	vac = 1;
588
589	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
590
591	/*
592	 * UltraSPARC I & II have ecache sizes running
593	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
594	 * and 8 MB. Adjust the copyin/copyout limits
595	 * according to the cache size. The magic number
596	 * VIS_COPY_THRESHOLD comes from the copyin/copyout code,
597	 * which will not use VIS instructions for copies smaller
598	 * than that threshold.
599	 *
600	 * We assume that all CPUs on the system have the same size
601	 * ecache. We're also called very early in the game.
602	 * /etc/system will be parsed *after* we're called so
603	 * these values can be overwritten.
604	 */
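	/*
	 * For example, the defaults chosen below could be overridden from
	 * /etc/system with entries such as the following (the values shown
	 * are arbitrary, for illustration only):
	 *
	 *	set hw_copy_limit_1 = 0x100
	 *	set hw_copy_limit_8 = 0x1000
	 */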
605
606	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
607	if (ecache_size <= 524288) {
608		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
609		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
610		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
611	} else if (ecache_size == 1048576) {
612		hw_copy_limit_2 = 1024;
613		hw_copy_limit_4 = 1280;
614		hw_copy_limit_8 = 1536;
615	} else if (ecache_size == 2097152) {
616		hw_copy_limit_2 = 1536;
617		hw_copy_limit_4 = 2048;
618		hw_copy_limit_8 = 2560;
619	} else if (ecache_size == 4194304) {
620		hw_copy_limit_2 = 2048;
621		hw_copy_limit_4 = 2560;
622		hw_copy_limit_8 = 3072;
623	} else {
624		hw_copy_limit_2 = 2560;
625		hw_copy_limit_4 = 3072;
626		hw_copy_limit_8 = 3584;
627	}
628}
629
630/*
631 * Called by setcpudelay
632 */
633void
634cpu_init_tick_freq(void)
635{
636	/*
637	 * Determine the cpu frequency by calling
638	 * tod_get_cpufrequency. Use an approximate frequency
639	 * value computed by the prom if the tod module
640	 * is not initialized and loaded yet.
641	 */
642	if (tod_ops.tod_get_cpufrequency != NULL) {
643		mutex_enter(&tod_lock);
644		sys_tick_freq = tod_ops.tod_get_cpufrequency();
645		mutex_exit(&tod_lock);
646	} else {
647#if defined(HUMMINGBIRD)
648		/*
649		 * the hummingbird version of %stick is used as the basis for
650		 * low level timing; this provides an independent constant-rate
651		 * clock for general system use, and frees power mgmt to set
652		 * various cpu clock speeds.
653		 */
654		if (system_clock_freq == 0)
655			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
656			    system_clock_freq);
657		sys_tick_freq = system_clock_freq;
658#else /* SPITFIRE */
659		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
660#endif
661	}
662}
663
664
665void shipit(int upaid);
666extern uint64_t xc_tick_limit;
667extern uint64_t xc_tick_jump_limit;
668
669#ifdef SEND_MONDO_STATS
670uint64_t x_early[NCPU][64];
671#endif
672
673/*
674 * Note: A version of this function is used by the debugger via the KDI,
675 * and must be kept in sync with this version.  Any changes made to this
676 * function to support new chips or to accommodate errata must also be included
677 * in the KDI-specific version.  See spitfire_kdi.c.
678 */
679void
680send_one_mondo(int cpuid)
681{
682	uint64_t idsr, starttick, endtick;
683	int upaid, busy, nack;
684	uint64_t tick, tick_prev;
685	ulong_t ticks;
686
687	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
688	upaid = CPUID_TO_UPAID(cpuid);
689	tick = starttick = gettick();
690	shipit(upaid);
691	endtick = starttick + xc_tick_limit;
692	busy = nack = 0;
693	for (;;) {
694		idsr = getidsr();
695		if (idsr == 0)
696			break;
697		/*
698		 * When we detect an irregular tick jump, we adjust
699		 * the timer window to the current tick value.
700		 */
701		tick_prev = tick;
702		tick = gettick();
703		ticks = tick - tick_prev;
704		if (ticks > xc_tick_jump_limit) {
705			endtick = tick + xc_tick_limit;
706		} else if (tick > endtick) {
707			if (panic_quiesce)
708				return;
709			cmn_err(CE_PANIC,
710			    "send mondo timeout (target 0x%x) [%d NACK %d "
711			    "BUSY]", upaid, nack, busy);
712		}
713		if (idsr & IDSR_BUSY) {
714			busy++;
715			continue;
716		}
717		drv_usecwait(1);
718		shipit(upaid);
719		nack++;
720		busy = 0;
721	}
722#ifdef SEND_MONDO_STATS
723	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
724#endif
725}
726
727void
728send_mondo_set(cpuset_t set)
729{
730	int i;
731
732	for (i = 0; i < NCPU; i++)
733		if (CPU_IN_SET(set, i)) {
734			send_one_mondo(i);
735			CPUSET_DEL(set, i);
736			if (CPUSET_ISNULL(set))
737				break;
738		}
739}
740
741void
742syncfpu(void)
743{
744}
745
746/*
747 * Determine the size of the CPU module's error structure in bytes.  This is
748 * called once during boot to initialize the error queues.
749 */
750int
751cpu_aflt_size(void)
752{
753	/*
754	 * We need to determine whether this is a Sabre, Hummingbird or a
755	 * Spitfire/Blackbird impl and set the appropriate state variables for
756	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
757	 * too early in the boot flow and the cpunodes are not initialized.
758	 * This routine will be called once after cpunodes[] is ready, so do
759	 * it here.
760	 */
761	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
762		isus2i = 1;
763		cpu_ec_tag_mask = SB_ECTAG_MASK;
764		cpu_ec_state_mask = SB_ECSTATE_MASK;
765		cpu_ec_par_mask = SB_ECPAR_MASK;
766		cpu_ec_par_shift = SB_ECPAR_SHIFT;
767		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
768		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
769		cpu_ec_state_exl = SB_ECSTATE_EXL;
770		cpu_ec_state_mod = SB_ECSTATE_MOD;
771
772		/* These states do not exist in sabre - set to 0xFF */
773		cpu_ec_state_shr = 0xFF;
774		cpu_ec_state_own = 0xFF;
775
776		cpu_ec_state_valid = SB_ECSTATE_VALID;
777		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
778		cpu_ec_state_parity = SB_ECSTATE_PARITY;
779		cpu_ec_parity = SB_EC_PARITY;
780	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
781		isus2e = 1;
782		cpu_ec_tag_mask = HB_ECTAG_MASK;
783		cpu_ec_state_mask = HB_ECSTATE_MASK;
784		cpu_ec_par_mask = HB_ECPAR_MASK;
785		cpu_ec_par_shift = HB_ECPAR_SHIFT;
786		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
787		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
788		cpu_ec_state_exl = HB_ECSTATE_EXL;
789		cpu_ec_state_mod = HB_ECSTATE_MOD;
790
791		/* These states do not exist in hummingbird - set to 0xFF */
792		cpu_ec_state_shr = 0xFF;
793		cpu_ec_state_own = 0xFF;
794
795		cpu_ec_state_valid = HB_ECSTATE_VALID;
796		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
797		cpu_ec_state_parity = HB_ECSTATE_PARITY;
798		cpu_ec_parity = HB_EC_PARITY;
799	}
800
801	return (sizeof (spitf_async_flt));
802}
803
804
805/*
806 * Correctable ecc error trap handler
807 */
808/*ARGSUSED*/
809void
810cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
811    uint_t p_afsr_high, uint_t p_afar_high)
812{
813	ushort_t sdbh, sdbl;
814	ushort_t e_syndh, e_syndl;
815	spitf_async_flt spf_flt;
816	struct async_flt *ecc;
817	int queue = 1;
818
819	uint64_t t_afar = p_afar;
820	uint64_t t_afsr = p_afsr;
821
822	/*
823	 * Note: the Spitfire data buffer error registers
824	 * (upper and lower halves) are or'ed into the upper
825	 * word of the afsr by ce_err().
826	 */
827	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
828	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
829
830	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
831	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
832
833	t_afsr &= S_AFSR_MASK;
834	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
835
836	/* Setup the async fault structure */
837	bzero(&spf_flt, sizeof (spitf_async_flt));
838	ecc = (struct async_flt *)&spf_flt;
839	ecc->flt_id = gethrtime_waitfree();
840	ecc->flt_stat = t_afsr;
841	ecc->flt_addr = t_afar;
842	ecc->flt_status = ECC_C_TRAP;
843	ecc->flt_bus_id = getprocessorid();
844	ecc->flt_inst = CPU->cpu_id;
845	ecc->flt_pc = (caddr_t)rp->r_pc;
846	ecc->flt_func = log_ce_err;
847	ecc->flt_in_memory =
848	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
849	spf_flt.flt_sdbh = sdbh;
850	spf_flt.flt_sdbl = sdbl;
851
852	/*
853	 * Check for fatal conditions.
854	 */
855	check_misc_err(&spf_flt);
856
857	/*
858	 * Paranoid checks for valid AFSR and UDBs
859	 */
860	if ((t_afsr & P_AFSR_CE) == 0) {
861		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
862		    "** Panic due to CE bit not set in the AFSR",
863		    "  Corrected Memory Error on");
864	}
865
866	/*
867	 * We want to skip logging only if ALL the following
868	 * conditions are true:
869	 *
870	 *	1. There is only one error
871	 *	2. That error is a correctable memory error
872	 *	3. The error is caused by the memory scrubber (in which case
873	 *	    the error will have occurred under on_trap protection)
874	 *	4. The error is on a retired page
875	 *
876	 * Note: OT_DATA_EC is used in places other than the memory scrubber.
877	 * However, none of those errors should occur on a retired page.
878	 */
879	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
880	    curthread->t_ontrap != NULL) {
881
882		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
883			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
884				queue = 0;
885			}
886		}
887	}
888
889	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
890		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
891		    "** Panic due to CE bits not set in the UDBs",
892		    " Corrected Memory Error on");
893	}
894
895	if ((sdbh >> 8) & 1) {
896		ecc->flt_synd = e_syndh;
897		ce_scrub(ecc);
898		if (queue) {
899			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
900			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
901		}
902	}
903
904	if ((sdbl >> 8) & 1) {
905		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
906		ecc->flt_synd = e_syndl | UDBL_REG;
907		ce_scrub(ecc);
908		if (queue) {
909			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
910			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
911		}
912	}
913
914	/*
915	 * Re-enable all error trapping (CEEN currently cleared).
916	 */
917	clr_datapath();
918	set_asyncflt(P_AFSR_CE);
919	set_error_enable(EER_ENABLE);
920}
921
922/*
923 * Cpu specific CE logging routine
924 */
925static void
926log_ce_err(struct async_flt *aflt, char *unum)
927{
928	spitf_async_flt spf_flt;
929
930	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
931		return;
932	}
933
934	spf_flt.cmn_asyncflt = *aflt;
935	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
936	    " Corrected Memory Error detected by");
937}
938
939/*
940 * Spitfire does not perform any further CE classification refinement
941 */
942/*ARGSUSED*/
943int
944ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
945    size_t afltoffset)
946{
947	return (0);
948}
949
950char *
951flt_to_error_type(struct async_flt *aflt)
952{
953	if (aflt->flt_status & ECC_INTERMITTENT)
954		return (ERR_TYPE_DESC_INTERMITTENT);
955	if (aflt->flt_status & ECC_PERSISTENT)
956		return (ERR_TYPE_DESC_PERSISTENT);
957	if (aflt->flt_status & ECC_STICKY)
958		return (ERR_TYPE_DESC_STICKY);
959	return (ERR_TYPE_DESC_UNKNOWN);
960}
961
962/*
963 * Called by correctable ecc error logging code to print out
964 * the sticky/persistent/intermittent status of the error.
965 */
966static void
967cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
968{
969	ushort_t status;
970	char *status1_str = "Memory";
971	char *status2_str = "Intermittent";
972	struct async_flt *aflt = (struct async_flt *)spf_flt;
973
974	status = aflt->flt_status;
975
976	if (status & ECC_ECACHE)
977		status1_str = "Ecache";
978
979	if (status & ECC_STICKY)
980		status2_str = "Sticky";
981	else if (status & ECC_PERSISTENT)
982		status2_str = "Persistent";
983
984	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
985	    NULL, " Corrected %s Error on %s is %s",
986	    status1_str, unum, status2_str);
987}
988
989/*
990 * check for a valid ce syndrome, then call the
991 * displacement flush scrubbing code, and then check the afsr to see if
992 * the error was persistent or intermittent. Reread the afar/afsr to see
993 * if the error was not scrubbed successfully, and is therefore sticky.
994 */
995/*ARGSUSED1*/
996void
997cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
998{
999	uint64_t eer, afsr;
1000	ushort_t status;
1001
1002	ASSERT(getpil() > LOCK_LEVEL);
1003
1004	/*
1005	 * It is possible that the flt_addr is not a valid
1006	 * physical address. To deal with this, we disable
1007	 * NCEEN while we scrub that address. If this causes
1008	 * a TIMEOUT/BERR, we know this is an invalid
1009	 * memory location.
1010	 */
1011	kpreempt_disable();
1012	eer = get_error_enable();
1013	if (eer & (EER_CEEN | EER_NCEEN))
1014		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
1015
1016	/*
1017	 * If the error was detected by IO, set the saved AFSR to CE so that
1018	 * the persistent/sticky/intermittent classification below applies.
1019	 */
1020	if (ecc->flt_status & ECC_IOBUS) {
1021		ecc->flt_stat = P_AFSR_CE;
1022	}
1023
1024	scrubphys(P2ALIGN(ecc->flt_addr, 64),
1025	    cpunodes[CPU->cpu_id].ecache_size);
1026
1027	get_asyncflt(&afsr);
1028	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1029		/*
1030		 * Must ensure that we don't get the TIMEOUT/BERR
1031		 * when we reenable NCEEN, so we clear the AFSR.
1032		 */
1033		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
1034		if (eer & (EER_CEEN | EER_NCEEN))
1035			set_error_enable(eer);
1036		kpreempt_enable();
1037		return;
1038	}
1039
1040	if (eer & EER_NCEEN)
1041		set_error_enable(eer & ~EER_CEEN);
1042
1043	/*
1044	 * Check and clear any ECC errors from the scrub.  If the scrub did
1045	 * not trip over the error, mark it intermittent.  If the scrub did
1046	 * trip the error again and it did not scrub away, mark it sticky.
1047	 * Otherwise mark it persistent.
1048	 */
1049	if (check_ecc(ecc) != 0) {
1050		cpu_read_paddr(ecc, 0, 1);
1051
1052		if (check_ecc(ecc) != 0)
1053			status = ECC_STICKY;
1054		else
1055			status = ECC_PERSISTENT;
1056	} else
1057		status = ECC_INTERMITTENT;
1058
1059	if (eer & (EER_CEEN | EER_NCEEN))
1060		set_error_enable(eer);
1061	kpreempt_enable();
1062
1063	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
1064	ecc->flt_status |= status;
1065}
1066
1067/*
1068 * get the syndrome and unum, and then call the routines
1069 * to check the other cpus and iobuses, and then do the error logging.
1070 */
1071/*ARGSUSED1*/
1072void
1073cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
1074{
1075	char unum[UNUM_NAMLEN];
1076	int len = 0;
1077	int ce_verbose = 0;
1078	int err;
1079
1080	ASSERT(ecc->flt_func != NULL);
1081
1082	/* Get the unum string for logging purposes */
1083	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
1084	    UNUM_NAMLEN, &len);
1085
1086	/* Call specific error logging routine */
1087	(void) (*ecc->flt_func)(ecc, unum);
1088
1089	/*
1090	 * Count errors per unum.
1091	 * Non-memory errors are all counted via a special unum string.
1092	 */
1093	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
1094	    automatic_page_removal) {
1095		(void) page_retire(ecc->flt_addr, err);
1096	}
1097
1098	if (ecc->flt_panic) {
1099		ce_verbose = 1;
1100	} else if ((ecc->flt_class == BUS_FAULT) ||
1101	    (ecc->flt_stat & P_AFSR_CE)) {
1102		ce_verbose = (ce_verbose_memory > 0);
1103	} else {
1104		ce_verbose = 1;
1105	}
1106
1107	if (ce_verbose) {
1108		spitf_async_flt sflt;
1109		int synd_code;
1110
1111		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
1112
1113		cpu_ce_log_status(&sflt, unum);
1114
1115		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
1116		    SYND(ecc->flt_synd));
1117
1118		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
1119			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1120			    NULL, " ECC Data Bit %2d was in error "
1121			    "and corrected", synd_code);
1122		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
1123			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
1124			    NULL, " ECC Check Bit %2d was in error "
1125			    "and corrected", synd_code - C0);
1126		} else {
1127			/*
1128			 * These are UE errors - we shouldn't be getting CE
1129			 * traps for these; handle them in case of bad h/w.
1130			 */
1131			switch (synd_code) {
1132			case M2:
1133				cpu_aflt_log(CE_CONT, 0, &sflt,
1134				    CPU_ERRID_FIRST, NULL,
1135				    " Two ECC Bits were in error");
1136				break;
1137			case M3:
1138				cpu_aflt_log(CE_CONT, 0, &sflt,
1139				    CPU_ERRID_FIRST, NULL,
1140				    " Three ECC Bits were in error");
1141				break;
1142			case M4:
1143				cpu_aflt_log(CE_CONT, 0, &sflt,
1144				    CPU_ERRID_FIRST, NULL,
1145				    " Four ECC Bits were in error");
1146				break;
1147			case MX:
1148				cpu_aflt_log(CE_CONT, 0, &sflt,
1149				    CPU_ERRID_FIRST, NULL,
1150				    " More than Four ECC bits were "
1151				    "in error");
1152				break;
1153			default:
1154				cpu_aflt_log(CE_CONT, 0, &sflt,
1155				    CPU_ERRID_FIRST, NULL,
1156				    " Unknown fault syndrome %d",
1157				    synd_code);
1158				break;
1159			}
1160		}
1161	}
1162
1163	/* Display entire cache line, if valid address */
1164	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
1165		read_ecc_data(ecc, 1, 1);
1166}
1167
1168/*
1169 * We route all errors through a single switch statement.
1170 */
1171void
1172cpu_ue_log_err(struct async_flt *aflt)
1173{
1174
1175	switch (aflt->flt_class) {
1176	case CPU_FAULT:
1177		cpu_async_log_err(aflt);
1178		break;
1179
1180	case BUS_FAULT:
1181		bus_async_log_err(aflt);
1182		break;
1183
1184	default:
1185		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
1186		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
1187		break;
1188	}
1189}
1190
1191/* Values for action variable in cpu_async_error() */
1192#define	ACTION_NONE		0
1193#define	ACTION_TRAMPOLINE	1
1194#define	ACTION_AST_FLAGS	2
1195
1196/*
1197 * Access error trap handler for asynchronous cpu errors.  This routine is
1198 * called to handle a data or instruction access error.  All fatal errors are
1199 * completely handled by this routine (by panicking).  Non-fatal error logging
1200 * is queued for later processing either via AST or softint at a lower PIL.
1201 * In case of panic, the error log queue will also be processed as part of the
1202 * panic flow to ensure all errors are logged.  This routine is called with all
1203 * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
1204 * error bits are also cleared.  The hardware has also disabled the I and
1205 * D-caches for us, so we must re-enable them before returning.
1206 *
1207 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
1208 *
1209 *		_______________________________________________________________
1210 *		|        Privileged tl0		|         Unprivileged	      |
1211 *		| Protected	| Unprotected	| Protected	| Unprotected |
1212 *		|on_trap|lofault|		|		|	      |
1213 * -------------|-------|-------+---------------+---------------+-------------|
1214 *		|	|	|		|		|	      |
1215 * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
1216 *		|	|	|		|		|	      |
1217 * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
1218 *		|	|	|		|		|	      |
1219 * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
1220 *		|	|	|		|		|	      |
1221 * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
1222 * ____________________________________________________________________________
1223 *
1224 *
1225 * Action codes:
1226 *
1227 * L - log
1228 * M - kick off memscrubber if flt_in_memory
1229 * P - panic
1230 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
1231 * R - i)  if aft_panic is set, panic
1232 *     ii) otherwise, send hwerr event to contract and SIGKILL to process
1233 * S - send SIGBUS to process
1234 * T - trampoline
1235 *
1236 * Special cases:
1237 *
1238 * 1) if aft_testfatal is set, all faults result in a panic regardless
1239 *    of type (even WP), protection (even on_trap), or privilege.
1240 */
1241/*ARGSUSED*/
1242void
1243cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
1244    uint_t p_afsr_high, uint_t p_afar_high)
1245{
1246	ushort_t sdbh, sdbl, ttype, tl;
1247	spitf_async_flt spf_flt;
1248	struct async_flt *aflt;
1249	char pr_reason[28];
1250	uint64_t oafsr;
1251	uint64_t acc_afsr = 0;			/* accumulated afsr */
1252	int action = ACTION_NONE;
1253	uint64_t t_afar = p_afar;
1254	uint64_t t_afsr = p_afsr;
1255	int expected = DDI_FM_ERR_UNEXPECTED;
1256	ddi_acc_hdl_t *hp;
1257
1258	/*
1259	 * We need to look at p_flag to determine if the thread detected an
1260	 * error while dumping core.  We can't grab p_lock here, but it's ok
1261	 * because we just need a consistent snapshot and we know that everyone
1262	 * else will store a consistent set of bits while holding p_lock.  We
1263	 * don't have to worry about a race because SDOCORE is set once prior
1264	 * to doing i/o from the process's address space and is never cleared.
1265	 */
1266	uint_t pflag = ttoproc(curthread)->p_flag;
1267
1268	pr_reason[0] = '\0';
1269
1270	/*
1271	 * Note: the Spitfire data buffer error registers
1272	 * (upper and lower halves) are or'ed into the upper
1273	 * word of the afsr by async_err() if P_AFSR_UE is set.
1274	 */
1275	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
1276	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
1277
1278	/*
1279	 * Grab the ttype encoded in <63:53> of the saved
1280	 * afsr passed from async_err()
1281	 */
1282	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
1283	tl = (ushort_t)(t_afsr >> 62);
1284
1285	t_afsr &= S_AFSR_MASK;
1286	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
1287
1288	/*
1289	 * Initialize most of the common and CPU-specific structure.  We derive
1290	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
1291	 * initial setting of aflt->flt_panic is based on TL: we must panic if
1292	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
1293	 * tuneable aft_testfatal is set (not the default).
1294	 */
1295	bzero(&spf_flt, sizeof (spitf_async_flt));
1296	aflt = (struct async_flt *)&spf_flt;
1297	aflt->flt_id = gethrtime_waitfree();
1298	aflt->flt_stat = t_afsr;
1299	aflt->flt_addr = t_afar;
1300	aflt->flt_bus_id = getprocessorid();
1301	aflt->flt_inst = CPU->cpu_id;
1302	aflt->flt_pc = (caddr_t)rp->r_pc;
1303	aflt->flt_prot = AFLT_PROT_NONE;
1304	aflt->flt_class = CPU_FAULT;
1305	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
1306	aflt->flt_tl = (uchar_t)tl;
1307	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
1308	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1309
1310	/*
1311	 * Set flt_status based on the trap type.  If we end up here as the
1312	 * result of a UE detected by the CE handling code, leave status 0.
1313	 */
1314	switch (ttype) {
1315	case T_DATA_ERROR:
1316		aflt->flt_status = ECC_D_TRAP;
1317		break;
1318	case T_INSTR_ERROR:
1319		aflt->flt_status = ECC_I_TRAP;
1320		break;
1321	}
1322
1323	spf_flt.flt_sdbh = sdbh;
1324	spf_flt.flt_sdbl = sdbl;
1325
1326	/*
1327	 * Check for fatal async errors.
1328	 */
1329	check_misc_err(&spf_flt);
1330
1331	/*
1332	 * If the trap occurred in privileged mode at TL=0, we need to check to
1333	 * see if we were executing in the kernel under on_trap() or t_lofault
1334	 * protection.  If so, modify the saved registers so that we return
1335	 * from the trap to the appropriate trampoline routine.
1336	 */
1337	if (aflt->flt_priv && tl == 0) {
1338		if (curthread->t_ontrap != NULL) {
1339			on_trap_data_t *otp = curthread->t_ontrap;
1340
1341			if (otp->ot_prot & OT_DATA_EC) {
1342				aflt->flt_prot = AFLT_PROT_EC;
1343				otp->ot_trap |= OT_DATA_EC;
1344				rp->r_pc = otp->ot_trampoline;
1345				rp->r_npc = rp->r_pc + 4;
1346				action = ACTION_TRAMPOLINE;
1347			}
1348
1349			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
1350			    (otp->ot_prot & OT_DATA_ACCESS)) {
1351				aflt->flt_prot = AFLT_PROT_ACCESS;
1352				otp->ot_trap |= OT_DATA_ACCESS;
1353				rp->r_pc = otp->ot_trampoline;
1354				rp->r_npc = rp->r_pc + 4;
1355				action = ACTION_TRAMPOLINE;
1356				/*
1357				 * for peeks and caut_gets, errors are expected
1358				 */
1359				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1360				if (!hp)
1361					expected = DDI_FM_ERR_PEEK;
1362				else if (hp->ah_acc.devacc_attr_access ==
1363				    DDI_CAUTIOUS_ACC)
1364					expected = DDI_FM_ERR_EXPECTED;
1365			}
1366
1367		} else if (curthread->t_lofault) {
1368			aflt->flt_prot = AFLT_PROT_COPY;
1369			rp->r_g1 = EFAULT;
1370			rp->r_pc = curthread->t_lofault;
1371			rp->r_npc = rp->r_pc + 4;
1372			action = ACTION_TRAMPOLINE;
1373		}
1374	}
1375
1376	/*
1377	 * Determine if this error needs to be treated as fatal.  Note that
1378	 * multiple errors detected upon entry to this trap handler does not
1379	 * necessarily warrant a panic.  We only want to panic if the trap
1380	 * happened in privileged mode and not under t_ontrap or t_lofault
1381	 * protection.  The exception is WP: if we *only* get WP, it is not
1382	 * fatal even if the trap occurred in privileged mode, except on Sabre.
1383	 *
1384	 * aft_panic, if set, effectively makes us treat usermode
1385	 * UE/EDP/LDP faults as if they were privileged - so we will
1386	 * panic instead of sending a contract event.  A lofault-protected
1387	 * fault will normally follow the contract event; if aft_panic is
1388	 * set this will be changed to a panic.
1389	 *
1390	 * For usermode BERR/BTO errors, eg from processes performing device
1391	 * control through mapped device memory, we need only deliver
1392	 * a SIGBUS to the offending process.
1393	 *
1394	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
1395	 * checked later; for now we implement the common reasons.
1396	 */
1397	if (aflt->flt_prot == AFLT_PROT_NONE) {
1398		/*
1399		 * Beware - multiple bits may be set in AFSR
1400		 */
1401		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
1402			if (aflt->flt_priv || aft_panic)
1403				aflt->flt_panic = 1;
1404		}
1405
1406		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1407			if (aflt->flt_priv)
1408				aflt->flt_panic = 1;
1409		}
1410	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
1411		aflt->flt_panic = 1;
1412	}
1413
1414	/*
1415	 * UE/BERR/TO: Call our bus nexus friends to check for
1416	 * IO errors that may have resulted in this trap.
1417	 */
1418	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
1419		cpu_run_bus_error_handlers(aflt, expected);
1420	}
1421
1422	/*
1423	 * Handle UE: If the UE is in memory, we need to flush the bad line from
1424	 * the E-cache.  We also need to query the bus nexus for fatal errors.
1425	 * For Sabre, we will panic on UEs. Attempts to do diagnostic reads on
1426	 * caches may introduce more parity errors (especially when the module
1427	 * is bad) and on Sabre there is no guarantee that such errors
1428	 * (if introduced) are written back as poisoned data.
1429	 */
1430	if (t_afsr & P_AFSR_UE) {
1431		int i;
1432
1433		(void) strcat(pr_reason, "UE ");
1434
1435		spf_flt.flt_type = CPU_UE_ERR;
1436		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1437		    MMU_PAGESHIFT)) ? 1: 0;
1438
1439		/*
1440		 * With UE, we have the PA of the fault.
1441		 * Let's do a diagnostic read to get the ecache
1442		 * data and tag info of the bad line for logging.
1443		 */
1444		if (aflt->flt_in_memory) {
1445			uint32_t ec_set_size;
1446			uchar_t state;
1447			uint32_t ecache_idx;
1448			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
1449
1450			/* touch the line to put it in ecache */
1451			acc_afsr |= read_and_clear_afsr();
1452			(void) lddphys(faultpa);
1453			acc_afsr |= (read_and_clear_afsr() &
1454			    ~(P_AFSR_EDP | P_AFSR_UE));
1455
1456			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
1457			    ecache_associativity;
1458
1459			for (i = 0; i < ecache_associativity; i++) {
1460				ecache_idx = i * ec_set_size +
1461				    (aflt->flt_addr % ec_set_size);
1462				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
1463				    (uint64_t *)&spf_flt.flt_ec_data[0],
1464				    &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
1465				acc_afsr |= oafsr;
1466
1467				state = (uchar_t)((spf_flt.flt_ec_tag &
1468				    cpu_ec_state_mask) >> cpu_ec_state_shift);
1469
1470				if ((state & cpu_ec_state_valid) &&
1471				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
1472				    ((uint64_t)aflt->flt_addr >>
1473				    cpu_ec_tag_shift)))
1474					break;
1475			}
1476
1477			/*
1478			 * Check to see if the ecache tag is valid for the
1479			 * fault PA. In the very unlikely event where the
1480			 * line could be victimized, no ecache info will be
1481			 * available. If this is the case, capture the line
1482			 * from memory instead.
1483			 */
1484			if ((state & cpu_ec_state_valid) == 0 ||
1485			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
1486			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
1487				for (i = 0; i < 8; i++, faultpa += 8) {
1488					ec_data_t *ecdptr;
1489
1490					ecdptr = &spf_flt.flt_ec_data[i];
1491					acc_afsr |= read_and_clear_afsr();
1492					ecdptr->ec_d8 = lddphys(faultpa);
1493					acc_afsr |= (read_and_clear_afsr() &
1494					    ~(P_AFSR_EDP | P_AFSR_UE));
1495					ecdptr->ec_afsr = 0;
1496							/* null afsr value */
1497				}
1498
1499				/*
1500				 * Mark tag invalid to indicate mem dump
1501				 * when we print out the info.
1502				 */
1503				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
1504			}
1505			spf_flt.flt_ec_lcnt = 1;
1506
1507			/*
1508			 * Flush out the bad line
1509			 */
1510			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
1511			    cpunodes[CPU->cpu_id].ecache_size);
1512
1513			acc_afsr |= clear_errors(NULL, NULL);
1514		}
1515
1516		/*
1517		 * Ask our bus nexus friends if they have any fatal errors. If
1518		 * so, they will log appropriate error messages and panic as a
1519		 * result. We then queue an event for each UDB that reports a
1520		 * UE. Each UE reported in a UDB will have its own log message.
1521		 *
1522		 * Note from kbn: In the case where there are multiple UEs
1523		 * (ME bit is set) - the AFAR address is only accurate to
1524		 * the 16-byte granularity. One cannot tell whether the AFAR
1525		 * belongs to the UDBH or UDBL syndromes. In this case, we
1526		 * always report the AFAR address to be 16-byte aligned.
1527		 *
1528		 * If we're on a Sabre, there is no SDBL, but it will always
1529		 * read as zero, so the sdbl test below will safely fail.
1530		 */
1531		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
1532			aflt->flt_panic = 1;
1533
1534		if (sdbh & P_DER_UE) {
1535			aflt->flt_synd = sdbh & P_DER_E_SYND;
1536			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1537			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1538			    aflt->flt_panic);
1539		}
1540		if (sdbl & P_DER_UE) {
1541			aflt->flt_synd = sdbl & P_DER_E_SYND;
1542			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
1543			if (!(aflt->flt_stat & P_AFSR_ME))
1544				aflt->flt_addr |= 0x8;
1545			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
1546			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1547			    aflt->flt_panic);
1548		}
1549
1550		/*
1551	 * If we got a UE and are panicking, save the fault PA in a known
1552		 * location so that the platform specific panic code can check
1553		 * for copyback errors.
1554		 */
1555		if (aflt->flt_panic && aflt->flt_in_memory) {
1556			panic_aflt = *aflt;
1557		}
1558	}
1559
1560	/*
1561	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
1562	 * async error for logging. For Sabre, we panic on EDP or LDP.
1563	 */
1564	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
1565		spf_flt.flt_type = CPU_EDP_LDP_ERR;
1566
1567		if (t_afsr & P_AFSR_EDP)
1568			(void) strcat(pr_reason, "EDP ");
1569
1570		if (t_afsr & P_AFSR_LDP)
1571			(void) strcat(pr_reason, "LDP ");
1572
1573		/*
1574		 * Here we have no PA to work with.
1575		 * Scan each line in the ecache to look for
1576		 * the one with bad parity.
1577		 */
1578		aflt->flt_addr = AFLT_INV_ADDR;
1579		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1580		    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
1581		acc_afsr |= (oafsr & ~P_AFSR_WP);
1582
1583		/*
1584		 * If we found a bad PA, update the state to indicate if it is
1585		 * memory or I/O space.  This code will be important if we ever
1586		 * support cacheable frame buffers.
1587		 */
1588		if (aflt->flt_addr != AFLT_INV_ADDR) {
1589			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
1590			    MMU_PAGESHIFT)) ? 1 : 0;
1591		}
1592
1593		if (isus2i || isus2e)
1594			aflt->flt_panic = 1;
1595
1596		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
1597		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
1598		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1599		    aflt->flt_panic);
1600	}
1601
1602	/*
1603	 * Timeout and bus error handling.  There are two cases to consider:
1604	 *
1605	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke, we
1606	 * have already modified the saved registers so that we will return
1607	 * from the trap to the appropriate trampoline routine; otherwise panic.
1608	 *
1609	 * (2) In user mode, we can simply use our AST mechanism to deliver
1610	 * a SIGBUS.  We do not log the occurrence - processes performing
1611	 * device control would generate lots of uninteresting messages.
1612	 */
1613	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
1614		if (t_afsr & P_AFSR_TO)
1615			(void) strcat(pr_reason, "BTO ");
1616
1617		if (t_afsr & P_AFSR_BERR)
1618			(void) strcat(pr_reason, "BERR ");
1619
1620		spf_flt.flt_type = CPU_BTO_BERR_ERR;
1621		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
1622			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
1623			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
1624			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1625			    aflt->flt_panic);
1626		}
1627	}
1628
1629	/*
1630	 * Handle WP: WP happens when the ecache is victimized and a parity
1631	 * error was detected on a writeback.  The data in question will be
1632	 * poisoned as a UE will be written back.  The PA is not logged and
1633	 * it is possible that it doesn't belong to the trapped thread.  The
1634	 * WP trap is not fatal, but it could be fatal to someone that
1635	 * subsequently accesses the toxic page.  We set read_all_memscrub
1636	 * to force the memscrubber to read all of memory when it awakens.
1637	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
1638	 * UE back to poison the data.
1639	 */
1640	if (t_afsr & P_AFSR_WP) {
1641		(void) strcat(pr_reason, "WP ");
1642		if (isus2i || isus2e) {
1643			aflt->flt_panic = 1;
1644		} else {
1645			read_all_memscrub = 1;
1646		}
1647		spf_flt.flt_type = CPU_WP_ERR;
1648		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
1649		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1650		    aflt->flt_panic);
1651	}
1652
1653	/*
1654	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
1655	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
1656	 * This is fatal.
1657	 */
1658
1659	if (t_afsr & P_AFSR_CP) {
1660		if (isus2i || isus2e) {
1661			(void) strcat(pr_reason, "CP ");
1662			aflt->flt_panic = 1;
1663			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
1664			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1665			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1666			    aflt->flt_panic);
1667		} else {
1668			/*
1669			 * Orphan CP: Happens due to a signal integrity problem
1670			 * on a CPU, where a CP is reported without its
1671			 * associated UE.  This is handled by locating the line
1672			 * with bad parity and kicking off the memscrubber to
1673			 * find the UE, whether in memory or in another CPU's cache.
1674			 */
1675			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
1676			(void) strcat(pr_reason, "ORPHAN_CP ");
1677
1678			/*
1679			 * Here we have no PA to work with.
1680			 * Scan each line in the ecache to look for
1681			 * the one with bad parity.
1682			 */
1683			aflt->flt_addr = AFLT_INV_ADDR;
1684			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
1685			    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
1686			    &oafsr);
1687			acc_afsr |= oafsr;
1688
1689			/*
1690			 * If we found a bad PA, update the state to indicate
1691			 * if it is memory or I/O space.
1692			 */
1693			if (aflt->flt_addr != AFLT_INV_ADDR) {
1694				aflt->flt_in_memory =
1695				    (pf_is_memory(aflt->flt_addr >>
1696				    MMU_PAGESHIFT)) ? 1 : 0;
1697			}
1698			read_all_memscrub = 1;
1699			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
1700			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1701			    aflt->flt_panic);
1702
1703		}
1704	}
1705
1706	/*
1707	 * If we queued an error other than WP or CP and we are going to return
1708	 * from the trap and the error was in user mode or inside of a
1709	 * copy routine, set AST flag so the queue will be drained before
1710	 * returning to user mode.
1711	 *
1712	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
1713	 * and send an event to its process contract.
1714	 *
1715	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
1716	 * will have been no error queued in this case.
1717	 */
1718	if ((t_afsr &
1719	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
1720	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
1721			int pcb_flag = 0;
1722
1723			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
1724				pcb_flag |= ASYNC_HWERR;
1725
1726			if (t_afsr & P_AFSR_BERR)
1727				pcb_flag |= ASYNC_BERR;
1728
1729			if (t_afsr & P_AFSR_TO)
1730				pcb_flag |= ASYNC_BTO;
1731
1732			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
1733			aston(curthread);
1734			action = ACTION_AST_FLAGS;
1735	}
1736
1737	/*
1738	 * In response to a deferred error, we must do one of three things:
1739	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
1740	 * set in cases (1) and (2) - check that either action is set or
1741	 * (3) is true.
1742	 *
1743	 * On II, the WP writes poisoned data back to memory, which will
1744	 * cause a UE and a panic or reboot when read.  In this case, we
1745	 * don't need to panic at this time.  On IIi and IIe,
1746	 * aflt->flt_panic is already set above.
1747	 */
1748	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
1749	    (t_afsr & P_AFSR_WP));
1750
1751	/*
1752	 * Make a final sanity check to make sure we did not get any more async
1753	 * errors and accumulate the afsr.
1754	 */
1755	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
1756	    cpunodes[CPU->cpu_id].ecache_linesize);
1757	(void) clear_errors(&spf_flt, NULL);
1758
1759	/*
1760	 * Take care of a special case: If there is a UE in the ecache flush
1761	 * area, we'll see it in flush_ecache().  This will trigger the
1762	 * CPU_ADDITIONAL_ERRORS case below.
1763	 *
1764	 * This could occur if the original error was a UE in the flush area,
1765	 * or if the original error was an E$ error that was flushed out of
1766	 * the E$ in scan_ecache().
1767	 *
1768	 * If it's at the same address that we're already logging, then it's
1769	 * probably one of these cases.  Clear the bit so we don't trip over
1770	 * it on the additional errors case, which could cause an unnecessary
1771	 * panic.
1772	 */
1773	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
1774		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
1775	else
1776		acc_afsr |= aflt->flt_stat;
1777
1778	/*
1779	 * Check the accumulated afsr for the important bits.
1780	 * Make sure the spf_flt.flt_type value is set, and
1781	 * enqueue an error.
1782	 */
1783	if (acc_afsr &
1784	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
1785		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
1786		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
1787		    P_AFSR_ISAP))
1788			aflt->flt_panic = 1;
1789
1790		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
1791		aflt->flt_stat = acc_afsr;
1792		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
1793		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
1794		    aflt->flt_panic);
1795	}
1796
1797	/*
1798	 * If aflt->flt_panic is set at this point, we need to panic as the
1799	 * result of a trap at TL > 0, or an error we determined to be fatal.
1800	 * We've already enqueued the error in one of the if-clauses above,
1801	 * and it will be dequeued and logged as part of the panic flow.
1802	 */
1803	if (aflt->flt_panic) {
1804		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
1805		    "See previous message(s) for details", " %sError(s)",
1806		    pr_reason);
1807	}
1808
1809	/*
1810	 * Before returning, we must re-enable errors, and
1811	 * reset the caches to their boot-up state.
1812	 */
1813	set_lsu(get_lsu() | cache_boot_state);
1814	set_error_enable(EER_ENABLE);
1815}
1816
1817/*
1818 * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
1819 * This routine is shared by the CE and UE handling code.
1820 */
1821static void
1822check_misc_err(spitf_async_flt *spf_flt)
1823{
1824	struct async_flt *aflt = (struct async_flt *)spf_flt;
1825	char *fatal_str = NULL;
1826
1827	/*
1828	 * The ISAP and ETP errors are supposed to cause a POR
1829	 * from the system, so in theory we never, ever see these messages.
1830	 * ISAP, ETP and IVUE are considered to be fatal.
1831	 */
1832	if (aflt->flt_stat & P_AFSR_ISAP)
1833		fatal_str = " System Address Parity Error on";
1834	else if (aflt->flt_stat & P_AFSR_ETP)
1835		fatal_str = " Ecache Tag Parity Error on";
1836	else if (aflt->flt_stat & P_AFSR_IVUE)
1837		fatal_str = " Interrupt Vector Uncorrectable Error on";
1838	if (fatal_str != NULL) {
1839		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
1840		    NULL, fatal_str);
1841	}
1842}
1843
1844/*
1845 * Routine to convert a syndrome into a syndrome code.
1846 */
1847static int
1848synd_to_synd_code(int synd_status, ushort_t synd)
1849{
1850	if (synd_status != AFLT_STAT_VALID)
1851		return (-1);
1852
1853	/*
1854	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
1855	 * to get the code indicating which bit(s) is(are) bad.
1856	 */
1857	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
1858		return (-1);
1859	else
1860		return (ecc_syndrome_tab[synd]);
1861}
1862
1863/* ARGSUSED */
1864int
1865cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
1866{
1867	return (ENOTSUP);
1868}
1869
1870/* ARGSUSED */
1871int
1872cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
1873{
1874	return (ENOTSUP);
1875}
1876
1877/* ARGSUSED */
1878int
1879cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
1880{
1881	return (ENOTSUP);
1882}
1883
1884/*
1885 * Routine to return a string identifying the physical name
1886 * associated with a memory/cache error.
1887 */
1888/* ARGSUSED */
1889int
1890cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
1891    uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
1892    char *buf, int buflen, int *lenp)
1893{
1894	short synd_code;
1895	int ret;
1896
1897	if (flt_in_memory) {
1898		synd_code = synd_to_synd_code(synd_status, synd);
1899		if (synd_code == -1) {
1900			ret = EINVAL;
1901		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
1902		    buf, buflen, lenp) != 0) {
1903			ret = EIO;
1904		} else if (*lenp <= 1) {
1905			ret = EINVAL;
1906		} else {
1907			ret = 0;
1908		}
1909	} else {
1910		ret = ENOTSUP;
1911	}
1912
1913	if (ret != 0) {
1914		buf[0] = '\0';
1915		*lenp = 0;
1916	}
1917
1918	return (ret);
1919}
1920
1921/*
1922 * Wrapper for cpu_get_mem_unum() routine that takes an
1923 * async_flt struct rather than explicit arguments.
1924 */
1925int
1926cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
1927    char *buf, int buflen, int *lenp)
1928{
1929	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
1930	    aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
1931	    aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
1932}
1933
1934/*
1935 * This routine is a more generic interface to cpu_get_mem_unum(),
1936 * that may be used by other modules (e.g. mm).
1937 */
1938int
1939cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
1940    char *buf, int buflen, int *lenp)
1941{
1942	int synd_status, flt_in_memory, ret;
1943	char unum[UNUM_NAMLEN];
1944
1945	/*
1946	 * Check for an invalid address.
1947	 */
1948	if (afar == (uint64_t)-1)
1949		return (ENXIO);
1950
1951	if (synd == (uint64_t)-1)
1952		synd_status = AFLT_STAT_INVALID;
1953	else
1954		synd_status = AFLT_STAT_VALID;
1955
1956	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
1957
1958	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
1959	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
1960	    != 0)
1961		return (ret);
1962
1963	if (*lenp >= buflen)
1964		return (ENAMETOOLONG);
1965
1966	(void) strncpy(buf, unum, buflen);
1967
1968	return (0);
1969}
1970
1971/*
1972 * Routine to return memory information associated
1973 * with a physical address and syndrome.
1974 */
1975/* ARGSUSED */
1976int
1977cpu_get_mem_info(uint64_t synd, uint64_t afar,
1978    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1979    int *segsp, int *banksp, int *mcidp)
1980{
1981	return (ENOTSUP);
1982}
1983
1984/*
1985 * Routine to return a string identifying the physical
1986 * name associated with a cpuid.
1987 */
1988/* ARGSUSED */
1989int
1990cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
1991{
1992	return (ENOTSUP);
1993}
1994
1995/*
1996 * This routine returns the size of the kernel's FRU name buffer.
1997 */
1998size_t
1999cpu_get_name_bufsize()
2000{
2001	return (UNUM_NAMLEN);
2002}
2003
2004/*
2005 * Cpu specific log func for UEs.
2006 */
2007static void
2008log_ue_err(struct async_flt *aflt, char *unum)
2009{
2010	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
2011	int len = 0;
2012
2013#ifdef DEBUG
2014	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
2015
2016	/*
2017	 * Paranoid Check for priv mismatch
2018	 * Only applicable for UEs
2019	 */
2020	if (afsr_priv != aflt->flt_priv) {
2021		/*
2022		 * The priv bits in %tstate and %afsr did not match; we expect
2023		 * this to be very rare, so flag it with a message.
2024		 */
2025		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
2026		    ": PRIV bit in TSTATE and AFSR mismatched; "
2027		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
2028
2029		/* update saved afsr to reflect the correct priv */
2030		aflt->flt_stat &= ~P_AFSR_PRIV;
2031		if (aflt->flt_priv)
2032			aflt->flt_stat |= P_AFSR_PRIV;
2033	}
2034#endif /* DEBUG */
2035
2036	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
2037	    UNUM_NAMLEN, &len);
2038
2039	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
2040	    " Uncorrectable Memory Error on");
2041
2042	if (SYND(aflt->flt_synd) == 0x3) {
2043		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
2044		    " Syndrome 0x3 indicates that this may not be a "
2045		    "memory module problem");
2046	}
2047
2048	if (aflt->flt_in_memory)
2049		cpu_log_ecmem_info(spf_flt);
2050}
2051
2052
2053/*
2054 * The cpu_async_log_err() function is called via the ue_drain() function to
2055 * handle logging for CPU events that are dequeued.  As such, it can be invoked
2056 * from softint context, from AST processing in the trap() flow, or from the
2057 * panic flow.  We decode the CPU-specific data, and log appropriate messages.
2058 */
2059static void
2060cpu_async_log_err(void *flt)
2061{
2062	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
2063	struct async_flt *aflt = (struct async_flt *)flt;
2064	char unum[UNUM_NAMLEN];
2065	char *space;
2066	char *ecache_scrub_logstr = NULL;
2067
2068	switch (spf_flt->flt_type) {
2069	case CPU_UE_ERR:
2070		/*
2071		 * We want to skip logging only if ALL the following
2072		 * conditions are true:
2073		 *
2074		 *	1. We are not panicking
2075		 *	2. There is only one error
2076		 *	3. That error is a memory error
2077		 *	4. The error is caused by the memory scrubber (in
2078		 *	   which case the error will have occurred under
2079		 *	   on_trap protection)
2080		 *	5. The error is on a retired page
2081		 *
2082		 * Note 1: AFLT_PROT_EC is used in places other than the memory
2083		 * scrubber.  However, none of those errors should occur
2084		 * on a retired page.
2085		 *
2086		 * Note 2: In the CE case, these errors are discarded before
2087		 * the errorq.  In the UE case, we must wait until now --
2088		 * softcall() grabs a mutex, which we can't do at a high PIL.
2089		 */
2090		if (!panicstr &&
2091		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
2092		    aflt->flt_prot == AFLT_PROT_EC) {
2093			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2094				/* Zero the address to clear the error */
2095				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2096				return;
2097			}
2098		}
2099
2100		/*
2101		 * Log the UE and check for causes of this UE error that
2102		 * don't cause a trap (Copyback error).  cpu_async_error()
2103		 * has already checked the i/o buses for us.
2104		 */
2105		log_ue_err(aflt, unum);
2106		if (aflt->flt_in_memory)
2107			cpu_check_allcpus(aflt);
2108		break;
2109
2110	case CPU_EDP_LDP_ERR:
2111		if (aflt->flt_stat & P_AFSR_EDP)
2112			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2113			    NULL, " EDP event on");
2114
2115		if (aflt->flt_stat & P_AFSR_LDP)
2116			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
2117			    NULL, " LDP event on");
2118
2119		/* Log ecache info if it exists */
2120		if (spf_flt->flt_ec_lcnt > 0) {
2121			cpu_log_ecmem_info(spf_flt);
2122
2123			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2124			    NULL, " AFAR was derived from E$Tag");
2125		} else {
2126			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
2127			    NULL, " No error found in ecache (No fault "
2128			    "PA available)");
2129		}
2130		break;
2131
2132	case CPU_WP_ERR:
2133		/*
2134		 * If the memscrub thread hasn't yet read
2135		 * all of memory, as we requested in the
2136		 * trap handler, then give it a kick to
2137		 * make sure it does.
2138		 */
2139		if (!isus2i && !isus2e && read_all_memscrub)
2140			memscrub_run();
2141
2142		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
2143		    " WP event on");
2144		return;
2145
2146	case CPU_BTO_BERR_ERR:
2147		/*
2148		 * A bus timeout or error occurred that was in user mode or not
2149		 * in a protected kernel code region.
2150		 */
2151		if (aflt->flt_stat & P_AFSR_BERR) {
2152			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2153			    spf_flt, BERRTO_LFLAGS, NULL,
2154			    " Bus Error on System Bus in %s mode from",
2155			    aflt->flt_priv ? "privileged" : "user");
2156		}
2157
2158		if (aflt->flt_stat & P_AFSR_TO) {
2159			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
2160			    spf_flt, BERRTO_LFLAGS, NULL,
2161			    " Timeout on System Bus in %s mode from",
2162			    aflt->flt_priv ? "privileged" : "user");
2163		}
2164
2165		return;
2166
2167	case CPU_PANIC_CP_ERR:
2168		/*
2169		 * Process the Copyback (CP) error info (if any) obtained from
2170		 * polling all the cpus in the panic flow. This case is only
2171		 * entered if we are panicking.
2172		 */
2173		ASSERT(panicstr != NULL);
2174		ASSERT(aflt->flt_id == panic_aflt.flt_id);
2175
2176		/* See which space - this info may not exist */
2177		if (panic_aflt.flt_status & ECC_D_TRAP)
2178			space = "Data ";
2179		else if (panic_aflt.flt_status & ECC_I_TRAP)
2180			space = "Instruction ";
2181		else
2182			space = "";
2183
2184		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2185		    " AFAR was derived from UE report,"
2186		    " CP event on CPU%d (caused %saccess error on %s%d)",
2187		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
2188		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
2189
2190		if (spf_flt->flt_ec_lcnt > 0)
2191			cpu_log_ecmem_info(spf_flt);
2192		else
2193			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
2194			    NULL, " No cache dump available");
2195
2196		return;
2197
2198	case CPU_TRAPPING_CP_ERR:
2199		/*
2200		 * For sabre only.  This is a copyback ecache parity error due
2201		 * to a PCI DMA read.  We should be panicking if we get here.
2202		 */
2203		ASSERT(panicstr != NULL);
2204		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
2205		    " AFAR was derived from UE report,"
2206		    " CP event on CPU%d (caused Data access error "
2207		    "on PCIBus)", aflt->flt_inst);
2208		return;
2209
2210		/*
2211		 * We log ecache lines in the following states:
2212		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
2213		 * dirty_bad_busy if ecache_scrub_verbose is set, and panic
2214		 * in addition to logging if ecache_scrub_panic is set.
2215		 */
2216	case CPU_BADLINE_CI_ERR:
2217		ecache_scrub_logstr = "CBI";
2218		/* FALLTHRU */
2219
2220	case CPU_BADLINE_CB_ERR:
2221		if (ecache_scrub_logstr == NULL)
2222			ecache_scrub_logstr = "CBB";
2223		/* FALLTHRU */
2224
2225	case CPU_BADLINE_DI_ERR:
2226		if (ecache_scrub_logstr == NULL)
2227			ecache_scrub_logstr = "DBI";
2228		/* FALLTHRU */
2229
2230	case CPU_BADLINE_DB_ERR:
2231		if (ecache_scrub_logstr == NULL)
2232			ecache_scrub_logstr = "DBB";
2233
2234		cpu_aflt_log(CE_NOTE, 2, spf_flt,
2235		    (CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
2236		    " %s event on", ecache_scrub_logstr);
2237		cpu_log_ecmem_info(spf_flt);
2238
2239		return;
2240
2241	case CPU_ORPHAN_CP_ERR:
2242		/*
2243		 * Orphan CPs: the CP bit is set, but no CPU reports the
2244		 * associated UE.
2245		 */
2246		if (read_all_memscrub)
2247			memscrub_run();
2248
2249		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
2250		    NULL, " Orphan CP event on");
2251
2252		/* Log ecache info if it exists */
2253		if (spf_flt->flt_ec_lcnt > 0)
2254			cpu_log_ecmem_info(spf_flt);
2255		else
2256			cpu_aflt_log(CE_NOTE, 2, spf_flt,
2257			    (CP_LFLAGS | CPU_FLTCPU), NULL,
2258			    " No error found in ecache (No fault "
2259			    "PA available)");
2260		return;
2261
2262	case CPU_ECACHE_ADDR_PAR_ERR:
2263		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2264		    " E$ Tag Address Parity error on");
2265		cpu_log_ecmem_info(spf_flt);
2266		return;
2267
2268	case CPU_ECACHE_STATE_ERR:
2269		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2270		    " E$ Tag State Parity error on");
2271		cpu_log_ecmem_info(spf_flt);
2272		return;
2273
2274	case CPU_ECACHE_TAG_ERR:
2275		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2276		    " E$ Tag scrub event on");
2277		cpu_log_ecmem_info(spf_flt);
2278		return;
2279
2280	case CPU_ECACHE_ETP_ETS_ERR:
2281		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
2282		    " AFSR.ETP is set and AFSR.ETS is zero on");
2283		cpu_log_ecmem_info(spf_flt);
2284		return;
2285
2286
2287	case CPU_ADDITIONAL_ERR:
2288		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
2289		    " Additional errors detected during error processing on");
2290		return;
2291
2292	default:
2293		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
2294		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
2295		return;
2296	}
2297
2298	/* ... fall through from the UE, EDP, or LDP cases */
2299
2300	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2301		if (!panicstr) {
2302			(void) page_retire(aflt->flt_addr, PR_UE);
2303		} else {
2304			/*
2305			 * Clear UEs on panic so that we don't
2306			 * get haunted by them during panic or
2307			 * after reboot
2308			 */
2309			clearphys(P2ALIGN(aflt->flt_addr, 64),
2310			    cpunodes[CPU->cpu_id].ecache_size,
2311			    cpunodes[CPU->cpu_id].ecache_linesize);
2312
2313			(void) clear_errors(NULL, NULL);
2314		}
2315	}
2316
2317	/*
2318	 * Log final recover message
2319	 */
2320	if (!panicstr) {
2321		if (!aflt->flt_priv) {
2322			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2323			    NULL, " Above Error is in User Mode"
2324			    "\n    and is fatal: "
2325			    "will SIGKILL process and notify contract");
2326		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
2327			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2328			    NULL, " Above Error detected while dumping core;"
2329			    "\n    core file will be truncated");
2330		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
2331			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
2332			    NULL, " Above Error is due to Kernel access"
2333			    "\n    to User space and is fatal: "
2334			    "will SIGKILL process and notify contract");
2335		} else if (aflt->flt_prot == AFLT_PROT_EC) {
2336			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
2337			    " Above Error detected by protected Kernel code"
2338			    "\n    that will try to clear error from system");
2339		}
2340	}
2341}
2342
2343
2344/*
2345 * Check all cpus for non-trapping UE-causing errors
2346 * In Ultra I/II, we look for copyback errors (CPs)
2347 */
2348void
2349cpu_check_allcpus(struct async_flt *aflt)
2350{
2351	spitf_async_flt cp;
2352	spitf_async_flt *spf_cpflt = &cp;
2353	struct async_flt *cpflt = (struct async_flt *)&cp;
2354	int pix;
2355
2356	cpflt->flt_id = aflt->flt_id;
2357	cpflt->flt_addr = aflt->flt_addr;
2358
2359	for (pix = 0; pix < NCPU; pix++) {
2360		if (CPU_XCALL_READY(pix)) {
2361			xc_one(pix, (xcfunc_t *)get_cpu_status,
2362			    (uint64_t)cpflt, 0);
2363
2364			if (cpflt->flt_stat & P_AFSR_CP) {
2365				char *space;
2366
2367				/* See which space - this info may not exist */
2368				if (aflt->flt_status & ECC_D_TRAP)
2369					space = "Data ";
2370				else if (aflt->flt_status & ECC_I_TRAP)
2371					space = "Instruction ";
2372				else
2373					space = "";
2374
2375				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
2376				    NULL, " AFAR was derived from UE report,"
2377				    " CP event on CPU%d (caused %saccess "
2378				    "error on %s%d)", pix, space,
2379				    (aflt->flt_status & ECC_IOBUS) ?
2380				    "IOBUS" : "CPU", aflt->flt_bus_id);
2381
2382				if (spf_cpflt->flt_ec_lcnt > 0)
2383					cpu_log_ecmem_info(spf_cpflt);
2384				else
2385					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
2386					    CPU_ERRID_FIRST, NULL,
2387					    " No cache dump available");
2388			}
2389		}
2390	}
2391}
2392
2393#ifdef DEBUG
2394int test_mp_cp = 0;
2395#endif
2396
2397/*
2398 * Cross-call callback routine to tell a CPU to read its own %afsr to check
2399 * for copyback errors and capture relevant information.
2400 */
2401static uint_t
2402get_cpu_status(uint64_t arg)
2403{
2404	struct async_flt *aflt = (struct async_flt *)arg;
2405	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
2406	uint64_t afsr;
2407	uint32_t ec_idx;
2408	uint64_t sdbh, sdbl;
2409	int i;
2410	uint32_t ec_set_size;
2411	uchar_t valid;
2412	ec_data_t ec_data[8];
2413	uint64_t ec_tag, flt_addr_tag, oafsr;
2414	uint64_t *acc_afsr = NULL;
2415
2416	get_asyncflt(&afsr);
2417	if (CPU_PRIVATE(CPU) != NULL) {
2418		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2419		afsr |= *acc_afsr;
2420		*acc_afsr = 0;
2421	}
2422
2423#ifdef DEBUG
2424	if (test_mp_cp)
2425		afsr |= P_AFSR_CP;
2426#endif
2427	aflt->flt_stat = afsr;
2428
2429	if (afsr & P_AFSR_CP) {
2430		/*
2431		 * Capture the UDBs
2432		 */
2433		get_udb_errors(&sdbh, &sdbl);
2434		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
2435		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
2436
2437		/*
2438		 * Clear CP bit before capturing ecache data
2439		 * and AFSR info.
2440		 */
2441		set_asyncflt(P_AFSR_CP);
2442
2443		/*
2444		 * See if we can capture the ecache line for the
2445		 * fault PA.
2446		 *
2447		 * Return a valid matching ecache line, if any.
2448		 * Otherwise, return the first matching ecache
2449		 * line marked invalid.
2450		 */
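		/*
		 * The fault PA selects a byte index within one E$ set; we
		 * step that index by the set size to visit the matching
		 * line in each way of the cache.
		 */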
2451		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
2452		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
2453		    ecache_associativity;
2454		spf_flt->flt_ec_lcnt = 0;
2455
2456		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
2457		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
2458			get_ecache_dtag(P2ALIGN(ec_idx, 64),
2459			    (uint64_t *)&ec_data[0], &ec_tag, &oafsr,
2460			    acc_afsr);
2461
2462			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
2463				continue;
2464
2465			valid = cpu_ec_state_valid &
2466			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
2467			    cpu_ec_state_shift);
2468
2469			if (valid || spf_flt->flt_ec_lcnt == 0) {
2470				spf_flt->flt_ec_tag = ec_tag;
2471				bcopy(&ec_data, &spf_flt->flt_ec_data,
2472				    sizeof (ec_data));
2473				spf_flt->flt_ec_lcnt = 1;
2474
2475				if (valid)
2476					break;
2477			}
2478		}
2479	}
2480	return (0);
2481}
2482
2483/*
2484 * CPU-module callback for the non-panicking CPUs.  This routine is invoked
2485 * from panic_idle() as part of the other CPUs stopping themselves when a
2486 * panic occurs.  We need to be VERY careful what we do here, since panicstr
2487 * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
2488 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
2489 * CP error information.
2490 */
2491void
2492cpu_async_panic_callb(void)
2493{
2494	spitf_async_flt cp;
2495	struct async_flt *aflt = (struct async_flt *)&cp;
2496	uint64_t *scrub_afsr;
2497
2498	if (panic_aflt.flt_id != 0) {
2499		aflt->flt_addr = panic_aflt.flt_addr;
2500		(void) get_cpu_status((uint64_t)aflt);
2501
2502		if (CPU_PRIVATE(CPU) != NULL) {
2503			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2504			if (*scrub_afsr & P_AFSR_CP) {
2505				aflt->flt_stat |= *scrub_afsr;
2506				*scrub_afsr = 0;
2507			}
2508		}
2509		if (aflt->flt_stat & P_AFSR_CP) {
2510			aflt->flt_id = panic_aflt.flt_id;
2511			aflt->flt_panic = 1;
2512			aflt->flt_inst = CPU->cpu_id;
2513			aflt->flt_class = CPU_FAULT;
2514			cp.flt_type = CPU_PANIC_CP_ERR;
2515			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
2516			    (void *)&cp, sizeof (cp), ue_queue,
2517			    aflt->flt_panic);
2518		}
2519	}
2520}
2521
2522/*
2523 * Turn off all cpu error detection, normally only used for panics.
2524 */
2525void
2526cpu_disable_errors(void)
2527{
2528	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
2529}
2530
2531/*
2532 * Enable errors.
2533 */
2534void
2535cpu_enable_errors(void)
2536{
2537	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
2538}
2539
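/*
 * Read the data at the fault PA.  In verbose mode, eight consecutive
 * 8-byte words starting at the 8-byte-aligned PA are read and logged;
 * for CE errors the ECC check bits recomputed by ecc_gen() are logged
 * alongside each word.
 */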
2540static void
2541cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
2542{
2543	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
2544	int i, loop = 1;
2545	ushort_t ecc_0;
2546	uint64_t paddr;
2547	uint64_t data;
2548
2549	if (verbose)
2550		loop = 8;
2551	for (i = 0; i < loop; i++) {
2552		paddr = aligned_addr + (i * 8);
2553		data = lddphys(paddr);
2554		if (verbose) {
2555			if (ce_err) {
2556				ecc_0 = ecc_gen((uint32_t)(data>>32),
2557				    (uint32_t)data);
2558				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2559				    NULL, "    Paddr 0x%" PRIx64 ", "
2560				    "Data 0x%08x.%08x, ECC 0x%x", paddr,
2561				    (uint32_t)(data>>32), (uint32_t)data,
2562				    ecc_0);
2563			} else {
2564				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
2565				    NULL, "    Paddr 0x%" PRIx64 ", "
2566				    "Data 0x%08x.%08x", paddr,
2567				    (uint32_t)(data>>32), (uint32_t)data);
2568			}
2569		}
2570	}
2571}
2572
2573static struct {		/* sec-ded-s4ed ecc code */
2574	uint_t hi, lo;
2575} ecc_code[8] = {
2576	{ 0xee55de23U, 0x16161161U },
2577	{ 0x55eede93U, 0x61612212U },
2578	{ 0xbb557b8cU, 0x49494494U },
2579	{ 0x55bb7b6cU, 0x94948848U },
2580	{ 0x16161161U, 0xee55de23U },
2581	{ 0x61612212U, 0x55eede93U },
2582	{ 0x49494494U, 0xbb557b8cU },
2583	{ 0x94948848U, 0x55bb7b6cU }
2584};
2585
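/*
 * Generate the 8 ECC check bits for a 64-bit data word.  Each ecc_code[i]
 * entry is a 64-bit parity-check mask (split into hi/lo 32-bit halves);
 * check bit i is the XOR of all data bits selected by mask i.
 */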
2586static ushort_t
2587ecc_gen(uint_t high_bytes, uint_t low_bytes)
2588{
2589	int i, j;
2590	uchar_t checker, bit_mask;
2591	struct {
2592		uint_t hi, lo;
2593	} hex_data, masked_data[8];
2594
2595	hex_data.hi = high_bytes;
2596	hex_data.lo = low_bytes;
2597
2598	/* mask out bits according to sec-ded-s4ed ecc code */
2599	for (i = 0; i < 8; i++) {
2600		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
2601		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
2602	}
2603
2604	/*
2605	 * xor all bits in masked_data[i] to get bit_i of checker,
2606	 * where i = 0 to 7
2607	 */
2608	checker = 0;
2609	for (i = 0; i < 8; i++) {
2610		bit_mask = 1 << i;
2611		for (j = 0; j < 32; j++) {
2612			if (masked_data[i].lo & 1) checker ^= bit_mask;
2613			if (masked_data[i].hi & 1) checker ^= bit_mask;
2614			masked_data[i].hi >>= 1;
2615			masked_data[i].lo >>= 1;
2616		}
2617	}
2618	return (checker);
2619}
2620
2621/*
2622 * Flush the entire ecache using displacement flush by reading through a
2623 * physical address range as large as the ecache.
2624 */
2625void
2626cpu_flush_ecache(void)
2627{
2628	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2629	    cpunodes[CPU->cpu_id].ecache_linesize);
2630}
2631
2632/*
2633 * Read and display the data in the cache line where the
2634 * original CE error occurred.
2635 * This routine is mainly used for debugging new hardware.
2636 */
2637void
2638read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
2639{
2640	kpreempt_disable();
2641	/* disable ECC error traps */
2642	set_error_enable(EER_ECC_DISABLE);
2643
2644	/*
2645	 * flush the ecache
2646	 * read the data
2647	 * check to see if an ECC error occurred
2648	 */
2649	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
2650	    cpunodes[CPU->cpu_id].ecache_linesize);
2651	set_lsu(get_lsu() | cache_boot_state);
2652	cpu_read_paddr(ecc, verbose, ce_err);
2653	(void) check_ecc(ecc);
2654
2655	/* enable ECC error traps */
2656	set_error_enable(EER_ENABLE);
2657	kpreempt_enable();
2658}
2659
2660/*
2661 * Check the AFSR bits for UE/CE persistence.
2662 * If UE or CE errors are detected, the routine will
2663 * clear all the AFSR sticky bits (except CP for
2664 * spitfire/blackbird) and the UDBs.
2665 * If ce_debug or ue_debug is set, log any UE/CE errors detected.
2666 */
2667static int
2668check_ecc(struct async_flt *ecc)
2669{
2670	uint64_t t_afsr;
2671	uint64_t t_afar;
2672	uint64_t udbh;
2673	uint64_t udbl;
2674	ushort_t udb;
2675	int persistent = 0;
2676
2677	/*
2678	 * Capture the AFSR, AFAR and UDBs info
2679	 */
2680	get_asyncflt(&t_afsr);
2681	get_asyncaddr(&t_afar);
2682	t_afar &= SABRE_AFAR_PA;
2683	get_udb_errors(&udbh, &udbl);
2684
2685	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
2686		/*
2687		 * Clear the errors
2688		 */
2689		clr_datapath();
2690
2691		if (isus2i || isus2e)
2692			set_asyncflt(t_afsr);
2693		else
2694			set_asyncflt(t_afsr & ~P_AFSR_CP);
2695
2696		/*
2697		 * determine whether to check UDBH or UDBL for persistence
2698		 */
2699		if (ecc->flt_synd & UDBL_REG) {
2700			udb = (ushort_t)udbl;
2701			t_afar |= 0x8;
2702		} else {
2703			udb = (ushort_t)udbh;
2704		}
2705
2706		if (ce_debug || ue_debug) {
2707			spitf_async_flt spf_flt; /* for logging */
2708			struct async_flt *aflt =
2709			    (struct async_flt *)&spf_flt;
2710
2711			/* Package the info nicely in the spf_flt struct */
2712			bzero(&spf_flt, sizeof (spitf_async_flt));
2713			aflt->flt_stat = t_afsr;
2714			aflt->flt_addr = t_afar;
2715			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
2716			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
2717
2718			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
2719			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
2720			    " check_ecc: Dumping captured error states ...");
2721		}
2722
2723		/*
2724		 * if the fault addresses don't match, not persistent
2725		 */
2726		if (t_afar != ecc->flt_addr) {
2727			return (persistent);
2728		}
2729
2730		/*
2731		 * check for UE persistence
2732		 * since all DIMMs in the bank are identified for a UE,
2733		 * there's no reason to check the syndrome
2734		 */
2735		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
2736			persistent = 1;
2737		}
2738
2739		/*
2740		 * check for CE persistence
2741		 */
2742		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
2743			if ((udb & P_DER_E_SYND) ==
2744			    (ecc->flt_synd & P_DER_E_SYND)) {
2745				persistent = 1;
2746			}
2747		}
2748	}
2749	return (persistent);
2750}
2751
2752#ifdef HUMMINGBIRD
2753#define	HB_FULL_DIV		1
2754#define	HB_HALF_DIV		2
2755#define	HB_LOWEST_DIV		8
2756#define	HB_ECLK_INVALID		0xdeadbad
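/*
 * hb_eclk[] maps a clock divisor (1, 2, 4, 6 or 8) to the corresponding
 * HB_ESTAR_MODE setting; unsupported divisors map to HB_ECLK_INVALID.
 */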
2757static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
2758	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
2759	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
2760	HB_ECLK_8 };
2761
2762#define	HB_SLOW_DOWN		0
2763#define	HB_SPEED_UP		1
2764
2765#define	SET_ESTAR_MODE(mode)					\
2766	stdphysio(HB_ESTAR_MODE, (mode));			\
2767	/*							\
2768	 * PLL logic requires minimum of 16 clock		\
2769	 * cycles to lock to the new clock speed.		\
2770	 * Wait 1 usec to satisfy this requirement.		\
2771	 */							\
2772	drv_usecwait(1);
2773
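/*
 * CHANGE_REFRESH_COUNT recomputes the memory refresh count for the new
 * divisor: new_count is the number of HB_REFRESH_CLOCKS_PER_COUNT cpu-clock
 * periods that fit into HB_REFRESH_INTERVAL nanoseconds at the divided
 * clock rate, so the refresh interval is presumably held constant in real
 * time as the cpu clock is scaled.
 */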
2774#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
2775{								\
2776	volatile uint64_t data;					\
2777	uint64_t count, new_count;				\
2778	clock_t delay;						\
2779	data = lddphysio(HB_MEM_CNTRL0);			\
2780	count = (data & HB_REFRESH_COUNT_MASK) >>		\
2781	    HB_REFRESH_COUNT_SHIFT;				\
2782	new_count = (HB_REFRESH_INTERVAL *			\
2783	    cpunodes[CPU->cpu_id].clock_freq) /			\
2784	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
2785	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
2786	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
2787	stdphysio(HB_MEM_CNTRL0, data);				\
2788	data = lddphysio(HB_MEM_CNTRL0);			\
2789	/*							\
2790	 * If we are slowing down the cpu and Memory		\
2791	 * Self Refresh is not enabled, it is required		\
2792	 * to wait for old refresh count to count-down and	\
2793	 * new refresh count to go into effect (let new value	\
2794	 * counts down once).					\
2795	 */							\
2796	if ((direction) == HB_SLOW_DOWN &&			\
2797	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
2798		/*						\
2799		 * Each count takes 64 cpu clock cycles		\
2800		 * to decrement.  Wait for current refresh	\
2801		 * count plus new refresh count at current	\
2802		 * cpu speed to count down to zero.  Round	\
2803		 * up the delay time.				\
2804		 */						\
2805		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
2806		    (count + new_count) * MICROSEC * (cur_div)) /\
2807		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
2808		drv_usecwait(delay);				\
2809	}							\
2810}
2811
2812#define	SET_SELF_REFRESH(bit)					\
2813{								\
2814	volatile uint64_t data;					\
2815	data = lddphysio(HB_MEM_CNTRL0);			\
2816	data = (data & ~HB_SELF_REFRESH_MASK) |			\
2817	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
2818	stdphysio(HB_MEM_CNTRL0, data);				\
2819	data = lddphysio(HB_MEM_CNTRL0);			\
2820}
2821#endif	/* HUMMINGBIRD */
2822
2823/* ARGSUSED */
2824void
2825cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
2826{
2827#ifdef HUMMINGBIRD
2828	uint64_t cur_mask, cur_divisor = 0;
2829	volatile uint64_t reg;
2830	processor_info_t *pi = &(CPU->cpu_type_info);
2831	int index;
2832
2833	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
2834	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
2835		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
2836		    new_divisor);
2837		return;
2838	}
2839
2840	reg = lddphysio(HB_ESTAR_MODE);
2841	cur_mask = reg & HB_ECLK_MASK;
2842	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
2843		if (hb_eclk[index] == cur_mask) {
2844			cur_divisor = index;
2845			break;
2846		}
2847	}
2848
2849	if (cur_divisor == 0)
2850		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
2851		    "can't be determined!");
2852
2853	/*
2854	 * If we are already at the requested divisor speed, just
2855	 * return.
2856	 */
2857	if (cur_divisor == new_divisor)
2858		return;
2859
2860	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
2861		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2862		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2863		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2864
2865	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2866		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2867		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2868		/* LINTED: E_FALSE_LOGICAL_EXPR */
2869		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2870
2871	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
2872		/*
2873		 * Transition to 1/2 speed first, then to
2874		 * lower speed.
2875		 */
2876		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
2877		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2878		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
2879
2880		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
2881		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2882
2883	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
2884		/*
2885		 * Transition to 1/2 speed first, then to
2886		 * full speed.
2887		 */
2888		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
2889		/* LINTED: E_FALSE_LOGICAL_EXPR */
2890		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);
2891
2892		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
2893		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2894		/* LINTED: E_FALSE_LOGICAL_EXPR */
2895		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);
2896
2897	} else if (cur_divisor < new_divisor) {
2898		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
2899		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2900
2901	} else if (cur_divisor > new_divisor) {
2902		SET_ESTAR_MODE(hb_eclk[new_divisor]);
2903		/* LINTED: E_FALSE_LOGICAL_EXPR */
2904		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
2905	}
2906	CPU->cpu_m.divisor = (uchar_t)new_divisor;
2907	cpu_set_curr_clock(((uint64_t)pi->pi_clock * 1000000) / new_divisor);
2908#endif
2909}
2910
2911/*
2912 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
2913 * we clear all the sticky bits. If a non-null pointer to an async fault
2914 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
2915 * info will be returned in the structure.  If a non-null pointer to a
2916 * uint64_t is passed in, this will be updated if the CP bit is set in the
2917 * AFSR.  The afsr will be returned.
2918 */
2919static uint64_t
2920clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
2921{
2922	struct async_flt *aflt = (struct async_flt *)spf_flt;
2923	uint64_t afsr;
2924	uint64_t udbh, udbl;
2925
2926	get_asyncflt(&afsr);
2927
2928	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
2929		*acc_afsr |= afsr;
2930
2931	if (spf_flt != NULL) {
2932		aflt->flt_stat = afsr;
2933		get_asyncaddr(&aflt->flt_addr);
2934		aflt->flt_addr &= SABRE_AFAR_PA;
2935
2936		get_udb_errors(&udbh, &udbl);
2937		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
2938		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
2939	}
2940
2941	set_asyncflt(afsr);		/* clear afsr */
2942	clr_datapath();			/* clear udbs */
2943	return (afsr);
2944}
2945
2946/*
2947 * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
2948 * and tag of the first bad line will be returned. We also return the old-afsr
2949 * (before clearing the sticky bits). The linecnt data will be updated to
2950 * indicate the number of bad lines detected.
2951 */
2952static void
2953scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
2954    uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
2955{
2956	ec_data_t t_ecdata[8];
2957	uint64_t t_etag, oafsr;
2958	uint64_t pa = AFLT_INV_ADDR;
2959	uint32_t i, j, ecache_sz;
2960	uint64_t acc_afsr = 0;
2961	uint64_t *cpu_afsr = NULL;
2962
2963	if (CPU_PRIVATE(CPU) != NULL)
2964		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
2965
2966	*linecnt = 0;
2967	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
2968
2969	for (i = 0; i < ecache_sz; i += 64) {
2970		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
2971		    cpu_afsr);
2972		acc_afsr |= oafsr;
2973
2974		/*
2975		 * Scan through the whole 64 bytes line in 8 8-byte chunks
2976		 * looking for the first occurrence of an EDP error.  The AFSR
2977		 * info is captured for each 8-byte chunk.  Note that for
2978		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
2979		 * 16-byte chunk granularity (i.e. the AFSR will be the same
2980		 * for the high and low 8-byte words within the 16-byte chunk).
2981		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
2982		 * granularity and only PSYND bits [7:0] are used.
2983		 */
2984		for (j = 0; j < 8; j++) {
2985			ec_data_t *ecdptr = &t_ecdata[j];
2986
2987			if (ecdptr->ec_afsr & P_AFSR_EDP) {
2988				uint64_t errpa;
2989				ushort_t psynd;
2990				uint32_t ec_set_size = ecache_sz /
2991				    ecache_associativity;
2992
2993				/*
2994				 * For Spitfire/Blackbird, we need to look at
2995				 * the PSYND to make sure that this 8-byte chunk
2996				 * is the right one.  PSYND bits [15:8] belong
2997				 * to the upper 8-byte (even) chunk.  Bits
2998				 * [7:0] belong to the lower 8-byte chunk (odd).
2999				 */
3000				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3001				if (!isus2i && !isus2e) {
3002					if (j & 0x1)
3003						psynd = psynd & 0xFF;
3004					else
3005						psynd = psynd >> 8;
3006
3007					if (!psynd)
3008						continue; /* wrong chunk */
3009				}
3010
3011				/* Construct the PA */
3012				errpa = ((t_etag & cpu_ec_tag_mask) <<
3013				    cpu_ec_tag_shift) | ((i | (j << 3)) %
3014				    ec_set_size);
3015
3016				/* clean up the cache line */
3017				flushecacheline(P2ALIGN(errpa, 64),
3018				    cpunodes[CPU->cpu_id].ecache_size);
3019
3020				oafsr = clear_errors(NULL, cpu_afsr);
3021				acc_afsr |= oafsr;
3022
3023				(*linecnt)++;
3024
3025				/*
3026				 * Capture the PA for the first bad line found.
3027				 * Return the ecache dump and tag info.
3028				 */
3029				if (pa == AFLT_INV_ADDR) {
3030					int k;
3031
3032					pa = errpa;
3033					for (k = 0; k < 8; k++)
3034						ecache_data[k] = t_ecdata[k];
3035					*ecache_tag = t_etag;
3036				}
3037				break;
3038			}
3039		}
3040	}
3041	*t_afar = pa;
3042	*t_afsr = acc_afsr;
3043}
3044
3045static void
3046cpu_log_ecmem_info(spitf_async_flt *spf_flt)
3047{
3048	struct async_flt *aflt = (struct async_flt *)spf_flt;
3049	uint64_t ecache_tag = spf_flt->flt_ec_tag;
3050	char linestr[30];
3051	char *state_str;
3052	int i;
3053
3054	/*
3055	 * Check the ecache tag to make sure it
3056	 * is valid. If invalid, a memory dump was
3057	 * captured instead of an ecache dump.
3058	 */
3059	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
3060		uchar_t eparity = (uchar_t)
3061		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
3062
3063		uchar_t estate = (uchar_t)
3064		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
3065
3066		if (estate == cpu_ec_state_shr)
3067			state_str = "Shared";
3068		else if (estate == cpu_ec_state_exl)
3069			state_str = "Exclusive";
3070		else if (estate == cpu_ec_state_own)
3071			state_str = "Owner";
3072		else if (estate == cpu_ec_state_mod)
3073			state_str = "Modified";
3074		else
3075			state_str = "Invalid";
3076
3077		if (spf_flt->flt_ec_lcnt > 1) {
3078			(void) snprintf(linestr, sizeof (linestr),
3079			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
3080		} else {
3081			linestr[0] = '\0';
3082		}
3083
3084		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3085		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
3086		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
3087		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
3088		    (uint32_t)ecache_tag, state_str,
3089		    (uint32_t)eparity, linestr);
3090	} else {
3091		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
3092		    " E$tag != PA from AFAR; E$line was victimized"
3093		    "\n    dumping memory from PA 0x%08x.%08x instead",
3094		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
3095		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
3096	}
3097
3098	/*
3099	 * Dump out all 8 8-byte ecache data chunks captured.
3100	 * For each 8-byte chunk captured, we check the
3101	 * captured afsr's parity syndrome to find out
3102	 * which 8-byte chunk is bad. For a memory dump, the
3103	 * AFSR values were initialized to 0.
3104	 */
3105	for (i = 0; i < 8; i++) {
3106		ec_data_t *ecdptr;
3107		uint_t offset;
3108		ushort_t psynd;
3109		ushort_t bad;
3110		uint64_t edp;
3111
3112		offset = i << 3;	/* multiply by 8 */
3113		ecdptr = &spf_flt->flt_ec_data[i];
3114		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
3115		edp = ecdptr->ec_afsr & P_AFSR_EDP;
3116
3117		/*
3118		 * For Sabre/Hummingbird, parity synd is captured only
3119		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
3120		 * For spitfire/blackbird, AFSR.PSYND is captured
3121		 * in 16-byte granularity. [15:8] represent
3122		 * the upper 8 byte and [7:0] the lower 8 byte.
3123		 */
3124		if (isus2i || isus2e || (i & 0x1))
3125			bad = (psynd & 0xFF);		/* check bits [7:0] */
3126		else
3127			bad = (psynd & 0xFF00);		/* check bits [15:8] */
3128
3129		if (bad && edp) {
3130			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3131			    " E$Data (0x%02x): 0x%08x.%08x "
3132			    "*Bad* PSYND=0x%04x", offset,
3133			    (uint32_t)(ecdptr->ec_d8 >> 32),
3134			    (uint32_t)ecdptr->ec_d8, psynd);
3135		} else {
3136			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
3137			    " E$Data (0x%02x): 0x%08x.%08x", offset,
3138			    (uint32_t)(ecdptr->ec_d8 >> 32),
3139			    (uint32_t)ecdptr->ec_d8);
3140		}
3141	}
3142}
3143
3144/*
3145 * Common logging function for all cpu async errors.  This function allows the
3146 * caller to generate a single cmn_err() call that logs the appropriate items
3147 * from the fault structure, and implements our rules for AFT logging levels.
3148 *
3149 *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
3150 *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
3151 *	spflt: pointer to spitfire async fault structure
3152 *	logflags: bitflags indicating what to output
3153 *	endstr: an end string to appear at the end of this log
3154 *	fmt: a format string to appear at the beginning of the log
3155 *
3156 * The logflags allows the construction of predetermined output from the spflt
3157 * structure.  The individual data items always appear in a consistent order.
3158 * Note that either or both of the spflt structure pointer and logflags may be
3159 * NULL or zero respectively, indicating that the predetermined output
3160 * substrings are not requested in this log.  The output looks like this:
3161 *
3162 *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
3163 *	<CPU_SPACE><CPU_ERRID>
3164 *	newline+4spaces<CPU_AFSR><CPU_AFAR>
3165 *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
3166 *	newline+4spaces<CPU_UDBH><CPU_UDBL>
3167 *	newline+4spaces<CPU_SYND>
3168 *	newline+4spaces<endstr>
3169 *
3170 * Note that <endstr> may not start on a newline if we are logging <CPU_SYND>;
3171 * it is assumed that <endstr> will be the unum string in this case.  The size
3172 * of our intermediate formatting buf[] is based on the worst case of all flags
3173 * being enabled.  We pass the caller's varargs directly to vcmn_err() for
3174 * formatting so we don't need additional stack space to format them here.
3175 */
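/*
 * For example, the UE logging path above calls:
 *
 *	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
 *	    " Uncorrectable Memory Error on");
 *
 * which produces an [AFT1] line containing the caller's format string, the
 * substrings selected by UE_LFLAGS, and the unum string as <endstr>.
 */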
3176/*PRINTFLIKE6*/
3177static void
3178cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
3179    const char *endstr, const char *fmt, ...)
3180{
3181	struct async_flt *aflt = (struct async_flt *)spflt;
3182	char buf[400], *p, *q; /* see comments about buf[] size above */
3183	va_list ap;
3184	int console_log_flag;
3185
3186	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
3187	    (aflt->flt_stat & P_AFSR_LEVEL1)) ||
3188	    (aflt->flt_panic)) {
3189		console_log_flag = (tagnum < 2) || aft_verbose;
3190	} else {
3191		int verbose = ((aflt->flt_class == BUS_FAULT) ||
3192		    (aflt->flt_stat & P_AFSR_CE)) ?
3193		    ce_verbose_memory : ce_verbose_other;
3194
3195		if (!verbose)
3196			return;
3197
3198		console_log_flag = (verbose > 1);
3199	}
3200
3201	if (console_log_flag)
3202		(void) sprintf(buf, "[AFT%d]", tagnum);
3203	else
3204		(void) sprintf(buf, "![AFT%d]", tagnum);
3205
3206	p = buf + strlen(buf);	/* current buffer position */
3207	q = buf + sizeof (buf);	/* pointer past end of buffer */
3208
3209	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
3210		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
3211		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
3212		p += strlen(p);
3213	}
3214
3215	/*
3216	 * Copy the caller's format string verbatim into buf[].  It will be
3217	 * formatted by the call to vcmn_err() at the end of this function.
3218	 */
3219	if (fmt != NULL && p < q) {
3220		(void) strncpy(p, fmt, (size_t)(q - p - 1));
3221		buf[sizeof (buf) - 1] = '\0';
3222		p += strlen(p);
3223	}
3224
3225	if (spflt != NULL) {
3226		if (logflags & CPU_FLTCPU) {
3227			(void) snprintf(p, (size_t)(q - p), " CPU%d",
3228			    aflt->flt_inst);
3229			p += strlen(p);
3230		}
3231
3232		if (logflags & CPU_SPACE) {
3233			if (aflt->flt_status & ECC_D_TRAP)
3234				(void) snprintf(p, (size_t)(q - p),
3235				    " Data access");
3236			else if (aflt->flt_status & ECC_I_TRAP)
3237				(void) snprintf(p, (size_t)(q - p),
3238				    " Instruction access");
3239			p += strlen(p);
3240		}
3241
3242		if (logflags & CPU_TL) {
3243			(void) snprintf(p, (size_t)(q - p), " at TL%s",
3244			    aflt->flt_tl ? ">0" : "=0");
3245			p += strlen(p);
3246		}
3247
3248		if (logflags & CPU_ERRID) {
3249			(void) snprintf(p, (size_t)(q - p),
3250			    ", errID 0x%08x.%08x",
3251			    (uint32_t)(aflt->flt_id >> 32),
3252			    (uint32_t)aflt->flt_id);
3253			p += strlen(p);
3254		}
3255
3256		if (logflags & CPU_AFSR) {
3257			(void) snprintf(p, (size_t)(q - p),
3258			    "\n    AFSR 0x%8b.%8b",
3259			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
3260			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
3261			p += strlen(p);
3262		}
3263
3264		if (logflags & CPU_AFAR) {
3265			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
3266			    (uint32_t)(aflt->flt_addr >> 32),
3267			    (uint32_t)aflt->flt_addr);
3268			p += strlen(p);
3269		}
3270
3271		if (logflags & CPU_AF_PSYND) {
3272			ushort_t psynd = (ushort_t)
3273			    (aflt->flt_stat & P_AFSR_P_SYND);
3274
3275			(void) snprintf(p, (size_t)(q - p),
3276			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
3277			    psynd, ecc_psynd_score(psynd));
3278			p += strlen(p);
3279		}
3280
3281		if (logflags & CPU_AF_ETS) {
3282			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
3283			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
3284			p += strlen(p);
3285		}
3286
3287		if (logflags & CPU_FAULTPC) {
3288			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
3289			    (void *)aflt->flt_pc);
3290			p += strlen(p);
3291		}
3292
3293		if (logflags & CPU_UDBH) {
3294			(void) snprintf(p, (size_t)(q - p),
3295			    "\n    UDBH 0x%4b UDBH.ESYND 0x%02x",
3296			    spflt->flt_sdbh, UDB_FMTSTR,
3297			    spflt->flt_sdbh & 0xFF);
3298			p += strlen(p);
3299		}
3300
3301		if (logflags & CPU_UDBL) {
3302			(void) snprintf(p, (size_t)(q - p),
3303			    " UDBL 0x%4b UDBL.ESYND 0x%02x",
3304			    spflt->flt_sdbl, UDB_FMTSTR,
3305			    spflt->flt_sdbl & 0xFF);
3306			p += strlen(p);
3307		}
3308
3309		if (logflags & CPU_SYND) {
3310			ushort_t synd = SYND(aflt->flt_synd);
3311
3312			(void) snprintf(p, (size_t)(q - p),
3313			    "\n    %s Syndrome 0x%x Memory Module ",
3314			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
3315			p += strlen(p);
3316		}
3317	}
3318
3319	if (endstr != NULL) {
3320		if (!(logflags & CPU_SYND))
3321			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
3322		else
3323			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
3324		p += strlen(p);
3325	}
3326
3327	if (ce_code == CE_CONT && (p < q - 1))
3328		(void) strcpy(p, "\n"); /* add final \n if needed */
3329
3330	va_start(ap, fmt);
3331	vcmn_err(ce_code, buf, ap);
3332	va_end(ap);
3333}
3334
3335/*
3336 * Ecache Scrubbing
3337 *
3338 * The basic idea is to prevent lines from sitting in the ecache long enough
3339 * to build up soft errors which can lead to ecache parity errors.
3340 *
3341 * The following rules are observed when flushing the ecache:
3342 *
3343 * 1. When the system is busy, flush bad clean lines
3344 * 2. When the system is idle, flush all clean lines
3345 * 3. When the system is idle, flush good dirty lines
3346 * 4. Never flush bad dirty lines.
3347 *
3348 *	modify	parity	busy   idle
3349 *	----------------------------
3350 *	clean	good		X
3351 *	clean	bad	X	X
3352 *	dirty	good		X
3353 *	dirty	bad
3354 *
3355 * Bad or good refers to whether a line has an E$ parity error or not.
3356 * Clean or dirty refers to the state of the modified bit.  We currently
3357 * default the scan rate to 100 (scan 10% of the cache per second).
3358 *
3359 * The following are E$ states and actions.
3360 *
3361 * We encode our state as a 3-bit number, consisting of:
3362 *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
3363 *	ECACHE_STATE_PARITY	(0=good,  1=bad)
3364 *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
3365 *
3366 * We associate a flushing and a logging action with each state.
3367 *
3368 * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
3369 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
3370 * E$ only, in addition to value being set by ec_flush.
3371 */
3372
3373#define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
3374#define	NEVER_FLUSH		0x0	/* never flush the E$ line */
3375#define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
3376
3377struct {
3378	char	ec_flush;		/* whether to flush or not */
3379	char	ec_log;			/* ecache logging */
3380	char	ec_log_type;		/* log type info */
3381} ec_action[] = {	/* states of the E$ line in M P B */
3382	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
3383	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
3384	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
3385	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
3386	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
3387	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
3388	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
3389	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
3390};
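/*
 * For example, a clean line with bad parity is always flushed and is logged
 * as CPU_BADLINE_CI_ERR or CPU_BADLINE_CB_ERR depending on whether the
 * system is idle or busy; a dirty line with bad parity is never flushed
 * (rule 4 above) but is still logged.
 */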
3391
3392/*
3393 * Offsets into the ec_action[] that determine clean_good_busy and
3394 * dirty_good_busy lines.
3395 */
3396#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
3397#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */
3398
3399/*
3400 * We flush lines which are Clean_Good_Busy and also lines which are
3401 * Dirty_Good_Busy, but only for a non-mirrored E$.
3402 */
3403#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
3404#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
3405
3406#define	ECACHE_STATE_MODIFIED	0x4
3407#define	ECACHE_STATE_PARITY	0x2
3408#define	ECACHE_STATE_BUSY	0x1
3409
3410/*
3411 * If ecache is mirrored, ecache_calls_a_sec and ecache_scan_rate are reduced.
3412 */
3413int ecache_calls_a_sec_mirrored = 1;
3414int ecache_lines_per_call_mirrored = 1;
3415
3416int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
3417int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
3418int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
3419int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
3420int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
3421int ecache_idle_factor = 1;		/* increase the scan rate when idle */
3422int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
3423int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
3424
3425volatile int ec_timeout_calls = 1;	/* timeout calls */
3426
3427/*
3428 * Interrupt number and pil for ecache scrubber cross-trap calls.
3429 */
3430static uint64_t ecache_scrub_inum;
3431uint_t ecache_scrub_pil = PIL_9;
3432
3433/*
3434 * Kstats for the E$ scrubber.
3435 */
3436typedef struct ecache_kstat {
3437	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
3438	kstat_named_t clean_good_busy;		/* # of lines skipped */
3439	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
3440	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
3441	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
3442	kstat_named_t dirty_good_busy;		/* # of lines skipped */
3443	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
3444	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
3445	kstat_named_t invalid_lines;		/* # of invalid lines */
3446	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
3447	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
3448	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
3449} ecache_kstat_t;
3450
3451static ecache_kstat_t ec_kstat_template = {
3452	{ "clean_good_idle", KSTAT_DATA_ULONG },
3453	{ "clean_good_busy", KSTAT_DATA_ULONG },
3454	{ "clean_bad_idle", KSTAT_DATA_ULONG },
3455	{ "clean_bad_busy", KSTAT_DATA_ULONG },
3456	{ "dirty_good_idle", KSTAT_DATA_ULONG },
3457	{ "dirty_good_busy", KSTAT_DATA_ULONG },
3458	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
3459	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
3460	{ "invalid_lines", KSTAT_DATA_ULONG },
3461	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
3462	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
3463	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
3464};
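/*
 * These counters are published per CPU as the "ecache_kstat" named kstat
 * under the "unix" module (see ecache_kstat_init() below); from userland
 * they can be read with, for example, kstat -n ecache_kstat.
 */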
3465
3466struct kmem_cache *sf_private_cache;
3467
3468/*
 * Called ecache_calls_a_sec times per second on each CPU to scan a chunk
 * of the E$, advancing the E$ flush index appropriately for the next call.
3471 */
3472void
scrub_ecache_line(void)
3474{
3475	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3476	int cpuid = CPU->cpu_id;
3477	uint32_t index = ssmp->ecache_flush_index;
3478	uint64_t ec_size = cpunodes[cpuid].ecache_size;
3479	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
3480	int nlines = ssmp->ecache_nlines;
3481	uint32_t ec_set_size = ec_size / ecache_associativity;
3482	int ec_mirror = ssmp->ecache_mirror;
3483	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
3484
3485	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
3486	int mpb;		/* encode Modified, Parity, Busy for action */
3487	uchar_t state;
3488	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
3489	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3490	ec_data_t ec_data[8];
3491	kstat_named_t *ec_knp;
3492
3493	switch (ec_mirror) {
3494		default:
3495		case ECACHE_CPU_NON_MIRROR:
3496			/*
3497			 * The E$ scan rate is expressed in units of tenths of
3498			 * a percent.  ecache_scan_rate = 1000 (100%) means the
3499			 * whole cache is scanned every second.
3500			 */
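			/*
			 * A worked example, assuming a 4 MB E$ with 64-byte
			 * lines (65536 lines), the default ecache_scan_rate
			 * of 100 (10%) and ecache_calls_a_sec of 100:
			 * scan_lines = (65536 * 100) / (1000 * 100) = 65,
			 * i.e. roughly 6500 lines per second, covering the
			 * whole cache in about ten seconds.
			 */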
3501			scan_lines = (nlines * ecache_scan_rate) /
3502			    (1000 * ecache_calls_a_sec);
3503			if (!(ssmp->ecache_busy)) {
3504				if (ecache_idle_factor > 0) {
3505					scan_lines *= ecache_idle_factor;
3506				}
3507			} else {
3508				flush_clean_busy = (scan_lines *
3509				    ecache_flush_clean_good_busy) / 100;
3510				flush_dirty_busy = (scan_lines *
3511				    ecache_flush_dirty_good_busy) / 100;
3512			}
3513
3514			ec_timeout_calls = (ecache_calls_a_sec ?
3515			    ecache_calls_a_sec : 1);
3516			break;
3517
3518		case ECACHE_CPU_MIRROR:
3519			scan_lines = ecache_lines_per_call_mirrored;
3520			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
3521			    ecache_calls_a_sec_mirrored : 1);
3522			break;
3523	}
3524
3525	/*
	 * The ecache scrubber reads and decodes the E$ tag to determine
	 * whether the corresponding E$ line can be scrubbed.  The original
	 * scrubber logic implicitly assumed that the E$ tag is valid, but
	 * the tag itself may be corrupted and carry parity errors.  The
	 * scrubber therefore checks the validity of the E$ tag before
	 * scrubbing.  When a parity error is detected in the E$ tag, the
	 * tag can be recovered and scrubbed under certain conditions so
	 * that an ETP error condition is avoided.
3535	 */
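	/*
	 * In outline, each scanned line is handled as follows (a summary of
	 * the loop below, not additional logic): read the E$ tag and AFSR;
	 * if AFSR.ETP is set, try tag recovery (ecache_scrub_tag_err());
	 * otherwise, if the tag is valid, check the line data, record the
	 * modified/parity/busy state in mpb, bump the matching kstat, and
	 * flush or page-retire the line as directed by ec_action[mpb];
	 * otherwise count the line as invalid.
	 */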
3536
3537	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
3538		/*
3539		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}.
3541		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
3542		 */
3543		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
3544		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
3545		    cpu_ec_state_shift);
3546
3547		/*
		 * If ETP is set, try to scrub the E$ tag.
3549		 */
3550		if (nafsr & P_AFSR_ETP) {
3551			ecache_scrub_tag_err(nafsr, state, index);
3552		} else if (state & cpu_ec_state_valid) {
3553			/*
3554			 * ETP is not set, E$ tag is valid.
3555			 * Proceed with the E$ scrubbing.
3556			 */
3557			if (state & cpu_ec_state_dirty)
3558				mpb |= ECACHE_STATE_MODIFIED;
3559
3560			tafsr = check_ecache_line(index, acc_afsr);
3561
3562			if (tafsr & P_AFSR_EDP) {
3563				mpb |= ECACHE_STATE_PARITY;
3564
3565				if (ecache_scrub_verbose ||
3566				    ecache_scrub_panic) {
3567					get_ecache_dtag(P2ALIGN(index, 64),
3568					    (uint64_t *)&ec_data[0],
3569					    &ec_tag, &oafsr, acc_afsr);
3570				}
3571			}
3572
3573			if (ssmp->ecache_busy)
3574				mpb |= ECACHE_STATE_BUSY;
3575
3576			ec_knp = (kstat_named_t *)ec_ksp + mpb;
3577			ec_knp->value.ul++;
3578
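			/*
			 * Reconstruct the line's physical address from the
			 * tag (upper address bits) and its byte offset within
			 * the E$ set.
			 */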
3579			paddr = ((ec_tag & cpu_ec_tag_mask) <<
3580			    cpu_ec_tag_shift) | (index % ec_set_size);
3581
3582			/*
			 * Flush the line as directed by ec_action[].ec_flush;
			 * on mirrored E$, clean_good_busy and dirty_good_busy
			 * (MIRROR_FLUSH) lines are flushed as well.
3586			 */
3587			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
3588				flushecacheline(paddr, ec_size);
3589			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
3590			    (ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
3591				flushecacheline(paddr, ec_size);
3592			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
3593				softcall(ecache_page_retire, (void *)paddr);
3594			}
3595
3596			/*
			 * On non-mirrored E$, flush a tunable percentage of
			 * the clean_good_busy and dirty_good_busy lines.
3599			 */
3600			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
3601				flush_clean_busy--;
3602				flushecacheline(paddr, ec_size);
3603				ec_ksp->clean_good_busy_flush.value.ul++;
3604			} else if (DGB(mpb, ec_mirror) &&
3605			    (flush_dirty_busy > 0)) {
3606				flush_dirty_busy--;
3607				flushecacheline(paddr, ec_size);
3608				ec_ksp->dirty_good_busy_flush.value.ul++;
3609			}
3610
3611			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
3612			    ecache_scrub_panic)) {
3613				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
3614				    tafsr);
3615			}
3616
3617		} else {
3618			ec_ksp->invalid_lines.value.ul++;
3619		}
3620
3621		if ((index += ec_linesize) >= ec_size)
3622			index = 0;
3623
3624	}
3625
3626	/*
3627	 * set the ecache scrub index for the next time around
3628	 */
3629	ssmp->ecache_flush_index = index;
3630
3631	if (*acc_afsr & P_AFSR_CP) {
3632		uint64_t ret_afsr;
3633
3634		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
3635		if ((ret_afsr & P_AFSR_CP) == 0)
3636			*acc_afsr = 0;
3637	}
3638}
3639
3640/*
3641 * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
3642 * we decrement the outstanding request count to zero.
3643 */
3644
3645/*ARGSUSED*/
3646uint_t
3647scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
3648{
3649	int i;
3650	int outstanding;
3651	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
3652	uint32_t *countp = &ssmp->ec_scrub_outstanding;
3653
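	/*
	 * If additional scrub requests are posted while we are running,
	 * atomic_add_32_nv() returns a non-zero residue and we loop to
	 * service them before returning.
	 */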
3654	do {
3655		outstanding = *countp;
3656		ASSERT(outstanding > 0);
3657		for (i = 0; i < outstanding; i++)
3658			scrub_ecache_line();
3659	} while (atomic_add_32_nv(countp, -outstanding));
3660
3661	return (DDI_INTR_CLAIMED);
3662}
3663
3664/*
3665 * force each cpu to perform an ecache scrub, called from a timeout
3666 */
3667extern xcfunc_t ecache_scrubreq_tl1;
3668
3669void
3670do_scrub_ecache_line(void)
3671{
3672	long delta;
3673
3674	if (ecache_calls_a_sec > hz)
3675		ecache_calls_a_sec = hz;
3676	else if (ecache_calls_a_sec <= 0)
3677		ecache_calls_a_sec = 1;
3678
3679	if (ecache_calls_a_sec_mirrored > hz)
3680		ecache_calls_a_sec_mirrored = hz;
3681	else if (ecache_calls_a_sec_mirrored <= 0)
3682		ecache_calls_a_sec_mirrored = 1;
3683
3684	if (ecache_scrub_enable) {
3685		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
3686		delta = hz / ec_timeout_calls;
3687	} else {
3688		delta = hz;
3689	}
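	/*
	 * For example, with hz = 100 and ec_timeout_calls at its non-mirrored
	 * steady-state value of ecache_calls_a_sec (100), delta is a single
	 * clock tick, so the scrub cross-trap is reissued on every tick.
	 */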
3690
3691	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3692	    delta);
3693}
3694
3695/*
3696 * initialization for ecache scrubbing
3697 * This routine is called AFTER all cpus have had cpu_init_private called
3698 * to initialize their private data areas.
3699 */
3700void
3701cpu_init_cache_scrub(void)
3702{
3703	if (ecache_calls_a_sec > hz) {
3704		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
3705		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
3706		ecache_calls_a_sec = hz;
3707	}
3708
3709	/*
3710	 * Register softint for ecache scrubbing.
3711	 */
3712	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
3713	    scrub_ecache_line_intr, NULL, SOFTINT_MT);
3714
3715	/*
3716	 * kick off the scrubbing using realtime timeout
3717	 */
3718	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
3719	    hz / ecache_calls_a_sec);
3720}
3721
3722/*
3723 * Unset the busy flag for this cpu.
3724 */
3725void
3726cpu_idle_ecache_scrub(struct cpu *cp)
3727{
3728	if (CPU_PRIVATE(cp) != NULL) {
3729		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3730		    sfpr_scrub_misc);
3731		ssmp->ecache_busy = ECACHE_CPU_IDLE;
3732	}
3733}
3734
3735/*
3736 * Set the busy flag for this cpu.
3737 */
3738void
3739cpu_busy_ecache_scrub(struct cpu *cp)
3740{
3741	if (CPU_PRIVATE(cp) != NULL) {
3742		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
3743		    sfpr_scrub_misc);
3744		ssmp->ecache_busy = ECACHE_CPU_BUSY;
3745	}
3746}
3747
3748/*
 * initialize the ecache scrubber data structures.
 * The global entry point cpu_init_private replaces this entry point.
3752 */
3753static void
3754cpu_init_ecache_scrub_dr(struct cpu *cp)
3755{
3756	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3757	int cpuid = cp->cpu_id;
3758
3759	/*
	 * initialize bookkeeping for cache scrubbing
3761	 */
3762	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3763
3764	ssmp->ecache_flush_index = 0;
3765
3766	ssmp->ecache_nlines =
3767	    cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
3768
3769	/*
3770	 * Determine whether we are running on mirrored SRAM
3771	 */
3772
3773	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
3774		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
3775	else
3776		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
3777
3778	cpu_busy_ecache_scrub(cp);
3779
3780	/*
3781	 * initialize the kstats
3782	 */
3783	ecache_kstat_init(cp);
3784}
3785
3786/*
3787 * uninitialize the ecache scrubber data structures
3788 * The global entry point cpu_uninit_private replaces this entry point.
3789 */
3790static void
3791cpu_uninit_ecache_scrub_dr(struct cpu *cp)
3792{
3793	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3794
3795	if (ssmp->ecache_ksp != NULL) {
3796		kstat_delete(ssmp->ecache_ksp);
3797		ssmp->ecache_ksp = NULL;
3798	}
3799
3800	/*
3801	 * un-initialize bookkeeping for cache scrubbing
3802	 */
3803	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
3804
3805	cpu_idle_ecache_scrub(cp);
3806}
3807
3810/*
3811 * Cpu private initialization.  This includes allocating the cpu_private
3812 * data structure, initializing it, and initializing the scrubber for this
3813 * cpu.  This is called once for EVERY cpu, including CPU 0. This function
3814 * calls cpu_init_ecache_scrub_dr to init the scrubber.
3815 * We use kmem_cache_create for the spitfire private data structure because it
 * needs to be allocated on an S_ECACHE_MAX_LSIZE (64) byte boundary.
3817 */
3818void
3819cpu_init_private(struct cpu *cp)
3820{
3821	spitfire_private_t *sfprp;
3822
3823	ASSERT(CPU_PRIVATE(cp) == NULL);
3824
3825	/*
3826	 * If the sf_private_cache has not been created, create it.
3827	 */
3828	if (sf_private_cache == NULL) {
3829		sf_private_cache = kmem_cache_create("sf_private_cache",
3830		    sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
3831		    NULL, NULL, NULL, NULL, 0);
3832		ASSERT(sf_private_cache);
3833	}
3834
3835	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
3836
3837	bzero(sfprp, sizeof (spitfire_private_t));
3838
3839	cpu_init_ecache_scrub_dr(cp);
3840}
3841
3842/*
 * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
 * deallocate the scrubber data structures and cpu_private data structure.
 * For now, this function just calls cpu_uninit_ecache_scrub_dr to uninit
3846 * the scrubber for the specified cpu.
3847 */
3848void
3849cpu_uninit_private(struct cpu *cp)
3850{
3851	ASSERT(CPU_PRIVATE(cp));
3852
3853	cpu_uninit_ecache_scrub_dr(cp);
3854	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
3855	CPU_PRIVATE(cp) = NULL;
3856}
3857
3858/*
3859 * initialize the ecache kstats for each cpu
3860 */
3861static void
3862ecache_kstat_init(struct cpu *cp)
3863{
3864	struct kstat *ksp;
3865	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
3866
3867	ASSERT(ssmp != NULL);
3868
3869	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
3870	    KSTAT_TYPE_NAMED,
3871	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
3872	    KSTAT_FLAG_WRITABLE)) == NULL) {
3873		ssmp->ecache_ksp = NULL;
3874		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
3875		return;
3876	}
3877
3878	ssmp->ecache_ksp = ksp;
3879	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
3880	kstat_install(ksp);
3881}
3882
3883/*
3884 * log the bad ecache information
3885 */
3886static void
3887ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
3888    uint64_t afsr)
3889{
3890	spitf_async_flt spf_flt;
3891	struct async_flt *aflt;
3892	int i;
3893	char *class;
3894
3895	bzero(&spf_flt, sizeof (spitf_async_flt));
3896	aflt = &spf_flt.cmn_asyncflt;
3897
3898	for (i = 0; i < 8; i++) {
3899		spf_flt.flt_ec_data[i] = ec_data[i];
3900	}
3901
3902	spf_flt.flt_ec_tag = ec_tag;
3903
3904	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
3905		spf_flt.flt_type = ec_action[mpb].ec_log_type;
	} else {
		spf_flt.flt_type = (ushort_t)mpb;
	}
3907
3908	aflt->flt_inst = CPU->cpu_id;
3909	aflt->flt_class = CPU_FAULT;
3910	aflt->flt_id = gethrtime_waitfree();
3911	aflt->flt_addr = paddr;
3912	aflt->flt_stat = afsr;
3913	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3914
3915	switch (mpb) {
3916	case CPU_ECACHE_TAG_ERR:
3917	case CPU_ECACHE_ADDR_PAR_ERR:
3918	case CPU_ECACHE_ETP_ETS_ERR:
3919	case CPU_ECACHE_STATE_ERR:
3920		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
3921		break;
3922	default:
3923		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
3924		break;
3925	}
3926
3927	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
3928	    ue_queue, aflt->flt_panic);
3929
3930	if (aflt->flt_panic)
3931		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
		    " line detected");
3933}
3934
3935/*
 * Process an ecache error that occurred during E$ scrubbing.
 * We scan the ecache to find the bad line, flush it, and start the
 * memscrubber to find any UE (in memory or in another cache).
3939 */
3940static uint64_t
3941ecache_scrub_misc_err(int type, uint64_t afsr)
3942{
3943	spitf_async_flt spf_flt;
3944	struct async_flt *aflt;
3945	uint64_t oafsr;
3946
3947	bzero(&spf_flt, sizeof (spitf_async_flt));
3948	aflt = &spf_flt.cmn_asyncflt;
3949
3950	/*
3951	 * Scan each line in the cache to look for the one
3952	 * with bad parity
3953	 */
3954	aflt->flt_addr = AFLT_INV_ADDR;
3955	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
3956	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
3957
3958	if (oafsr & P_AFSR_CP) {
3959		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
3960		*cp_afsr |= oafsr;
3961	}
3962
3963	/*
3964	 * If we found a bad PA, update the state to indicate if it is
3965	 * memory or I/O space.
3966	 */
3967	if (aflt->flt_addr != AFLT_INV_ADDR) {
3968		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
3969		    MMU_PAGESHIFT)) ? 1 : 0;
3970	}
3971
3972	spf_flt.flt_type = (ushort_t)type;
3973
3974	aflt->flt_inst = CPU->cpu_id;
3975	aflt->flt_class = CPU_FAULT;
3976	aflt->flt_id = gethrtime_waitfree();
3977	aflt->flt_status = afsr;
3978	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
3979
3980	/*
3981	 * We have the bad line, flush that line and start
3982	 * the memscrubber.
3983	 */
3984	if (spf_flt.flt_ec_lcnt > 0) {
3985		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
3986		    cpunodes[CPU->cpu_id].ecache_size);
3987		read_all_memscrub = 1;
3988		memscrub_run();
3989	}
3990
3991	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
3992	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
3993	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
3994
3995	return (oafsr);
3996}
3997
3998static void
3999ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
4000{
4001	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
4002	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
4003	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
4004	uint64_t ec_tag, paddr, oafsr;
4005	ec_data_t ec_data[8];
4006	int cpuid = CPU->cpu_id;
4007	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
4008	    ecache_associativity;
4009	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
4010
4011	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
4012	    &oafsr, cpu_afsr);
4013	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
4014	    (index % ec_set_size);
4015
4016	/*
4017	 * E$ tag state has good parity
4018	 */
4019	if ((afsr_ets & cpu_ec_state_parity) == 0) {
4020		if (afsr_ets & cpu_ec_parity) {
4021			/*
4022			 * E$ tag state bits indicate the line is clean,
4023			 * invalidate the E$ tag and continue.
4024			 */
4025			if (!(state & cpu_ec_state_dirty)) {
4026				/*
4027				 * Zero the tag and mark the state invalid
4028				 * with good parity for the tag.
4029				 */
4030				if (isus2i || isus2e)
4031					write_hb_ec_tag_parity(index);
4032				else
4033					write_ec_tag_parity(index);
4034
4035				/* Sync with the dual tag */
4036				flushecacheline(0,
4037				    cpunodes[CPU->cpu_id].ecache_size);
4038				ec_ksp->tags_cleared.value.ul++;
4039				ecache_scrub_log(ec_data, ec_tag, paddr,
4040				    CPU_ECACHE_TAG_ERR, afsr);
4041				return;
4042			} else {
4043				ecache_scrub_log(ec_data, ec_tag, paddr,
4044				    CPU_ECACHE_ADDR_PAR_ERR, afsr);
4045				cmn_err(CE_PANIC, " E$ tag address has bad"
4046				    " parity");
4047			}
4048		} else if ((afsr_ets & cpu_ec_parity) == 0) {
4049			/*
4050			 * ETS is zero but ETP is set
4051			 */
4052			ecache_scrub_log(ec_data, ec_tag, paddr,
4053			    CPU_ECACHE_ETP_ETS_ERR, afsr);
4054			cmn_err(CE_PANIC, "AFSR.ETP is set and"
4055			    " AFSR.ETS is zero");
4056		}
4057	} else {
4058		/*
		 * E$ tag state bit has bad parity
4060		 */
4061		ecache_scrub_log(ec_data, ec_tag, paddr,
4062		    CPU_ECACHE_STATE_ERR, afsr);
4063		cmn_err(CE_PANIC, "E$ tag state has bad parity");
4064	}
4065}
4066
4067static void
4068ecache_page_retire(void *arg)
4069{
4070	uint64_t paddr = (uint64_t)arg;
4071	(void) page_retire(paddr, PR_UE);
4072}
4073
4074void
4075sticksync_slave(void)
4076{}
4077
4078void
4079sticksync_master(void)
4080{}
4081
4082/*ARGSUSED*/
4083void
4084cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
4085{}
4086
4087void
4088cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
4089{
4090	int status;
4091	ddi_fm_error_t de;
4092
4093	bzero(&de, sizeof (ddi_fm_error_t));
4094
4095	de.fme_version = DDI_FME_VERSION;
4096	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
4097	    FM_ENA_FMT1);
4098	de.fme_flag = expected;
4099	de.fme_bus_specific = (void *)aflt->flt_addr;
4100	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
4101
4102	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
4103		aflt->flt_panic = 1;
4104}
4105
4106/*ARGSUSED*/
4107void
4108cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
4109    errorq_t *eqp, uint_t flag)
4110{
4111	struct async_flt *aflt = (struct async_flt *)payload;
4112
4113	aflt->flt_erpt_class = error_class;
4114	errorq_dispatch(eqp, payload, payload_sz, flag);
4115}
4116
4117#define	MAX_SIMM	8
4118
4119struct ce_info {
4120	char    name[UNUM_NAMLEN];
4121	uint64_t intermittent_total;
4122	uint64_t persistent_total;
4123	uint64_t sticky_total;
4124	unsigned short leaky_bucket_cnt;
4125};
4126
4127/*
4128 * Separately-defined structure for use in reporting the ce_info
4129 * to SunVTS without exposing the internal layout and implementation
4130 * of struct ce_info.
4131 */
4132static struct ecc_error_info ecc_error_info_data = {
4133	{ "version", KSTAT_DATA_UINT32 },
4134	{ "maxcount", KSTAT_DATA_UINT32 },
4135	{ "count", KSTAT_DATA_UINT32 }
4136};
4137static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
4138    sizeof (struct kstat_named);
4139
4140#if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
4141#error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
4142#endif
4143
4144struct ce_info  *mem_ce_simm = NULL;
4145size_t mem_ce_simm_size = 0;
4146
4147/*
4148 * Default values for the number of CE's allowed per interval.
 * The interval is defined in minutes.
 * SOFTERR_MIN_TIMEOUT is defined in microseconds.
4151 */
4152#define	SOFTERR_LIMIT_DEFAULT		2
4153#define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
4154#define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
4155#define	TIMEOUT_NONE			((timeout_id_t)0)
4156#define	TIMEOUT_SET			((timeout_id_t)1)
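/*
 * TIMEOUT_NONE and TIMEOUT_SET are sentinel values for
 * leaky_bucket_timeout_id below; ce_count_unum() uses atomic_cas_ptr() to
 * move it from TIMEOUT_NONE to TIMEOUT_SET so that only the first caller
 * arms the leaky bucket timeout.
 */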
4157
4158/*
 * timeout identifier for the leaky bucket
4160 */
4161static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
4162
4163/*
4164 * Tunables for maximum number of allowed CE's in a given time
4165 */
4166int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4167int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4168
4169void
4170cpu_mp_init(void)
4171{
	size_t size;
4173	size_t i;
4174	kstat_t *ksp;
4175
4176	/*
4177	 * Initialize the CE error handling buffers.
4178	 */
4179	mem_ce_simm_size = MAX_SIMM * max_ncpus;
4180	size = sizeof (struct ce_info) * mem_ce_simm_size;
4181	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
4182
4183	ksp = kstat_create("unix", 0, "ecc-info", "misc",
4184	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
4185	if (ksp != NULL) {
4186		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
4187		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
4188		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
4189		ecc_error_info_data.count.value.ui32 = 0;
4190		kstat_install(ksp);
4191	}
4192
4193	for (i = 0; i < mem_ce_simm_size; i++) {
4194		struct kstat_ecc_mm_info *kceip;
4195
4196		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
4197		    KM_SLEEP);
4198		ksp = kstat_create("mm", i, "ecc-info", "misc",
4199		    KSTAT_TYPE_NAMED,
4200		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
4201		    KSTAT_FLAG_VIRTUAL);
4202		if (ksp != NULL) {
4203			/*
4204			 * Re-declare ks_data_size to include room for the
4205			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
4206			 * set.
4207			 */
4208			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
4209			    KSTAT_CE_UNUM_NAMLEN;
4210			ksp->ks_data = kceip;
4211			kstat_named_init(&kceip->name,
4212			    "name", KSTAT_DATA_STRING);
4213			kstat_named_init(&kceip->intermittent_total,
4214			    "intermittent_total", KSTAT_DATA_UINT64);
4215			kstat_named_init(&kceip->persistent_total,
4216			    "persistent_total", KSTAT_DATA_UINT64);
4217			kstat_named_init(&kceip->sticky_total,
4218			    "sticky_total", KSTAT_DATA_UINT64);
4219			/*
			 * Rely on the default kstat snapshot routine, which
			 * knows how to deal with named kstats containing long
			 * strings, and supply our own update routine below.
4222			 */
4223			ksp->ks_update = ecc_kstat_update;
4224			kstat_install(ksp);
4225		} else {
4226			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
4227		}
4228	}
4229}
4230
4231/*ARGSUSED*/
4232static void
4233leaky_bucket_timeout(void *arg)
4234{
4235	int i;
4236	struct ce_info *psimm = mem_ce_simm;
4237
4238	for (i = 0; i < mem_ce_simm_size; i++) {
4239		if (psimm[i].leaky_bucket_cnt > 0)
4240			atomic_dec_16(&psimm[i].leaky_bucket_cnt);
4241	}
4242	add_leaky_bucket_timeout();
4243}
4244
4245static void
4246add_leaky_bucket_timeout(void)
4247{
4248	long timeout_in_microsecs;
4249
4250	/*
4251	 * create timeout for next leak.
4252	 *
4253	 * The timeout interval is calculated as follows
4254	 *
4255	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
4256	 *
4257	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
4258	 * in a minute), then multiply this by MICROSEC to get the interval
4259	 * in microseconds.  Divide this total by ecc_softerr_limit so that
	 * one count leaks from each bucket every (interval / limit), i.e. at
	 * most ecc_softerr_limit soft errors are tolerated per interval.
4261	 */
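	/*
	 * With the default tunables (ecc_softerr_limit = 2,
	 * ecc_softerr_interval = 1440 minutes) this works out to
	 * (1440 * 60 * MICROSEC) / 2 = 43,200,000,000 microseconds,
	 * i.e. one leak from each bucket every 12 hours.
	 */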
4262
4263	if (ecc_softerr_limit <= 0)
4264		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
4265	if (ecc_softerr_interval <= 0)
4266		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
4267
4268	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
4269	    ecc_softerr_limit;
4270
4271	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
4272		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
4273
4274	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
4275	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
4276}
4277
4278/*
4279 * Legacy Correctable ECC Error Hash
4280 *
4281 * All of the code below this comment is used to implement a legacy array
4282 * which counted intermittent, persistent, and sticky CE errors by unum,
4283 * and then was later extended to publish the data as a kstat for SunVTS.
4284 * All of this code is replaced by FMA, and remains here until such time
4285 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
4286 *
4287 * Errors are saved in three buckets per-unum:
4288 * (1) sticky - scrub was unsuccessful, cannot be scrubbed
4289 *     This could represent a problem, and is immediately printed out.
4290 * (2) persistent - was successfully scrubbed
4291 *     These errors use the leaky bucket algorithm to determine
4292 *     if there is a serious problem.
4293 * (3) intermittent - may have originated from the cpu or upa/safari bus,
4294 *     and does not necessarily indicate any problem with the dimm itself,
 *     but is critical information for debugging new hardware.
4296 *     Because we do not know if it came from the dimm, it would be
4297 *     inappropriate to include these in the leaky bucket counts.
4298 *
4299 * If the E$ line was modified before the scrub operation began, then the
4300 * displacement flush at the beginning of scrubphys() will cause the modified
4301 * line to be written out, which will clean up the CE.  Then, any subsequent
4302 * read will not cause an error, which will cause persistent errors to be
4303 * identified as intermittent.
4304 *
4305 * If a DIMM is going bad, it will produce true persistents as well as
4306 * false intermittents, so these intermittents can be safely ignored.
4307 *
4308 * If the error count is excessive for a DIMM, this function will return
4309 * PR_MCE, and the CPU module may then decide to remove that page from use.
4310 */
4311static int
4312ce_count_unum(int status, int len, char *unum)
4313{
4314	int i;
4315	struct ce_info *psimm = mem_ce_simm;
4316	int page_status = PR_OK;
4317
4318	ASSERT(psimm != NULL);
4319
4320	if (len <= 0 ||
4321	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
4322		return (page_status);
4323
4324	/*
4325	 * Initialize the leaky_bucket timeout
4326	 */
4327	if (atomic_cas_ptr(&leaky_bucket_timeout_id,
4328	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
4329		add_leaky_bucket_timeout();
4330
4331	for (i = 0; i < mem_ce_simm_size; i++) {
4332		if (psimm[i].name[0] == '\0') {
4333			/*
4334			 * Hit the end of the valid entries, add
4335			 * a new one.
4336			 */
4337			(void) strncpy(psimm[i].name, unum, len);
4338			if (status & ECC_STICKY) {
4339				/*
4340				 * Sticky - the leaky bucket is used to track
4341				 * soft errors.  Since a sticky error is a
4342				 * hard error and likely to be retired soon,
4343				 * we do not count it in the leaky bucket.
4344				 */
4345				psimm[i].leaky_bucket_cnt = 0;
4346				psimm[i].intermittent_total = 0;
4347				psimm[i].persistent_total = 0;
4348				psimm[i].sticky_total = 1;
4349				cmn_err(CE_NOTE,
4350				    "[AFT0] Sticky Softerror encountered "
4351				    "on Memory Module %s\n", unum);
4352				page_status = PR_MCE;
4353			} else if (status & ECC_PERSISTENT) {
4354				psimm[i].leaky_bucket_cnt = 1;
4355				psimm[i].intermittent_total = 0;
4356				psimm[i].persistent_total = 1;
4357				psimm[i].sticky_total = 0;
4358			} else {
4359				/*
4360				 * Intermittent - Because the scrub operation
4361				 * cannot find the error in the DIMM, we will
4362				 * not count these in the leaky bucket
4363				 */
4364				psimm[i].leaky_bucket_cnt = 0;
4365				psimm[i].intermittent_total = 1;
4366				psimm[i].persistent_total = 0;
4367				psimm[i].sticky_total = 0;
4368			}
4369			ecc_error_info_data.count.value.ui32++;
4370			break;
4371		} else if (strncmp(unum, psimm[i].name, len) == 0) {
4372			/*
4373			 * Found an existing entry for the current
4374			 * memory module, adjust the counts.
4375			 */
4376			if (status & ECC_STICKY) {
4377				psimm[i].sticky_total++;
4378				cmn_err(CE_NOTE,
4379				    "[AFT0] Sticky Softerror encountered "
4380				    "on Memory Module %s\n", unum);
4381				page_status = PR_MCE;
4382			} else if (status & ECC_PERSISTENT) {
4383				int new_value;
4384
4385				new_value = atomic_inc_16_nv(
4386				    &psimm[i].leaky_bucket_cnt);
4387				psimm[i].persistent_total++;
4388				if (new_value > ecc_softerr_limit) {
4389					cmn_err(CE_NOTE, "[AFT0] Most recent %d"
4390					    " soft errors from Memory Module"
4391					    " %s exceed threshold (N=%d,"
4392					    " T=%dh:%02dm) triggering page"
4393					    " retire", new_value, unum,
4394					    ecc_softerr_limit,
4395					    ecc_softerr_interval / 60,
4396					    ecc_softerr_interval % 60);
4397					atomic_dec_16(
4398					    &psimm[i].leaky_bucket_cnt);
4399					page_status = PR_MCE;
4400				}
4401			} else { /* Intermittent */
4402				psimm[i].intermittent_total++;
4403			}
4404			break;
4405		}
4406	}
4407
4408	if (i >= mem_ce_simm_size)
4409		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
4410		    "space.\n");
4411
4412	return (page_status);
4413}
4414
4415/*
4416 * Function to support counting of IO detected CEs.
4417 */
4418void
4419cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
4420{
4421	int err;
4422
4423	err = ce_count_unum(ecc->flt_status, len, unum);
4424	if (err != PR_OK && automatic_page_removal) {
4425		(void) page_retire(ecc->flt_addr, err);
4426	}
4427}
4428
4429static int
4430ecc_kstat_update(kstat_t *ksp, int rw)
4431{
4432	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
4433	struct ce_info *ceip = mem_ce_simm;
4434	int i = ksp->ks_instance;
4435
4436	if (rw == KSTAT_WRITE)
4437		return (EACCES);
4438
4439	ASSERT(ksp->ks_data != NULL);
4440	ASSERT(i < mem_ce_simm_size && i >= 0);
4441
4442	/*
4443	 * Since we're not using locks, make sure that we don't get partial
4444	 * data. The name is always copied before the counters are incremented
4445	 * so only do this update routine if at least one of the counters is
4446	 * non-zero, which ensures that ce_count_unum() is done, and the
4447	 * string is fully copied.
4448	 */
4449	if (ceip[i].intermittent_total == 0 &&
4450	    ceip[i].persistent_total == 0 &&
4451	    ceip[i].sticky_total == 0) {
4452		/*
4453		 * Uninitialized or partially initialized. Ignore.
4454		 * The ks_data buffer was allocated via kmem_zalloc,
4455		 * so no need to bzero it.
4456		 */
4457		return (0);
4458	}
4459
4460	kstat_named_setstr(&kceip->name, ceip[i].name);
4461	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
4462	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
4463	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
4464
4465	return (0);
4466}
4467
4468#define	VIS_BLOCKSIZE		64
4469
4470int
4471dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
4472{
4473	int ret, watched;
4474
4475	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4476	ret = dtrace_blksuword32(addr, data, 0);
4477	if (watched)
4478		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
4479
4480	return (ret);
4481}
4482
4483/*ARGSUSED*/
4484void
4485cpu_faulted_enter(struct cpu *cp)
4486{
4487}
4488
4489/*ARGSUSED*/
4490void
4491cpu_faulted_exit(struct cpu *cp)
4492{
4493}
4494
4495/*ARGSUSED*/
4496void
4497mmu_init_kernel_pgsz(struct hat *hat)
4498{
4499}
4500
4501size_t
4502mmu_get_kernel_lpsize(size_t lpsize)
4503{
4504	uint_t tte;
4505
4506	if (lpsize == 0) {
4507		/* no setting for segkmem_lpsize in /etc/system: use default */
4508		return (MMU_PAGESIZE4M);
4509	}
4510
4511	for (tte = TTE8K; tte <= TTE4M; tte++) {
4512		if (lpsize == TTEBYTES(tte))
4513			return (lpsize);
4514	}
4515
4516	return (TTEBYTES(TTE8K));
4517}
4518