1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/systm.h>
28#include <sys/ddi.h>
29#include <sys/sysmacros.h>
30#include <sys/archsystm.h>
31#include <sys/vmsystm.h>
32#include <sys/machparam.h>
33#include <sys/machsystm.h>
34#include <sys/machthread.h>
35#include <sys/cpu.h>
36#include <sys/cmp.h>
37#include <sys/elf_SPARC.h>
38#include <vm/vm_dep.h>
39#include <vm/hat_sfmmu.h>
40#include <vm/seg_kpm.h>
41#include <sys/cpuvar.h>
42#include <sys/cheetahregs.h>
43#include <sys/us3_module.h>
44#include <sys/async.h>
45#include <sys/cmn_err.h>
46#include <sys/debug.h>
47#include <sys/dditypes.h>
48#include <sys/prom_debug.h>
49#include <sys/prom_plat.h>
50#include <sys/cpu_module.h>
51#include <sys/sysmacros.h>
52#include <sys/intreg.h>
53#include <sys/clock.h>
54#include <sys/platform_module.h>
55#include <sys/machtrap.h>
56#include <sys/ontrap.h>
57#include <sys/panic.h>
58#include <sys/memlist.h>
59#include <sys/bootconf.h>
60#include <sys/ivintr.h>
61#include <sys/atomic.h>
62#include <sys/taskq.h>
63#include <sys/note.h>
64#include <sys/ndifm.h>
65#include <sys/ddifm.h>
66#include <sys/fm/protocol.h>
67#include <sys/fm/util.h>
68#include <sys/fm/cpu/UltraSPARC-III.h>
69#include <sys/fpras_impl.h>
70#include <sys/dtrace.h>
71#include <sys/watchpoint.h>
72#include <sys/plat_ecc_unum.h>
73#include <sys/cyclic.h>
74#include <sys/errorq.h>
75#include <sys/errclassify.h>
76#include <sys/pghw.h>
77#include <sys/clock_impl.h>
78
79#ifdef	CHEETAHPLUS_ERRATUM_25
80#include <sys/xc_impl.h>
81#endif	/* CHEETAHPLUS_ERRATUM_25 */
82
83ch_cpu_logout_t	clop_before_flush;
84ch_cpu_logout_t	clop_after_flush;
85uint_t	flush_retries_done = 0;
86/*
87 * Note that 'Cheetah PRM' refers to:
88 *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
89 */
90
91/*
92 * Per CPU pointers to physical address of TL>0 logout data areas.
93 * These pointers have to be in the kernel nucleus to avoid MMU
94 * misses.
95 */
96uint64_t ch_err_tl1_paddrs[NCPU];
97
98/*
99 * One statically allocated structure to use during startup/DR
100 * to prevent unnecessary panics.
101 */
102ch_err_tl1_data_t ch_err_tl1_data;
103
104/*
105 * Per CPU pending error at TL>0, used by level15 softint handler
106 */
107uchar_t ch_err_tl1_pending[NCPU];
108
109/*
110 * For deferred CE re-enable after trap.
111 */
112taskq_t		*ch_check_ce_tq;
113
114/*
115 * Internal functions.
116 */
117static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
118static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
119static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
120    ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
121static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
122    uint64_t t_afsr_bit);
123static int clear_ecc(struct async_flt *ecc);
124#if defined(CPU_IMP_ECACHE_ASSOC)
125static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
126#endif
127int cpu_ecache_set_size(struct cpu *cp);
128static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
129int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
130uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
131int cpu_ectag_pa_to_subblk_state(int cachesize,
132				uint64_t subaddr, uint64_t tag);
133static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
134static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
135static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
136static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
137static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
138static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
139static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
140static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
141static void cpu_scrubphys(struct async_flt *aflt);
142static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
143    int *, int *);
144static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
145static void cpu_ereport_init(struct async_flt *aflt);
146static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
147static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
148static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
149    uint64_t nceen, ch_cpu_logout_t *clop);
150static int cpu_ce_delayed_ec_logout(uint64_t);
151static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
152static int cpu_error_is_ecache_data(int, uint64_t);
153static void cpu_fmri_cpu_set(nvlist_t *, int);
154static int cpu_error_to_resource_type(struct async_flt *aflt);
155
156#ifdef	CHEETAHPLUS_ERRATUM_25
157static int mondo_recover_proc(uint16_t, int);
158static void cheetah_nudge_init(void);
159static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
160    cyc_time_t *when);
161static void cheetah_nudge_buddy(void);
162#endif	/* CHEETAHPLUS_ERRATUM_25 */
163
164#if defined(CPU_IMP_L1_CACHE_PARITY)
165static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
166static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
167static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
168    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
169static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
170static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
171static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
172static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
173static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
174static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
175#endif	/* CPU_IMP_L1_CACHE_PARITY */
176
177int (*p2get_mem_info)(int synd_code, uint64_t paddr,
178    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
179    int *segsp, int *banksp, int *mcidp);
180
181/*
182 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
184 * of this array have the following semantics:
185 *
186 *      00-127  The number of the bad bit, when only one bit is bad.
187 *      128     ECC bit C0 is bad.
188 *      129     ECC bit C1 is bad.
189 *      130     ECC bit C2 is bad.
190 *      131     ECC bit C3 is bad.
191 *      132     ECC bit C4 is bad.
192 *      133     ECC bit C5 is bad.
193 *      134     ECC bit C6 is bad.
194 *      135     ECC bit C7 is bad.
195 *      136     ECC bit C8 is bad.
196 *	137-143 reserved for Mtag Data and ECC.
197 *      144(M2) Two bits are bad within a nibble.
198 *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
200 *      147(M)  Multiple bits (5 or more) are bad.
201 *      148     NO bits are bad.
202 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
203 */
204
205#define	C0	128
206#define	C1	129
207#define	C2	130
208#define	C3	131
209#define	C4	132
210#define	C5	133
211#define	C6	134
212#define	C7	135
213#define	C8	136
214#define	MT0	137	/* Mtag Data bit 0 */
215#define	MT1	138
216#define	MT2	139
217#define	MTC0	140	/* Mtag Check bit 0 */
218#define	MTC1	141
219#define	MTC2	142
220#define	MTC3	143
221#define	M2	144
222#define	M3	145
223#define	M4	146
224#define	M	147
225#define	NA	148
226#if defined(JALAPENO) || defined(SERRANO)
227#define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
228#define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
229#define	SLAST	S003MEM	/* last special syndrome */
230#else /* JALAPENO || SERRANO */
231#define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
232#define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
233#define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
234#define	SLAST	S11C	/* last special syndrome */
235#endif /* JALAPENO || SERRANO */
236#if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndrome 152 through 167 for bus parity */
238#define	BPAR15	167
239#endif	/* JALAPENO || SERRANO */
240
static uint8_t ecc_syndrome_tab[] =
{
NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
#if defined(JALAPENO) || defined(SERRANO)
116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#else	/* JALAPENO || SERRANO */
116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
#endif	/* JALAPENO || SERRANO */
C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
#if defined(JALAPENO) || defined(SERRANO)
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
#else	/* JALAPENO || SERRANO */
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
#endif	/* JALAPENO || SERRANO */
M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
};
284
285#define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
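
/*
 * Worked example of the table lookup: the 9-bit ECC syndrome (E_SYND in
 * the AFSR) is used directly as the index into ecc_syndrome_tab[].  Index
 * 0 decodes to NA (no bits in error), index 0x003 decodes to the special
 * S003 entry, and index 0x007 decodes to 47, i.e. data bit 47 is the
 * single bad bit for that syndrome.
 */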
286
287#if !(defined(JALAPENO) || defined(SERRANO))
288/*
289 * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
291 * of this array have the following semantics:
292 *
293 *      -1	Invalid mtag syndrome.
294 *      137     Mtag Data 0 is bad.
295 *      138     Mtag Data 1 is bad.
296 *      139     Mtag Data 2 is bad.
297 *      140     Mtag ECC 0 is bad.
298 *      141     Mtag ECC 1 is bad.
299 *      142     Mtag ECC 2 is bad.
300 *      143     Mtag ECC 3 is bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
302 */
short mtag_syndrome_tab[] =
{
NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
};
307
308#define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
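
/*
 * Worked example: the 4-bit Mtag syndrome indexes mtag_syndrome_tab[]
 * directly, so syndrome 0x1 decodes to MTC0 (Mtag ECC bit 0 bad) and
 * syndrome 0x7 decodes to MT0 (Mtag data bit 0 bad).
 */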
309
310#else /* !(JALAPENO || SERRANO) */
311
312#define	BSYND_TBL_SIZE	16
313
314#endif /* !(JALAPENO || SERRANO) */
315
316/*
317 * Virtual Address bit flag in the data cache. This is actually bit 2 in the
318 * dcache data tag.
319 */
320#define	VA13	INT64_C(0x0000000000000002)
321
322/*
323 * Types returned from cpu_error_to_resource_type()
324 */
325#define	ERRTYPE_UNKNOWN		0
326#define	ERRTYPE_CPU		1
327#define	ERRTYPE_MEMORY		2
328#define	ERRTYPE_ECACHE_DATA	3
329
330/*
331 * CE initial classification and subsequent action lookup table
332 */
333static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
334static int ce_disp_inited;
335
336/*
337 * Set to disable leaky and partner check for memory correctables
338 */
339int ce_xdiag_off;
340
341/*
342 * The following are not incremented atomically so are indicative only
343 */
344static int ce_xdiag_drops;
345static int ce_xdiag_lkydrops;
346static int ce_xdiag_ptnrdrops;
347static int ce_xdiag_bad;
348
349/*
350 * CE leaky check callback structure
351 */
352typedef struct {
353	struct async_flt *lkycb_aflt;
354	errorq_t *lkycb_eqp;
355	errorq_elem_t *lkycb_eqep;
356} ce_lkychk_cb_t;
357
358/*
359 * defines for various ecache_flush_flag's
360 */
361#define	ECACHE_FLUSH_LINE	1
362#define	ECACHE_FLUSH_ALL	2
363
364/*
365 * STICK sync
366 */
367#define	STICK_ITERATION 10
368#define	MAX_TSKEW	1
369#define	EV_A_START	0
370#define	EV_A_END	1
371#define	EV_B_START	2
372#define	EV_B_END	3
373#define	EVENTS		4
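
/*
 * The EV_* values above are used as indices into the timestamp[] array
 * declared below: a start/end timestamp pair is recorded for each of the
 * two events (A and B) used by the STICK sync code, and MAX_TSKEW bounds
 * the skew tolerated between them.
 */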
374
375static int64_t stick_iter = STICK_ITERATION;
376static int64_t stick_tsk = MAX_TSKEW;
377
378typedef enum {
379	EVENT_NULL = 0,
380	SLAVE_START,
381	SLAVE_CONT,
382	MASTER_START
383} event_cmd_t;
384
385static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
386static int64_t timestamp[EVENTS];
387static volatile int slave_done;
388
389#ifdef DEBUG
390#define	DSYNC_ATTEMPTS 64
391typedef struct {
392	int64_t	skew_val[DSYNC_ATTEMPTS];
393} ss_t;
394
395ss_t stick_sync_stats[NCPU];
396#endif /* DEBUG */
397
398uint_t cpu_impl_dual_pgsz = 0;
399#if defined(CPU_IMP_DUAL_PAGESIZE)
400uint_t disable_dual_pgsz = 0;
401#endif	/* CPU_IMP_DUAL_PAGESIZE */
402
403/*
404 * Save the cache bootup state for use when internal
405 * caches are to be re-enabled after an error occurs.
406 */
407uint64_t cache_boot_state;
408
409/*
410 * PA[22:0] represent Displacement in Safari configuration space.
411 */
412uint_t	root_phys_addr_lo_mask = 0x7fffffu;
413
414bus_config_eclk_t bus_config_eclk[] = {
415#if defined(JALAPENO) || defined(SERRANO)
416	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
417	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
418	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
419#else /* JALAPENO || SERRANO */
420	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
421	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
422	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
423#endif /* JALAPENO || SERRANO */
424	{0, 0}
425};
426
427/*
428 * Interval for deferred CEEN reenable
429 */
430int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
431
432/*
433 * set in /etc/system to control logging of user BERR/TO's
434 */
435int cpu_berr_to_verbose = 0;
436
437/*
438 * set to 0 in /etc/system to defer CEEN reenable for all CEs
439 */
440uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
441uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
442
443/*
444 * Set of all offline cpus
445 */
446cpuset_t cpu_offline_set;
447
448static void cpu_delayed_check_ce_errors(void *);
449static void cpu_check_ce_errors(void *);
450void cpu_error_ecache_flush(ch_async_flt_t *);
451static int cpu_error_ecache_flush_required(ch_async_flt_t *);
452static void cpu_log_and_clear_ce(ch_async_flt_t *);
453void cpu_ce_detected(ch_cpu_errors_t *, int);
454
455/*
456 * CE Leaky check timeout in microseconds.  This is chosen to be twice the
 * memory refresh interval of current DIMMs (64ms).  After the initial fix,
 * that gives at least one full refresh cycle in which the cell can leak
459 * (whereafter further refreshes simply reinforce any incorrect bit value).
460 */
461clock_t cpu_ce_lkychk_timeout_usec = 128000;
462
463/*
464 * CE partner check partner caching period in seconds
465 */
466int cpu_ce_ptnr_cachetime_sec = 60;
467
468/*
469 * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
470 */
471#define	CH_SET_TRAP(ttentry, ttlabel)			\
472		bcopy((const void *)&ttlabel, &ttentry, 32);		\
473		flush_instr_mem((caddr_t)&ttentry, 32);
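
/*
 * The 32 bytes copied above correspond to the eight 4-byte SPARC
 * instructions that make up one trap table entry; flush_instr_mem()
 * is run over the same range so the I-cache picks up the new handler.
 */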
474
475static int min_ecache_size;
476static uint_t priv_hcl_1;
477static uint_t priv_hcl_2;
478static uint_t priv_hcl_4;
479static uint_t priv_hcl_8;
480
481void
482cpu_setup(void)
483{
484	extern int at_flags;
485	extern int cpc_has_overflow_intr;
486
487	/*
488	 * Setup chip-specific trap handlers.
489	 */
490	cpu_init_trap();
491
492	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
493
494	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
495
496	/*
497	 * save the cache bootup state.
498	 */
499	cache_boot_state = get_dcu() & DCU_CACHE;
500
501	/*
502	 * Due to the number of entries in the fully-associative tlb
503	 * this may have to be tuned lower than in spitfire.
504	 */
505	pp_slots = MIN(8, MAXPP_SLOTS);
506
507	/*
508	 * Block stores do not invalidate all pages of the d$, pagecopy
	 * et al. need virtual translations with virtual coloring taken
510	 * into consideration.  prefetch/ldd will pollute the d$ on the
511	 * load side.
512	 */
513	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
514
515	if (use_page_coloring) {
516		do_pg_coloring = 1;
517	}
518
519	isa_list =
520	    "sparcv9+vis2 sparcv9+vis sparcv9 "
521	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
522	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
523
524	/*
525	 * On Panther-based machines, this should
526	 * also include AV_SPARC_POPC too
527	 */
528	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
529
530	/*
531	 * On cheetah, there's no hole in the virtual address space
532	 */
533	hole_start = hole_end = 0;
534
535	/*
536	 * The kpm mapping window.
537	 * kpm_size:
538	 *	The size of a single kpm range.
539	 *	The overall size will be: kpm_size * vac_colors.
540	 * kpm_vbase:
541	 *	The virtual start address of the kpm range within the kernel
542	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
543	 */
544	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
545	kpm_size_shift = 43;
546	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
547	kpm_smallpages = 1;
548
549	/*
550	 * The traptrace code uses either %tick or %stick for
551	 * timestamping.  We have %stick so we can use it.
552	 */
553	traptrace_use_stick = 1;
554
555	/*
556	 * Cheetah has a performance counter overflow interrupt
557	 */
558	cpc_has_overflow_intr = 1;
559
560#if defined(CPU_IMP_DUAL_PAGESIZE)
561	/*
562	 * Use Cheetah+ and later dual page size support.
563	 */
564	if (!disable_dual_pgsz) {
565		cpu_impl_dual_pgsz = 1;
566	}
567#endif	/* CPU_IMP_DUAL_PAGESIZE */
568
569	/*
570	 * Declare that this architecture/cpu combination does fpRAS.
571	 */
572	fpras_implemented = 1;
573
574	/*
575	 * Setup CE lookup table
576	 */
577	CE_INITDISPTBL_POPULATE(ce_disp_table);
578	ce_disp_inited = 1;
579}
580
581/*
582 * Called by setcpudelay
583 */
584void
585cpu_init_tick_freq(void)
586{
587	/*
588	 * For UltraSPARC III and beyond we want to use the
589	 * system clock rate as the basis for low level timing,
	 * due to support of mixed speed CPUs and power management.
591	 */
592	if (system_clock_freq == 0)
593		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
594
595	sys_tick_freq = system_clock_freq;
596}
597
598#ifdef CHEETAHPLUS_ERRATUM_25
599/*
600 * Tunables
601 */
602int cheetah_bpe_off = 0;
603int cheetah_sendmondo_recover = 1;
604int cheetah_sendmondo_fullscan = 0;
605int cheetah_sendmondo_recover_delay = 5;
606
607#define	CHEETAH_LIVELOCK_MIN_DELAY	1
608
609/*
610 * Recovery Statistics
611 */
612typedef struct cheetah_livelock_entry	{
613	int cpuid;		/* fallen cpu */
614	int buddy;		/* cpu that ran recovery */
615	clock_t lbolt;		/* when recovery started */
616	hrtime_t recovery_time;	/* time spent in recovery */
617} cheetah_livelock_entry_t;
618
619#define	CHEETAH_LIVELOCK_NENTRY	32
620
621cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
622int cheetah_livelock_entry_nxt;
623
624#define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
625	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
626	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
627		cheetah_livelock_entry_nxt = 0;				\
628	}								\
629}
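
/*
 * cheetah_livelock_hist[] is treated as a circular buffer: the macro above
 * hands back the next entry and wraps cheetah_livelock_entry_nxt once
 * CHEETAH_LIVELOCK_NENTRY entries have been used, so only the most recent
 * recoveries are retained.
 */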
630
631#define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
632
633struct {
634	hrtime_t hrt;		/* maximum recovery time */
635	int recovery;		/* recovered */
636	int full_claimed;	/* maximum pages claimed in full recovery */
637	int proc_entry;		/* attempted to claim TSB */
638	int proc_tsb_scan;	/* tsb scanned */
639	int proc_tsb_partscan;	/* tsb partially scanned */
640	int proc_tsb_fullscan;	/* whole tsb scanned */
641	int proc_claimed;	/* maximum pages claimed in tsb scan */
642	int proc_user;		/* user thread */
643	int proc_kernel;	/* kernel thread */
644	int proc_onflt;		/* bad stack */
645	int proc_cpu;		/* null cpu */
646	int proc_thread;	/* null thread */
647	int proc_proc;		/* null proc */
648	int proc_as;		/* null as */
649	int proc_hat;		/* null hat */
650	int proc_hat_inval;	/* hat contents don't make sense */
651	int proc_hat_busy;	/* hat is changing TSBs */
652	int proc_tsb_reloc;	/* TSB skipped because being relocated */
653	int proc_cnum_bad;	/* cnum out of range */
654	int proc_cnum;		/* last cnum processed */
655	tte_t proc_tte;		/* last tte processed */
656} cheetah_livelock_stat;
657
658#define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
659
660#define	CHEETAH_LIVELOCK_STATSET(item, value)		\
661	cheetah_livelock_stat.item = value
662
663#define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
664	if (value > cheetah_livelock_stat.item)		\
665		cheetah_livelock_stat.item = value;	\
666}
667
668/*
669 * Attempt to recover a cpu by claiming every cache line as saved
670 * in the TSB that the non-responsive cpu is using. Since we can't
671 * grab any adaptive lock, this is at best an attempt to do so. Because
672 * we don't grab any locks, we must operate under the protection of
673 * on_fault().
674 *
675 * Return 1 if cpuid could be recovered, 0 if failed.
676 */
677int
678mondo_recover_proc(uint16_t cpuid, int bn)
679{
680	label_t ljb;
681	cpu_t *cp;
682	kthread_t *t;
683	proc_t *p;
684	struct as *as;
685	struct hat *hat;
686	uint_t  cnum;
687	struct tsb_info *tsbinfop;
688	struct tsbe *tsbep;
689	caddr_t tsbp;
690	caddr_t end_tsbp;
691	uint64_t paddr;
692	uint64_t idsr;
693	u_longlong_t pahi, palo;
694	int pages_claimed = 0;
695	tte_t tsbe_tte;
696	int tried_kernel_tsb = 0;
697	mmu_ctx_t *mmu_ctxp;
698
699	CHEETAH_LIVELOCK_STAT(proc_entry);
700
701	if (on_fault(&ljb)) {
702		CHEETAH_LIVELOCK_STAT(proc_onflt);
703		goto badstruct;
704	}
705
706	if ((cp = cpu[cpuid]) == NULL) {
707		CHEETAH_LIVELOCK_STAT(proc_cpu);
708		goto badstruct;
709	}
710
711	if ((t = cp->cpu_thread) == NULL) {
712		CHEETAH_LIVELOCK_STAT(proc_thread);
713		goto badstruct;
714	}
715
716	if ((p = ttoproc(t)) == NULL) {
717		CHEETAH_LIVELOCK_STAT(proc_proc);
718		goto badstruct;
719	}
720
721	if ((as = p->p_as) == NULL) {
722		CHEETAH_LIVELOCK_STAT(proc_as);
723		goto badstruct;
724	}
725
726	if ((hat = as->a_hat) == NULL) {
727		CHEETAH_LIVELOCK_STAT(proc_hat);
728		goto badstruct;
729	}
730
731	if (hat != ksfmmup) {
732		CHEETAH_LIVELOCK_STAT(proc_user);
733		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
734			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
735			goto badstruct;
736		}
737		tsbinfop = hat->sfmmu_tsb;
738		if (tsbinfop == NULL) {
739			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
740			goto badstruct;
741		}
742		tsbp = tsbinfop->tsb_va;
743		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
744	} else {
745		CHEETAH_LIVELOCK_STAT(proc_kernel);
746		tsbinfop = NULL;
747		tsbp = ktsb_base;
748		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
749	}
750
751	/* Verify as */
752	if (hat->sfmmu_as != as) {
753		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
754		goto badstruct;
755	}
756
757	mmu_ctxp = CPU_MMU_CTXP(cp);
758	ASSERT(mmu_ctxp);
759	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
760	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
761
762	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
763	    (cnum >= mmu_ctxp->mmu_nctxs)) {
764		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
765		goto badstruct;
766	}
767
768	do {
769		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
770
771		/*
772		 * Skip TSBs being relocated.  This is important because
773		 * we want to avoid the following deadlock scenario:
774		 *
775		 * 1) when we came in we set ourselves to "in recover" state.
776		 * 2) when we try to touch TSB being relocated the mapping
777		 *    will be in the suspended state so we'll spin waiting
778		 *    for it to be unlocked.
779		 * 3) when the CPU that holds the TSB mapping locked tries to
		 *    unlock it, it will send an xtrap which will fail to xcall
781		 *    us or the CPU we're trying to recover, and will in turn
782		 *    enter the mondo code.
783		 * 4) since we are still spinning on the locked mapping
784		 *    no further progress will be made and the system will
785		 *    inevitably hard hang.
786		 *
787		 * A TSB not being relocated can't begin being relocated
788		 * while we're accessing it because we check
789		 * sendmondo_in_recover before relocating TSBs.
790		 */
791		if (hat != ksfmmup &&
792		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
793			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
794			goto next_tsbinfo;
795		}
796
797		for (tsbep = (struct tsbe *)tsbp;
798		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
799			tsbe_tte = tsbep->tte_data;
800
801			if (tsbe_tte.tte_val == 0) {
802				/*
803				 * Invalid tte
804				 */
805				continue;
806			}
807			if (tsbe_tte.tte_se) {
808				/*
809				 * Don't want device registers
810				 */
811				continue;
812			}
813			if (tsbe_tte.tte_cp == 0) {
814				/*
815				 * Must be cached in E$
816				 */
817				continue;
818			}
819			if (tsbep->tte_tag.tag_invalid != 0) {
820				/*
				 * Invalid tag, ignore this entry.
822				 */
823				continue;
824			}
825			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
826			idsr = getidsr();
827			if ((idsr & (IDSR_NACK_BIT(bn) |
828			    IDSR_BUSY_BIT(bn))) == 0) {
829				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
830				goto done;
831			}
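			/*
			 * The TTE stores the physical address split across
			 * two fields; rebuild the full PA as
			 * (tte_pahi << 32) | (tte_palo << MMU_PAGESHIFT) and
			 * then claim the cache lines for the whole region the
			 * TTE maps, at E$ subblock stride.
			 */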
832			pahi = tsbe_tte.tte_pahi;
833			palo = tsbe_tte.tte_palo;
834			paddr = (uint64_t)((pahi << 32) |
835			    (palo << MMU_PAGESHIFT));
836			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
837			    CH_ECACHE_SUBBLK_SIZE);
838			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
839				shipit(cpuid, bn);
840			}
841			pages_claimed++;
842		}
843next_tsbinfo:
844		if (tsbinfop != NULL)
845			tsbinfop = tsbinfop->tsb_next;
846		if (tsbinfop != NULL) {
847			tsbp = tsbinfop->tsb_va;
848			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
849		} else if (tsbp == ktsb_base) {
850			tried_kernel_tsb = 1;
851		} else if (!tried_kernel_tsb) {
852			tsbp = ktsb_base;
853			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
854			hat = ksfmmup;
855			tsbinfop = NULL;
856		}
857	} while (tsbinfop != NULL ||
858	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
859
860	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
861	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
862	no_fault();
863	idsr = getidsr();
864	if ((idsr & (IDSR_NACK_BIT(bn) |
865	    IDSR_BUSY_BIT(bn))) == 0) {
866		return (1);
867	} else {
868		return (0);
869	}
870
871done:
872	no_fault();
873	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
874	return (1);
875
876badstruct:
877	no_fault();
878	return (0);
879}
880
881/*
882 * Attempt to claim ownership, temporarily, of every cache line that a
883 * non-responsive cpu might be using.  This might kick that cpu out of
884 * this state.
885 *
886 * The return value indicates to the caller if we have exhausted all recovery
887 * techniques. If 1 is returned, it is useless to call this function again
888 * even for a different target CPU.
889 */
890int
891mondo_recover(uint16_t cpuid, int bn)
892{
893	struct memseg *seg;
894	uint64_t begin_pa, end_pa, cur_pa;
895	hrtime_t begin_hrt, end_hrt;
896	int retval = 0;
897	int pages_claimed = 0;
898	cheetah_livelock_entry_t *histp;
899	uint64_t idsr;
900
901	if (atomic_cas_32(&sendmondo_in_recover, 0, 1) != 0) {
902		/*
903		 * Wait while recovery takes place
904		 */
905		while (sendmondo_in_recover) {
906			drv_usecwait(1);
907		}
908		/*
909		 * Assume we didn't claim the whole memory. If
910		 * the target of this caller is not recovered,
911		 * it will come back.
912		 */
913		return (retval);
914	}
915
916	CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
917	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
918	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
919	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
920
921	begin_hrt = gethrtime_waitfree();
922	/*
923	 * First try to claim the lines in the TSB the target
924	 * may have been using.
925	 */
926	if (mondo_recover_proc(cpuid, bn) == 1) {
927		/*
928		 * Didn't claim the whole memory
929		 */
930		goto done;
931	}
932
933	/*
934	 * We tried using the TSB. The target is still
935	 * not recovered. Check if complete memory scan is
936	 * enabled.
937	 */
938	if (cheetah_sendmondo_fullscan == 0) {
939		/*
940		 * Full memory scan is disabled.
941		 */
942		retval = 1;
943		goto done;
944	}
945
946	/*
947	 * Try claiming the whole memory.
948	 */
949	for (seg = memsegs; seg; seg = seg->next) {
950		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
951		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
952		for (cur_pa = begin_pa; cur_pa < end_pa;
953		    cur_pa += MMU_PAGESIZE) {
954			idsr = getidsr();
955			if ((idsr & (IDSR_NACK_BIT(bn) |
956			    IDSR_BUSY_BIT(bn))) == 0) {
957				/*
958				 * Didn't claim all memory
959				 */
960				goto done;
961			}
962			claimlines(cur_pa, MMU_PAGESIZE,
963			    CH_ECACHE_SUBBLK_SIZE);
964			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
965				shipit(cpuid, bn);
966			}
967			pages_claimed++;
968		}
969	}
970
971	/*
972	 * We did all we could.
973	 */
974	retval = 1;
975
976done:
977	/*
978	 * Update statistics
979	 */
980	end_hrt = gethrtime_waitfree();
981	CHEETAH_LIVELOCK_STAT(recovery);
982	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
983	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
984	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
985	    (end_hrt -  begin_hrt));
986
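	/*
	 * Drop the recovery flag acquired with atomic_cas_32() at the top of
	 * this function so that other would-be recoverers may proceed.
	 */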
987	while (atomic_cas_32(&sendmondo_in_recover, 1, 0) != 1)
988		;
989
990	return (retval);
991}
992
993/*
994 * This is called by the cyclic framework when this CPU becomes online
995 */
996/*ARGSUSED*/
997static void
998cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
999{
1000
1001	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
1002	hdlr->cyh_level = CY_LOW_LEVEL;
1003	hdlr->cyh_arg = NULL;
1004
1005	/*
1006	 * Stagger the start time
1007	 */
1008	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
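	/*
	 * With NCPU of, say, 64, the line above gives cpu_id 5 a first
	 * firing 5/64 of a second after time zero, so the per-CPU handlers
	 * do not all expire at the same instant.
	 */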
1009	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1010		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1011	}
1012	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1013}
1014
1015/*
1016 * Create a low level cyclic to send a xtrap to the next cpu online.
1017 * However, there's no need to have this running on a uniprocessor system.
1018 */
1019static void
1020cheetah_nudge_init(void)
1021{
1022	cyc_omni_handler_t hdlr;
1023
1024	if (max_ncpus == 1) {
1025		return;
1026	}
1027
1028	hdlr.cyo_online = cheetah_nudge_onln;
1029	hdlr.cyo_offline = NULL;
1030	hdlr.cyo_arg = NULL;
1031
1032	mutex_enter(&cpu_lock);
1033	(void) cyclic_add_omni(&hdlr);
1034	mutex_exit(&cpu_lock);
1035}
1036
1037/*
1038 * Cyclic handler to wake up buddy
1039 */
1040void
1041cheetah_nudge_buddy(void)
1042{
1043	/*
1044	 * Disable kernel preemption to protect the cpu list
1045	 */
1046	kpreempt_disable();
1047	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1048		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1049		    0, 0);
1050	}
1051	kpreempt_enable();
1052}
1053
1054#endif	/* CHEETAHPLUS_ERRATUM_25 */
1055
1056#ifdef SEND_MONDO_STATS
1057uint32_t x_one_stimes[64];
1058uint32_t x_one_ltimes[16];
1059uint32_t x_set_stimes[64];
1060uint32_t x_set_ltimes[16];
1061uint32_t x_set_cpus[NCPU];
1062uint32_t x_nack_stimes[64];
1063#endif
1064
1065/*
1066 * Note: A version of this function is used by the debugger via the KDI,
1067 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be included
1069 * in the KDI-specific version.  See us3_kdi.c.
1070 */
1071void
1072send_one_mondo(int cpuid)
1073{
1074	int busy, nack;
1075	uint64_t idsr, starttick, endtick, tick, lasttick;
1076	uint64_t busymask;
1077#ifdef	CHEETAHPLUS_ERRATUM_25
1078	int recovered = 0;
1079#endif
1080
1081	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1082	starttick = lasttick = gettick();
1083	shipit(cpuid, 0);
1084	endtick = starttick + xc_tick_limit;
1085	busy = nack = 0;
1086#if defined(JALAPENO) || defined(SERRANO)
1087	/*
1088	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
	 * will be used for dispatching the interrupt. For now, assume
1090	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1091	 * issues with respect to BUSY/NACK pair usage.
1092	 */
1093	busymask  = IDSR_BUSY_BIT(cpuid);
1094#else /* JALAPENO || SERRANO */
1095	busymask = IDSR_BUSY;
1096#endif /* JALAPENO || SERRANO */
1097	for (;;) {
1098		idsr = getidsr();
1099		if (idsr == 0)
1100			break;
1101
1102		tick = gettick();
1103		/*
1104		 * If there is a big jump between the current tick
1105		 * count and lasttick, we have probably hit a break
1106		 * point.  Adjust endtick accordingly to avoid panic.
1107		 */
1108		if (tick > (lasttick + xc_tick_jump_limit))
1109			endtick += (tick - lasttick);
1110		lasttick = tick;
1111		if (tick > endtick) {
1112			if (panic_quiesce)
1113				return;
1114#ifdef	CHEETAHPLUS_ERRATUM_25
1115			if (cheetah_sendmondo_recover && recovered == 0) {
1116				if (mondo_recover(cpuid, 0)) {
1117					/*
1118					 * We claimed the whole memory or
1119					 * full scan is disabled.
1120					 */
1121					recovered++;
1122				}
1123				tick = gettick();
1124				endtick = tick + xc_tick_limit;
1125				lasttick = tick;
1126				/*
1127				 * Recheck idsr
1128				 */
1129				continue;
1130			} else
1131#endif	/* CHEETAHPLUS_ERRATUM_25 */
1132			{
1133				cmn_err(CE_PANIC, "send mondo timeout "
1134				    "(target 0x%x) [%d NACK %d BUSY]",
1135				    cpuid, nack, busy);
1136			}
1137		}
1138
1139		if (idsr & busymask) {
1140			busy++;
1141			continue;
1142		}
1143		drv_usecwait(1);
1144		shipit(cpuid, 0);
1145		nack++;
1146		busy = 0;
1147	}
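	/*
	 * When SEND_MONDO_STATS is defined, histogram the dispatch latency:
	 * waits under 8192 ticks land in 128-tick buckets (n >> 7), longer
	 * waits in 8192-tick buckets (n >> 13).
	 */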
1148#ifdef SEND_MONDO_STATS
1149	{
1150		int n = gettick() - starttick;
1151		if (n < 8192)
1152			x_one_stimes[n >> 7]++;
1153		else
1154			x_one_ltimes[(n >> 13) & 0xf]++;
1155	}
1156#endif
1157}
1158
1159void
1160syncfpu(void)
1161{
1162}
1163
1164/*
1165 * Return processor specific async error structure
1166 * size used.
1167 */
1168int
1169cpu_aflt_size(void)
1170{
1171	return (sizeof (ch_async_flt_t));
1172}
1173
1174/*
1175 * Tunable to disable the checking of other cpu logout areas during panic for
1176 * potential syndrome 71 generating errors.
1177 */
1178int enable_check_other_cpus_logout = 1;
1179
1180/*
1181 * Check other cpus logout area for potential synd 71 generating
1182 * errors.
1183 */
1184static void
1185cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1186    ch_cpu_logout_t *clop)
1187{
1188	struct async_flt *aflt;
1189	ch_async_flt_t ch_flt;
1190	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1191
1192	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1193		return;
1194	}
1195
1196	bzero(&ch_flt, sizeof (ch_async_flt_t));
1197
1198	t_afar = clop->clo_data.chd_afar;
1199	t_afsr = clop->clo_data.chd_afsr;
1200	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1201#if defined(SERRANO)
1202	ch_flt.afar2 = clop->clo_data.chd_afar2;
1203#endif	/* SERRANO */
1204
1205	/*
1206	 * In order to simplify code, we maintain this afsr_errs
1207	 * variable which holds the aggregate of AFSR and AFSR_EXT
1208	 * sticky bits.
1209	 */
1210	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1211	    (t_afsr & C_AFSR_ALL_ERRS);
1212
1213	/* Setup the async fault structure */
1214	aflt = (struct async_flt *)&ch_flt;
1215	aflt->flt_id = gethrtime_waitfree();
1216	ch_flt.afsr_ext = t_afsr_ext;
1217	ch_flt.afsr_errs = t_afsr_errs;
1218	aflt->flt_stat = t_afsr;
1219	aflt->flt_addr = t_afar;
1220	aflt->flt_bus_id = cpuid;
1221	aflt->flt_inst = cpuid;
1222	aflt->flt_pc = tpc;
1223	aflt->flt_prot = AFLT_PROT_NONE;
1224	aflt->flt_class = CPU_FAULT;
1225	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1226	aflt->flt_tl = tl;
1227	aflt->flt_status = ecc_type;
1228	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1229
1230	/*
1231	 * Queue events on the async event queue, one event per error bit.
1232	 * If no events are queued, queue an event to complain.
1233	 */
1234	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1235		ch_flt.flt_type = CPU_INV_AFSR;
1236		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1237		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1238		    aflt->flt_panic);
1239	}
1240
1241	/*
1242	 * Zero out + invalidate CPU logout.
1243	 */
1244	bzero(clop, sizeof (ch_cpu_logout_t));
1245	clop->clo_data.chd_afar = LOGOUT_INVALID;
1246}
1247
1248/*
1249 * Check the logout areas of all other cpus for unlogged errors.
1250 */
1251static void
1252cpu_check_other_cpus_logout(void)
1253{
1254	int i, j;
1255	processorid_t myid;
1256	struct cpu *cp;
1257	ch_err_tl1_data_t *cl1p;
1258
1259	myid = CPU->cpu_id;
1260	for (i = 0; i < NCPU; i++) {
1261		cp = cpu[i];
1262
1263		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1264		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1265			continue;
1266		}
1267
1268		/*
1269		 * Check each of the tl>0 logout areas
1270		 */
1271		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1272		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1273			if (cl1p->ch_err_tl1_flags == 0)
1274				continue;
1275
1276			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1277			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1278		}
1279
1280		/*
1281		 * Check each of the remaining logout areas
1282		 */
1283		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1284		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1285		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1286		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1287		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1288		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1289	}
1290}
1291
1292/*
1293 * The fast_ecc_err handler transfers control here for UCU, UCC events.
1294 * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1295 * flush the error that caused the UCU/UCC, then again here at the end to
1296 * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1297 * the probability of getting a TL>1 Fast ECC trap when we're fielding
1298 * another Fast ECC trap.
1299 *
1300 * Cheetah+ also handles: TSCE: No additional processing required.
1301 * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1302 *
1303 * Note that the p_clo_flags input is only valid in cases where the
1304 * cpu_private struct is not yet initialized (since that is the only
1305 * time that information cannot be obtained from the logout struct.)
1306 */
1307/*ARGSUSED*/
1308void
1309cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1310{
1311	ch_cpu_logout_t *clop;
1312	uint64_t ceen, nceen;
1313
1314	/*
1315	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer, then we will have to make do without any detailed
1317	 * logout information.
1318	 */
1319	if (CPU_PRIVATE(CPU) == NULL) {
1320		clop = NULL;
1321		ceen = p_clo_flags & EN_REG_CEEN;
1322		nceen = p_clo_flags & EN_REG_NCEEN;
1323	} else {
1324		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1325		ceen = clop->clo_flags & EN_REG_CEEN;
1326		nceen = clop->clo_flags & EN_REG_NCEEN;
1327	}
1328
1329	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1330	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1331}
1332
1333/*
1334 * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1335 * ECC at TL>0.  Need to supply either a error register pointer or a
1336 * cpu logout structure pointer.
1337 */
1338static void
1339cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1340    uint64_t nceen, ch_cpu_logout_t *clop)
1341{
1342	struct async_flt *aflt;
1343	ch_async_flt_t ch_flt;
1344	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1345	char pr_reason[MAX_REASON_STRING];
1346	ch_cpu_errors_t cpu_error_regs;
1347
1348	bzero(&ch_flt, sizeof (ch_async_flt_t));
1349	/*
	 * If no cpu logout data, then we will have to make do without
1351	 * any detailed logout information.
1352	 */
1353	if (clop == NULL) {
1354		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1355		get_cpu_error_state(&cpu_error_regs);
1356		set_cpu_error_state(&cpu_error_regs);
1357		t_afar = cpu_error_regs.afar;
1358		t_afsr = cpu_error_regs.afsr;
1359		t_afsr_ext = cpu_error_regs.afsr_ext;
1360#if defined(SERRANO)
1361		ch_flt.afar2 = cpu_error_regs.afar2;
1362#endif	/* SERRANO */
1363	} else {
1364		t_afar = clop->clo_data.chd_afar;
1365		t_afsr = clop->clo_data.chd_afsr;
1366		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1367#if defined(SERRANO)
1368		ch_flt.afar2 = clop->clo_data.chd_afar2;
1369#endif	/* SERRANO */
1370	}
1371
1372	/*
1373	 * In order to simplify code, we maintain this afsr_errs
1374	 * variable which holds the aggregate of AFSR and AFSR_EXT
1375	 * sticky bits.
1376	 */
1377	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1378	    (t_afsr & C_AFSR_ALL_ERRS);
1379	pr_reason[0] = '\0';
1380
1381	/* Setup the async fault structure */
1382	aflt = (struct async_flt *)&ch_flt;
1383	aflt->flt_id = gethrtime_waitfree();
1384	ch_flt.afsr_ext = t_afsr_ext;
1385	ch_flt.afsr_errs = t_afsr_errs;
1386	aflt->flt_stat = t_afsr;
1387	aflt->flt_addr = t_afar;
1388	aflt->flt_bus_id = getprocessorid();
1389	aflt->flt_inst = CPU->cpu_id;
1390	aflt->flt_pc = tpc;
1391	aflt->flt_prot = AFLT_PROT_NONE;
1392	aflt->flt_class = CPU_FAULT;
1393	aflt->flt_priv = priv;
1394	aflt->flt_tl = tl;
1395	aflt->flt_status = ECC_F_TRAP;
1396	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1397
1398	/*
1399	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1400	 * cmn_err messages out to the console.  The situation is a UCU (in
1401	 * priv mode) which causes a WDU which causes a UE (on the retry).
1402	 * The messages for the UCU and WDU are enqueued and then pulled off
1403	 * the async queue via softint and syslogd starts to process them
1404	 * but doesn't get them to the console.  The UE causes a panic, but
1405	 * since the UCU/WDU messages are already in transit, those aren't
1406	 * on the async queue.  The hack is to check if we have a matching
1407	 * WDU event for the UCU, and if it matches, we're more than likely
1408	 * going to panic with a UE, unless we're under protection.  So, we
1409	 * check to see if we got a matching WDU event and if we're under
1410	 * protection.
1411	 *
1412	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1413	 * looks like this:
1414	 *    UCU->WDU->UE
1415	 * For Panther, it could look like either of these:
1416	 *    UCU---->WDU->L3_WDU->UE
1417	 *    L3_UCU->WDU->L3_WDU->UE
1418	 */
1419	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1420	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1421	    curthread->t_ontrap == NULL &&
1422	    curthread->t_lofault == (uintptr_t)NULL) {
1423		get_cpu_error_state(&cpu_error_regs);
1424		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1425			aflt->flt_panic |=
1426			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1427			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1428			    (cpu_error_regs.afar == t_afar));
1429			aflt->flt_panic |= ((clop == NULL) &&
1430			    (t_afsr_errs & C_AFSR_WDU) &&
1431			    (t_afsr_errs & C_AFSR_L3_WDU));
1432		} else {
1433			aflt->flt_panic |=
1434			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1435			    (cpu_error_regs.afar == t_afar));
1436			aflt->flt_panic |= ((clop == NULL) &&
1437			    (t_afsr_errs & C_AFSR_WDU));
1438		}
1439	}
1440
1441	/*
1442	 * Queue events on the async event queue, one event per error bit.
1443	 * If no events are queued or no Fast ECC events are on in the AFSR,
1444	 * queue an event to complain.
1445	 */
1446	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1447	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1448		ch_flt.flt_type = CPU_INV_AFSR;
1449		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1450		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1451		    aflt->flt_panic);
1452	}
1453
1454	/*
1455	 * Zero out + invalidate CPU logout.
1456	 */
1457	if (clop) {
1458		bzero(clop, sizeof (ch_cpu_logout_t));
1459		clop->clo_data.chd_afar = LOGOUT_INVALID;
1460	}
1461
1462	/*
1463	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1464	 * or disrupting errors have happened.  We do this because if a
1465	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1466	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1467	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1468	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1469	 * deferred or disrupting error happening between checking the AFSR and
1470	 * enabling NCEEN/CEEN.
1471	 *
	 * Note: CEEN and NCEEN are only reenabled if they were on when the
	 * trap was taken.
1474	 */
1475	set_error_enable(get_error_enable() | (nceen | ceen));
1476	if (clear_errors(&ch_flt)) {
1477		aflt->flt_panic |= ((ch_flt.afsr_errs &
1478		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1479		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1480		    NULL);
1481	}
1482
1483	/*
1484	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1485	 * be logged as part of the panic flow.
1486	 */
1487	if (aflt->flt_panic)
1488		fm_panic("%sError(s)", pr_reason);
1489
1490	/*
1491	 * Flushing the Ecache here gets the part of the trap handler that
1492	 * is run at TL=1 out of the Ecache.
1493	 */
1494	cpu_flush_ecache();
1495}
1496
1497/*
1498 * This is called via sys_trap from pil15_interrupt code if the
1499 * corresponding entry in ch_err_tl1_pending is set.  Checks the
1500 * various ch_err_tl1_data structures for valid entries based on the bit
1501 * settings in the ch_err_tl1_flags entry of the structure.
1502 */
1503/*ARGSUSED*/
1504void
1505cpu_tl1_error(struct regs *rp, int panic)
1506{
1507	ch_err_tl1_data_t *cl1p, cl1;
1508	int i, ncl1ps;
1509	uint64_t me_flags;
1510	uint64_t ceen, nceen;
1511
1512	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1513		cl1p = &ch_err_tl1_data;
1514		ncl1ps = 1;
1515	} else if (CPU_PRIVATE(CPU) != NULL) {
1516		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1517		ncl1ps = CH_ERR_TL1_TLMAX;
1518	} else {
1519		ncl1ps = 0;
1520	}
1521
1522	for (i = 0; i < ncl1ps; i++, cl1p++) {
1523		if (cl1p->ch_err_tl1_flags == 0)
1524			continue;
1525
1526		/*
1527		 * Grab a copy of the logout data and invalidate
1528		 * the logout area.
1529		 */
1530		cl1 = *cl1p;
1531		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1532		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1533		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1534
1535		/*
1536		 * Log "first error" in ch_err_tl1_data.
1537		 */
1538		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1539			ceen = get_error_enable() & EN_REG_CEEN;
1540			nceen = get_error_enable() & EN_REG_NCEEN;
1541			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1542			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1543		}
1544#if defined(CPU_IMP_L1_CACHE_PARITY)
1545		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1546			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1547			    (caddr_t)cl1.ch_err_tl1_tpc);
1548		}
1549#endif	/* CPU_IMP_L1_CACHE_PARITY */
1550
1551		/*
1552		 * Log "multiple events" in ch_err_tl1_data.  Note that
1553		 * we don't read and clear the AFSR/AFAR in the TL>0 code
		 * if the structure is busy; we just do the cache flushing
1555		 * we have to do and then do the retry.  So the AFSR/AFAR
1556		 * at this point *should* have some relevant info.  If there
1557		 * are no valid errors in the AFSR, we'll assume they've
1558		 * already been picked up and logged.  For I$/D$ parity,
1559		 * we just log an event with an "Unknown" (NULL) TPC.
1560		 */
1561		if (me_flags & CH_ERR_FECC) {
1562			ch_cpu_errors_t cpu_error_regs;
1563			uint64_t t_afsr_errs;
1564
1565			/*
1566			 * Get the error registers and see if there's
1567			 * a pending error.  If not, don't bother
1568			 * generating an "Invalid AFSR" error event.
1569			 */
1570			get_cpu_error_state(&cpu_error_regs);
1571			t_afsr_errs = (cpu_error_regs.afsr_ext &
1572			    C_AFSR_EXT_ALL_ERRS) |
1573			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1574			if (t_afsr_errs != 0) {
1575				ceen = get_error_enable() & EN_REG_CEEN;
1576				nceen = get_error_enable() & EN_REG_NCEEN;
1577				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1578				    1, ceen, nceen, NULL);
1579			}
1580		}
1581#if defined(CPU_IMP_L1_CACHE_PARITY)
1582		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1583			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1584		}
1585#endif	/* CPU_IMP_L1_CACHE_PARITY */
1586	}
1587}
1588
1589/*
1590 * Called from Fast ECC TL>0 handler in case of fatal error.
1591 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1592 * but if we don't, we'll panic with something reasonable.
1593 */
1594/*ARGSUSED*/
1595void
1596cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1597{
1598	cpu_tl1_error(rp, 1);
1599	/*
1600	 * Should never return, but just in case.
1601	 */
1602	fm_panic("Unsurvivable ECC Error at TL>0");
1603}
1604
1605/*
1606 * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1607 * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1608 * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1609 * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1610 *
1611 * Cheetah+ also handles (No additional processing required):
1612 *    DUE, DTO, DBERR	(NCEEN controlled)
1613 *    THCE		(CEEN and ET_ECC_en controlled)
1614 *    TUE		(ET_ECC_en controlled)
1615 *
1616 * Panther further adds:
1617 *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1618 *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1619 *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1620 *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1621 *    THCE			(CEEN and L2_tag_ECC_en controlled)
1622 *    L3_THCE			(CEEN and ET_ECC_en controlled)
1623 *
1624 * Note that the p_clo_flags input is only valid in cases where the
1625 * cpu_private struct is not yet initialized (since that is the only
1626 * time that information cannot be obtained from the logout struct.)
1627 */
1628/*ARGSUSED*/
1629void
1630cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1631{
1632	struct async_flt *aflt;
1633	ch_async_flt_t ch_flt;
1634	char pr_reason[MAX_REASON_STRING];
1635	ch_cpu_logout_t *clop;
1636	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1637	ch_cpu_errors_t cpu_error_regs;
1638
1639	bzero(&ch_flt, sizeof (ch_async_flt_t));
1640	/*
1641	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer, then we will have to make do without any detailed
1643	 * logout information.
1644	 */
1645	if (CPU_PRIVATE(CPU) == NULL) {
1646		clop = NULL;
1647		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1648		get_cpu_error_state(&cpu_error_regs);
1649		set_cpu_error_state(&cpu_error_regs);
1650		t_afar = cpu_error_regs.afar;
1651		t_afsr = cpu_error_regs.afsr;
1652		t_afsr_ext = cpu_error_regs.afsr_ext;
1653#if defined(SERRANO)
1654		ch_flt.afar2 = cpu_error_regs.afar2;
1655#endif	/* SERRANO */
1656	} else {
1657		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1658		t_afar = clop->clo_data.chd_afar;
1659		t_afsr = clop->clo_data.chd_afsr;
1660		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1661#if defined(SERRANO)
1662		ch_flt.afar2 = clop->clo_data.chd_afar2;
1663#endif	/* SERRANO */
1664	}
1665
1666	/*
1667	 * In order to simplify code, we maintain this afsr_errs
1668	 * variable which holds the aggregate of AFSR and AFSR_EXT
1669	 * sticky bits.
1670	 */
1671	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1672	    (t_afsr & C_AFSR_ALL_ERRS);
1673
1674	pr_reason[0] = '\0';
1675	/* Setup the async fault structure */
1676	aflt = (struct async_flt *)&ch_flt;
1677	ch_flt.afsr_ext = t_afsr_ext;
1678	ch_flt.afsr_errs = t_afsr_errs;
1679	aflt->flt_stat = t_afsr;
1680	aflt->flt_addr = t_afar;
1681	aflt->flt_pc = (caddr_t)rp->r_pc;
1682	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1683	aflt->flt_tl = 0;
1684	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1685
1686	/*
1687	 * If this trap is a result of one of the errors not masked
1688	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1689	 * indicate that a timeout is to be set later.
1690	 */
1691	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1692	    !aflt->flt_panic)
1693		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1694	else
1695		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1696
1697	/*
1698	 * log the CE and clean up
1699	 */
1700	cpu_log_and_clear_ce(&ch_flt);
1701
1702	/*
1703	 * We re-enable CEEN (if required) and check if any disrupting errors
1704	 * have happened.  We do this because if a disrupting error had occurred
1705	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1706	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1707	 * we enable CEEN *before* checking the AFSR to avoid the small window
	 * of an error happening between checking the AFSR and enabling CEEN.
1709	 */
1710	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1711		set_error_enable(get_error_enable() | EN_REG_CEEN);
1712	if (clear_errors(&ch_flt)) {
1713		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1714		    NULL);
1715	}
1716
1717	/*
1718	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1719	 * be logged as part of the panic flow.
1720	 */
1721	if (aflt->flt_panic)
1722		fm_panic("%sError(s)", pr_reason);
1723}
1724
1725/*
1726 * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1727 * L3_EDU:BLD, TO, and BERR events.
1728 * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1729 *
1730 * Cheetah+: No additional errors handled.
1731 *
1732 * Note that the p_clo_flags input is only valid in cases where the
1733 * cpu_private struct is not yet initialized (since that is the only
1734 * time that information cannot be obtained from the logout struct.)
1735 */
1736/*ARGSUSED*/
1737void
1738cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1739{
1740	ushort_t ttype, tl;
1741	ch_async_flt_t ch_flt;
1742	struct async_flt *aflt;
1743	int trampolined = 0;
1744	char pr_reason[MAX_REASON_STRING];
1745	ch_cpu_logout_t *clop;
1746	uint64_t ceen, clo_flags;
1747	uint64_t log_afsr;
1748	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1749	ch_cpu_errors_t cpu_error_regs;
1750	int expected = DDI_FM_ERR_UNEXPECTED;
1751	ddi_acc_hdl_t *hp;
1752
1753	/*
1754	 * We need to look at p_flag to determine if the thread detected an
1755	 * error while dumping core.  We can't grab p_lock here, but it's ok
1756	 * because we just need a consistent snapshot and we know that everyone
1757	 * else will store a consistent set of bits while holding p_lock.  We
1758	 * don't have to worry about a race because SDOCORE is set once prior
1759	 * to doing i/o from the process's address space and is never cleared.
1760	 */
1761	uint_t pflag = ttoproc(curthread)->p_flag;
1762
1763	bzero(&ch_flt, sizeof (ch_async_flt_t));
1764	/*
1765	 * Get the CPU log out info. If we can't find our CPU private
	 * pointer then we will have to make do without any detailed
1767	 * logout information.
1768	 */
1769	if (CPU_PRIVATE(CPU) == NULL) {
1770		clop = NULL;
1771		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1772		get_cpu_error_state(&cpu_error_regs);
1773		set_cpu_error_state(&cpu_error_regs);
1774		t_afar = cpu_error_regs.afar;
1775		t_afsr = cpu_error_regs.afsr;
1776		t_afsr_ext = cpu_error_regs.afsr_ext;
1777#if defined(SERRANO)
1778		ch_flt.afar2 = cpu_error_regs.afar2;
1779#endif	/* SERRANO */
1780		clo_flags = p_clo_flags;
1781	} else {
1782		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1783		t_afar = clop->clo_data.chd_afar;
1784		t_afsr = clop->clo_data.chd_afsr;
1785		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1786#if defined(SERRANO)
1787		ch_flt.afar2 = clop->clo_data.chd_afar2;
1788#endif	/* SERRANO */
1789		clo_flags = clop->clo_flags;
1790	}
1791
1792	/*
1793	 * In order to simplify code, we maintain this afsr_errs
1794	 * variable which holds the aggregate of AFSR and AFSR_EXT
1795	 * sticky bits.
1796	 */
1797	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1798	    (t_afsr & C_AFSR_ALL_ERRS);
1799	pr_reason[0] = '\0';
1800
1801	/*
1802	 * Grab information encoded into our clo_flags field.
1803	 */
1804	ceen = clo_flags & EN_REG_CEEN;
1805	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1806	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1807
1808	/*
1809	 * handle the specific error
1810	 */
1811	aflt = (struct async_flt *)&ch_flt;
1812	aflt->flt_id = gethrtime_waitfree();
1813	aflt->flt_bus_id = getprocessorid();
1814	aflt->flt_inst = CPU->cpu_id;
1815	ch_flt.afsr_ext = t_afsr_ext;
1816	ch_flt.afsr_errs = t_afsr_errs;
1817	aflt->flt_stat = t_afsr;
1818	aflt->flt_addr = t_afar;
1819	aflt->flt_pc = (caddr_t)rp->r_pc;
1820	aflt->flt_prot = AFLT_PROT_NONE;
1821	aflt->flt_class = CPU_FAULT;
1822	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1823	aflt->flt_tl = (uchar_t)tl;
1824	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1825	    C_AFSR_PANIC(t_afsr_errs));
1826	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
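	/*
	 * Classify the trap as a data-access (ECC_D_TRAP) or an
	 * instruction-access (ECC_I_TRAP) deferred error based on the
	 * trap type.
	 */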
1827	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1828
1829	/*
1830	 * If the trap occurred in privileged mode at TL=0, we need to check to
1831	 * see if we were executing in the kernel under on_trap() or t_lofault
1832	 * protection.  If so, modify the saved registers so that we return
1833	 * from the trap to the appropriate trampoline routine.
1834	 */
1835	if (aflt->flt_priv && tl == 0) {
1836		if (curthread->t_ontrap != NULL) {
1837			on_trap_data_t *otp = curthread->t_ontrap;
1838
1839			if (otp->ot_prot & OT_DATA_EC) {
1840				aflt->flt_prot = AFLT_PROT_EC;
1841				otp->ot_trap |= OT_DATA_EC;
1842				rp->r_pc = otp->ot_trampoline;
1843				rp->r_npc = rp->r_pc + 4;
1844				trampolined = 1;
1845			}
1846
1847			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1848			    (otp->ot_prot & OT_DATA_ACCESS)) {
1849				aflt->flt_prot = AFLT_PROT_ACCESS;
1850				otp->ot_trap |= OT_DATA_ACCESS;
1851				rp->r_pc = otp->ot_trampoline;
1852				rp->r_npc = rp->r_pc + 4;
1853				trampolined = 1;
1854				/*
1855				 * for peeks and caut_gets errors are expected
1856				 */
1857				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1858				if (!hp)
1859					expected = DDI_FM_ERR_PEEK;
1860				else if (hp->ah_acc.devacc_attr_access ==
1861				    DDI_CAUTIOUS_ACC)
1862					expected = DDI_FM_ERR_EXPECTED;
1863			}
1864
1865		} else if (curthread->t_lofault) {
1866			aflt->flt_prot = AFLT_PROT_COPY;
1867			rp->r_g1 = EFAULT;
1868			rp->r_pc = curthread->t_lofault;
1869			rp->r_npc = rp->r_pc + 4;
1870			trampolined = 1;
1871		}
1872	}
1873
1874	/*
1875	 * If we're in user mode or we're doing a protected copy, we either
1876	 * want the ASTON code below to send a signal to the user process
1877	 * or we want to panic if aft_panic is set.
1878	 *
1879	 * If we're in privileged mode and we're not doing a copy, then we
1880	 * need to check if we've trampolined.  If we haven't trampolined,
1881	 * we should panic.
1882	 */
1883	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1884		if (t_afsr_errs &
1885		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1886		    ~(C_AFSR_BERR | C_AFSR_TO)))
1887			aflt->flt_panic |= aft_panic;
1888	} else if (!trampolined) {
		aflt->flt_panic = 1;
1890	}
1891
1892	/*
1893	 * If we've trampolined due to a privileged TO or BERR, or if an
1894	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1895	 * event for that TO or BERR.  Queue all other events (if any) besides
	 * the TO/BERR.  Since we may not be enqueueing any events, we need to
1897	 * ignore the number of events queued.  If we haven't trampolined due
1898	 * to a TO or BERR, just enqueue events normally.
1899	 */
1900	log_afsr = t_afsr_errs;
1901	if (trampolined) {
1902		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1903	} else if (!aflt->flt_priv) {
1904		/*
1905		 * User mode, suppress messages if
1906		 * cpu_berr_to_verbose is not set.
1907		 */
1908		if (!cpu_berr_to_verbose)
1909			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1910	}
1911
1912	/*
1913	 * Log any errors that occurred
1914	 */
1915	if (((log_afsr &
1916	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1917	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1918	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1919		ch_flt.flt_type = CPU_INV_AFSR;
1920		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1921		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1922		    aflt->flt_panic);
1923	}
1924
1925	/*
1926	 * Zero out + invalidate CPU logout.
1927	 */
1928	if (clop) {
1929		bzero(clop, sizeof (ch_cpu_logout_t));
1930		clop->clo_data.chd_afar = LOGOUT_INVALID;
1931	}
1932
1933#if defined(JALAPENO) || defined(SERRANO)
1934	/*
1935	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1936	 * IO errors that may have resulted in this trap.
1937	 */
1938	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1939		cpu_run_bus_error_handlers(aflt, expected);
1940	}
1941
1942	/*
1943	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1944	 * line from the Ecache.  We also need to query the bus nexus for
1945	 * fatal errors.  Attempts to do diagnostic read on caches may
1946	 * introduce more errors (especially when the module is bad).
1947	 */
1948	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1949		/*
1950		 * Ask our bus nexus friends if they have any fatal errors.  If
1951		 * so, they will log appropriate error messages.
1952		 */
1953		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1954			aflt->flt_panic = 1;
1955
1956		/*
1957		 * We got a UE or RUE and are panicking, save the fault PA in
1958		 * a known location so that the platform specific panic code
1959		 * can check for copyback errors.
1960		 */
1961		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1962			panic_aflt = *aflt;
1963		}
1964	}
1965
1966	/*
1967	 * Flush Ecache line or entire Ecache
1968	 */
1969	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1970		cpu_error_ecache_flush(&ch_flt);
1971#else /* JALAPENO || SERRANO */
1972	/*
1973	 * UE/BERR/TO: Call our bus nexus friends to check for
1974	 * IO errors that may have resulted in this trap.
1975	 */
1976	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1977		cpu_run_bus_error_handlers(aflt, expected);
1978	}
1979
1980	/*
1981	 * UE: If the UE is in memory, we need to flush the bad
1982	 * line from the Ecache.  We also need to query the bus nexus for
1983	 * fatal errors.  Attempts to do diagnostic read on caches may
1984	 * introduce more errors (especially when the module is bad).
1985	 */
1986	if (t_afsr & C_AFSR_UE) {
1987		/*
1988		 * Ask our legacy bus nexus friends if they have any fatal
1989		 * errors.  If so, they will log appropriate error messages.
1990		 */
1991		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1992			aflt->flt_panic = 1;
1993
1994		/*
1995		 * We got a UE and are panicking, save the fault PA in a known
1996		 * location so that the platform specific panic code can check
1997		 * for copyback errors.
1998		 */
1999		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
2000			panic_aflt = *aflt;
2001		}
2002	}
2003
2004	/*
2005	 * Flush Ecache line or entire Ecache
2006	 */
2007	if (t_afsr_errs &
2008	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2009		cpu_error_ecache_flush(&ch_flt);
2010#endif /* JALAPENO || SERRANO */
2011
2012	/*
2013	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2014	 * or disrupting errors have happened.  We do this because if a
2015	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2016	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2017	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2018	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2019	 * deferred or disrupting error happening between checking the AFSR and
2020	 * enabling NCEEN/CEEN.
2021	 *
	 * Note: CEEN is re-enabled only if it was on when the trap was taken.
2023	 */
2024	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2025	if (clear_errors(&ch_flt)) {
2026		/*
2027		 * Check for secondary errors, and avoid panicking if we
2028		 * have them
2029		 */
2030		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2031		    t_afar) == 0) {
2032			aflt->flt_panic |= ((ch_flt.afsr_errs &
2033			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2034		}
2035		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2036		    NULL);
2037	}
2038
2039	/*
2040	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2041	 * be logged as part of the panic flow.
2042	 */
2043	if (aflt->flt_panic)
2044		fm_panic("%sError(s)", pr_reason);
2045
2046	/*
2047	 * If we queued an error and we are going to return from the trap and
2048	 * the error was in user mode or inside of a copy routine, set AST flag
2049	 * so the queue will be drained before returning to user mode.  The
2050	 * AST processing will also act on our failure policy.
2051	 */
2052	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2053		int pcb_flag = 0;
2054
		if (t_afsr_errs &
		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
		    ~(C_AFSR_BERR | C_AFSR_TO)))
2058			pcb_flag |= ASYNC_HWERR;
2059
2060		if (t_afsr & C_AFSR_BERR)
2061			pcb_flag |= ASYNC_BERR;
2062
2063		if (t_afsr & C_AFSR_TO)
2064			pcb_flag |= ASYNC_BTO;
2065
2066		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2067		aston(curthread);
2068	}
2069}
2070
2071#if defined(CPU_IMP_L1_CACHE_PARITY)
2072/*
2073 * Handling of data and instruction parity errors (traps 0x71, 0x72).
2074 *
2075 * For Panther, P$ data parity errors during floating point load hits
2076 * are also detected (reported as TT 0x71) and handled by this trap
2077 * handler.
2078 *
2079 * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2080 * is available.
2081 */
2082/*ARGSUSED*/
2083void
2084cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2085{
2086	ch_async_flt_t ch_flt;
2087	struct async_flt *aflt;
2088	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2089	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2090	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2091	char *error_class;
2092	int index, way, word;
2093	ch_dc_data_t tmp_dcp;
2094	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
2095	uint64_t parity_bits, pbits;
	/* Even/odd parity of a 2-bit value, indexed by a parity-bit pair */
2097	static int parity_bits_popc[] = { 0, 1, 1, 0 };
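	/*
	 * Illustrative use (see the Jalapeno recovery loop below): a data
	 * word passes the even-parity check when popc64() of the word plus
	 * parity_bits_popc[] of its stored parity pair is even; for example,
	 * a word with an odd popcount and a stored pair of 0b00 or 0b11
	 * sums to an odd value and is flagged as a mismatch.
	 */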
2098
2099	/*
2100	 * Log the error.
2101	 * For icache parity errors the fault address is the trap PC.
2102	 * For dcache/pcache parity errors the instruction would have to
2103	 * be decoded to determine the address and that isn't possible
2104	 * at high PIL.
2105	 */
2106	bzero(&ch_flt, sizeof (ch_async_flt_t));
2107	aflt = (struct async_flt *)&ch_flt;
2108	aflt->flt_id = gethrtime_waitfree();
2109	aflt->flt_bus_id = getprocessorid();
2110	aflt->flt_inst = CPU->cpu_id;
2111	aflt->flt_pc = tpc;
2112	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2113	aflt->flt_prot = AFLT_PROT_NONE;
2114	aflt->flt_class = CPU_FAULT;
2115	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2116	aflt->flt_tl = tl;
2117	aflt->flt_panic = panic;
2118	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2119	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2120
2121	if (iparity) {
2122		cpu_icache_parity_info(&ch_flt);
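		/*
		 * cpl_off != -1 means the mismatch was pinpointed in the I$
		 * data array, cpl_way != -1 alone means it was found in the
		 * tag array, and otherwise the error could not be identified
		 * so the generic IPE class is used.
		 */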
2123		if (ch_flt.parity_data.ipe.cpl_off != -1)
2124			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2125		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2126			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2127		else
2128			error_class = FM_EREPORT_CPU_USIII_IPE;
2129		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2130	} else {
2131		cpu_dcache_parity_info(&ch_flt);
2132		if (ch_flt.parity_data.dpe.cpl_off != -1) {
2133			/*
2134			 * If not at TL 0 and running on a Jalapeno processor,
2135			 * then process as a true ddspe.  A true
2136			 * ddspe error can only occur if the way == 0
2137			 */
2138			way = ch_flt.parity_data.dpe.cpl_way;
2139			if ((tl == 0) && (way != 0) &&
2140			    IS_JALAPENO(cpunodes[CPU->cpu_id].implementation)) {
2141				for (index = 0; index < dc_set_size;
2142				    index += dcache_linesize) {
2143					get_dcache_dtag(index + way *
2144					    dc_set_size,
2145					    (uint64_t *)&tmp_dcp);
2146					/*
2147					 * Check data array for even parity.
2148					 * The 8 parity bits are grouped into
2149					 * 4 pairs each of which covers a 64-bit
					 * word.  The ordering is reversed --
					 * the low-order parity bits cover the
					 * high-order data words.
2153					 */
2154					parity_bits = tmp_dcp.dc_utag >> 8;
2155					for (word = 0; word < 4; word++) {
2156						pbits = (parity_bits >>
2157						    (6 - word * 2)) & 3;
2158						if (((popc64(
2159						    tmp_dcp.dc_data[word]) +
2160						    parity_bits_popc[pbits]) &
2161						    1) && (tmp_dcp.dc_tag &
2162						    VA13)) {
2163							/* cleanup */
2164							correct_dcache_parity(
2165							    dcache_size,
2166							    dcache_linesize);
2167							if (cache_boot_state &
2168							    DCU_DC) {
2169								flush_dcache();
2170							}
2171
2172							set_dcu(get_dcu() |
2173							    cache_boot_state);
2174							return;
2175						}
2176					}
2177				}
2178			} /* (tl == 0) && (way != 0) && IS JALAPENO */
2179			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2180		} else if (ch_flt.parity_data.dpe.cpl_way != -1)
2181			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2182		else
2183			error_class = FM_EREPORT_CPU_USIII_DPE;
2184		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2185		/*
2186		 * For panther we also need to check the P$ for parity errors.
2187		 */
2188		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2189			cpu_pcache_parity_info(&ch_flt);
2190			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2191				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2192				aflt->flt_payload =
2193				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2194			}
2195		}
2196	}
2197
2198	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2199	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2200
2201	if (iparity) {
2202		/*
2203		 * Invalidate entire I$.
2204		 * This is required due to the use of diagnostic ASI
2205		 * accesses that may result in a loss of I$ coherency.
2206		 */
2207		if (cache_boot_state & DCU_IC) {
2208			flush_icache();
2209		}
2210		/*
2211		 * According to section P.3.1 of the Panther PRM, we
2212		 * need to do a little more for recovery on those
2213		 * CPUs after encountering an I$ parity error.
2214		 */
2215		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2216			flush_ipb();
2217			correct_dcache_parity(dcache_size,
2218			    dcache_linesize);
2219			flush_pcache();
2220		}
2221	} else {
2222		/*
		 * Since the valid bit is ignored when checking parity, the
2224		 * D$ data and tag must also be corrected.  Set D$ data bits
2225		 * to zero and set utag to 0, 1, 2, 3.
2226		 */
2227		correct_dcache_parity(dcache_size, dcache_linesize);
2228
2229		/*
2230		 * According to section P.3.3 of the Panther PRM, we
2231		 * need to do a little more for recovery on those
2232		 * CPUs after encountering a D$ or P$ parity error.
2233		 *
2234		 * As far as clearing P$ parity errors, it is enough to
2235		 * simply invalidate all entries in the P$ since P$ parity
2236		 * error traps are only generated for floating point load
2237		 * hits.
2238		 */
2239		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2240			flush_icache();
2241			flush_ipb();
2242			flush_pcache();
2243		}
2244	}
2245
2246	/*
2247	 * Invalidate entire D$ if it was enabled.
2248	 * This is done to avoid stale data in the D$ which might
2249	 * occur with the D$ disabled and the trap handler doing
2250	 * stores affecting lines already in the D$.
2251	 */
2252	if (cache_boot_state & DCU_DC) {
2253		flush_dcache();
2254	}
2255
2256	/*
2257	 * Restore caches to their bootup state.
2258	 */
2259	set_dcu(get_dcu() | cache_boot_state);
2260
2261	/*
2262	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2263	 * be logged as part of the panic flow.
2264	 */
2265	if (aflt->flt_panic)
2266		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2267
2268	/*
2269	 * If this error occurred at TL>0 then flush the E$ here to reduce
2270	 * the chance of getting an unrecoverable Fast ECC error.  This
2271	 * flush will evict the part of the parity trap handler that is run
2272	 * at TL>1.
2273	 */
2274	if (tl) {
2275		cpu_flush_ecache();
2276	}
2277}
2278
2279/*
2280 * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2281 * to indicate which portions of the captured data should be in the ereport.
2282 */
2283void
2284cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2285{
2286	int way = ch_flt->parity_data.ipe.cpl_way;
2287	int offset = ch_flt->parity_data.ipe.cpl_off;
2288	int tag_index;
	struct async_flt *aflt = (struct async_flt *)ch_flt;

2292	if ((offset != -1) || (way != -1)) {
2293		/*
2294		 * Parity error in I$ tag or data
2295		 */
2296		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2297		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2298			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2299			    PN_ICIDX_TO_WAY(tag_index);
2300		else
2301			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2302			    CH_ICIDX_TO_WAY(tag_index);
2303		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2304		    IC_LOGFLAG_MAGIC;
2305	} else {
2306		/*
2307		 * Parity error was not identified.
2308		 * Log tags and data for all ways.
2309		 */
2310		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2311			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2312			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2313				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2314				    PN_ICIDX_TO_WAY(tag_index);
2315			else
2316				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2317				    CH_ICIDX_TO_WAY(tag_index);
2318			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2319			    IC_LOGFLAG_MAGIC;
2320		}
2321	}
2322}
2323
2324/*
2325 * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2326 * to indicate which portions of the captured data should be in the ereport.
2327 */
2328void
2329cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2330{
2331	int way = ch_flt->parity_data.dpe.cpl_way;
2332	int offset = ch_flt->parity_data.dpe.cpl_off;
2333	int tag_index;
2334
2335	if (offset != -1) {
2336		/*
2337		 * Parity error in D$ or P$ data array.
2338		 *
2339		 * First check to see whether the parity error is in D$ or P$
2340		 * since P$ data parity errors are reported in Panther using
2341		 * the same trap.
2342		 */
2343		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2344			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2345			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2346			    CH_PCIDX_TO_WAY(tag_index);
2347			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2348			    PC_LOGFLAG_MAGIC;
2349		} else {
2350			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2351			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2352			    CH_DCIDX_TO_WAY(tag_index);
2353			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2354			    DC_LOGFLAG_MAGIC;
2355		}
2356	} else if (way != -1) {
2357		/*
2358		 * Parity error in D$ tag.
2359		 */
2360		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2361		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2362		    CH_DCIDX_TO_WAY(tag_index);
2363		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2364		    DC_LOGFLAG_MAGIC;
2365	}
2366}
2367#endif	/* CPU_IMP_L1_CACHE_PARITY */
2368
2369/*
2370 * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2371 * post-process CPU events that are dequeued.  As such, it can be invoked
2372 * from softint context, from AST processing in the trap() flow, or from the
2373 * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2374 * Historically this entry point was used to log the actual cmn_err(9F) text;
2375 * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2376 * With FMA this function now also returns a flag which indicates to the
2377 * caller whether the ereport should be posted (1) or suppressed (0).
2378 */
2379static int
2380cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2381{
2382	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2383	struct async_flt *aflt = (struct async_flt *)flt;
2384	uint64_t errors;
2385	extern void memscrub_induced_error(void);
2386
2387	switch (ch_flt->flt_type) {
2388	case CPU_INV_AFSR:
2389		/*
2390		 * If it is a disrupting trap and the AFSR is zero, then
2391		 * the event has probably already been noted. Do not post
2392		 * an ereport.
2393		 */
2394		if ((aflt->flt_status & ECC_C_TRAP) &&
2395		    (!(aflt->flt_stat & C_AFSR_MASK)))
2396			return (0);
2397		else
2398			return (1);
2399	case CPU_TO:
2400	case CPU_BERR:
2401	case CPU_FATAL:
2402	case CPU_FPUERR:
2403		return (1);
2404
2405	case CPU_UE_ECACHE_RETIRE:
2406		cpu_log_err(aflt);
2407		cpu_page_retire(ch_flt);
2408		return (1);
2409
2410	/*
2411	 * Cases where we may want to suppress logging or perform
2412	 * extended diagnostics.
2413	 */
2414	case CPU_CE:
2415	case CPU_EMC:
2416		/*
2417		 * We want to skip logging and further classification
2418		 * only if ALL the following conditions are true:
2419		 *
2420		 *	1. There is only one error
2421		 *	2. That error is a correctable memory error
2422		 *	3. The error is caused by the memory scrubber (in
2423		 *	   which case the error will have occurred under
2424		 *	   on_trap protection)
2425		 *	4. The error is on a retired page
2426		 *
		 * Note: AFLT_PROT_EC is used in places other than the memory
2428		 * scrubber.  However, none of those errors should occur
2429		 * on a retired page.
2430		 */
2431		if ((ch_flt->afsr_errs &
2432		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2433		    aflt->flt_prot == AFLT_PROT_EC) {
2434
2435			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
					/*
					 * Since we're skipping logging, we'll
					 * need to schedule the re-enabling of
					 * CEEN.
					 */
					(void) timeout(
					    cpu_delayed_check_ce_errors,
					    (void *)(uintptr_t)aflt->flt_inst,
					    drv_usectohz(
					    (clock_t)cpu_ceen_delay_secs *
					    MICROSEC));
				}
2447
2448				/*
2449				 * Inform memscrubber - scrubbing induced
2450				 * CE on a retired page.
2451				 */
2452				memscrub_induced_error();
2453				return (0);
2454			}
2455		}
2456
2457		/*
2458		 * Perform/schedule further classification actions, but
2459		 * only if the page is healthy (we don't want bad
2460		 * pages inducing too much diagnostic activity).  If we could
2461		 * not find a page pointer then we also skip this.  If
2462		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2463		 * to copy and recirculate the event (for further diagnostics)
2464		 * and we should not proceed to log it here.
2465		 *
		 * This must be the last step here before the cpu_log_err()
		 * below - if an event recirculates, cpu_ce_log_err() will not
		 * call back into this function when it is redelivered, but
		 * will instead call cpu_log_err() and cpu_ereport_post()
		 * directly.
2470		 *
2471		 * Note: Check cpu_impl_async_log_err if changing this
2472		 */
2473		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2474			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2475			    CE_XDIAG_SKIP_NOPP);
2476		} else {
2477			if (errors != PR_OK) {
2478				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2479				    CE_XDIAG_SKIP_PAGEDET);
2480			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2481			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2482				return (0);
2483			}
2484		}
2485		/*FALLTHRU*/
2486
2487	/*
2488	 * Cases where we just want to report the error and continue.
2489	 */
2490	case CPU_CE_ECACHE:
2491	case CPU_UE_ECACHE:
2492	case CPU_IV:
2493	case CPU_ORPH:
2494		cpu_log_err(aflt);
2495		return (1);
2496
2497	/*
2498	 * Cases where we want to fall through to handle panicking.
2499	 */
2500	case CPU_UE:
2501		/*
2502		 * We want to skip logging in the same conditions as the
2503		 * CE case.  In addition, we want to make sure we're not
2504		 * panicking.
2505		 */
2506		if (!panicstr && (ch_flt->afsr_errs &
2507		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2508		    aflt->flt_prot == AFLT_PROT_EC) {
2509			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2510				/* Zero the address to clear the error */
2511				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2512				/*
2513				 * Inform memscrubber - scrubbing induced
2514				 * UE on a retired page.
2515				 */
2516				memscrub_induced_error();
2517				return (0);
2518			}
2519		}
2520		cpu_log_err(aflt);
2521		break;
2522
2523	default:
2524		/*
2525		 * If the us3_common.c code doesn't know the flt_type, it may
2526		 * be an implementation-specific code.  Call into the impldep
2527		 * backend to find out what to do: if it tells us to continue,
2528		 * break and handle as if falling through from a UE; if not,
2529		 * the impldep backend has handled the error and we're done.
2530		 */
2531		switch (cpu_impl_async_log_err(flt, eqep)) {
2532		case CH_ASYNC_LOG_DONE:
2533			return (1);
2534		case CH_ASYNC_LOG_RECIRC:
2535			return (0);
2536		case CH_ASYNC_LOG_CONTINUE:
2537			break; /* continue on to handle UE-like error */
2538		default:
2539			cmn_err(CE_WARN, "discarding error 0x%p with "
2540			    "invalid fault type (0x%x)",
2541			    (void *)aflt, ch_flt->flt_type);
2542			return (0);
2543		}
2544	}
2545
2546	/* ... fall through from the UE case */
2547
2548	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2549		if (!panicstr) {
2550			cpu_page_retire(ch_flt);
2551		} else {
2552			/*
2553			 * Clear UEs on panic so that we don't
2554			 * get haunted by them during panic or
2555			 * after reboot
2556			 */
2557			cpu_clearphys(aflt);
2558			(void) clear_errors(NULL);
2559		}
2560	}
2561
2562	return (1);
2563}
2564
2565/*
2566 * Retire the bad page that may contain the flushed error.
2567 */
2568void
2569cpu_page_retire(ch_async_flt_t *ch_flt)
2570{
2571	struct async_flt *aflt = (struct async_flt *)ch_flt;
2572	(void) page_retire(aflt->flt_addr, PR_UE);
2573}
2574
2575/*
2576 * Return true if the error specified in the AFSR indicates
2577 * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2578 * for Panther, none for Jalapeno/Serrano).
2579 */
2580/* ARGSUSED */
2581static int
2582cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2583{
2584#if defined(JALAPENO) || defined(SERRANO)
2585	return (0);
2586#elif defined(CHEETAH_PLUS)
2587	if (IS_PANTHER(cpunodes[cpuid].implementation))
2588		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2589	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2590#else	/* CHEETAH_PLUS */
2591	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2592#endif
2593}
2594
2595/*
2596 * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2597 * generic event post-processing for correctable and uncorrectable memory,
2598 * E$, and MTag errors.  Historically this entry point was used to log bits of
2599 * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2600 * converted into an ereport.  In addition, it transmits the error to any
2601 * platform-specific service-processor FRU logging routines, if available.
2602 */
2603void
2604cpu_log_err(struct async_flt *aflt)
2605{
2606	char unum[UNUM_NAMLEN];
2607	int synd_status, synd_code, afar_status;
2608	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2609
2610	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2611		aflt->flt_status |= ECC_ECACHE;
2612	else
2613		aflt->flt_status &= ~ECC_ECACHE;
2614	/*
2615	 * Determine syndrome status.
2616	 */
2617	synd_status = afsr_to_synd_status(aflt->flt_inst,
2618	    ch_flt->afsr_errs, ch_flt->flt_bit);
2619
2620	/*
2621	 * Determine afar status.
2622	 */
2623	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2624		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2625		    ch_flt->flt_bit);
2626	else
2627		afar_status = AFLT_STAT_INVALID;
2628
2629	synd_code = synd_to_synd_code(synd_status,
2630	    aflt->flt_synd, ch_flt->flt_bit);
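	/*
	 * synd_code identifies the failing bit(s); it is -1 if the syndrome
	 * status is invalid or the syndrome cannot be decoded.
	 */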
2631
2632	/*
2633	 * If afar status is not invalid do a unum lookup.
2634	 */
2635	if (afar_status != AFLT_STAT_INVALID) {
2636		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2637	} else {
2638		unum[0] = '\0';
2639	}
2640
2641	/*
2642	 * Do not send the fruid message (plat_ecc_error_data_t)
2643	 * to the SC if it can handle the enhanced error information
2644	 * (plat_ecc_error2_data_t) or when the tunable
2645	 * ecc_log_fruid_enable is set to 0.
2646	 */
2647
2648	if (&plat_ecc_capability_sc_get &&
2649	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2650		if (&plat_log_fruid_error)
2651			plat_log_fruid_error(synd_code, aflt, unum,
2652			    ch_flt->flt_bit);
2653	}
2654
2655	if (aflt->flt_func != NULL)
2656		aflt->flt_func(aflt, unum);
2657
2658	if (afar_status != AFLT_STAT_INVALID)
2659		cpu_log_diag_info(ch_flt);
2660
2661	/*
	 * If we have a CEEN error, we do not re-enable CEEN until after
2663	 * we exit the trap handler. Otherwise, another error may
2664	 * occur causing the handler to be entered recursively.
2665	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2666	 * to try and ensure that the CPU makes progress in the face
2667	 * of a CE storm.
2668	 */
2669	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2670		(void) timeout(cpu_delayed_check_ce_errors,
2671		    (void *)(uintptr_t)aflt->flt_inst,
2672		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2673	}
2674}
2675
2676/*
2677 * Invoked by error_init() early in startup and therefore before
2678 * startup_errorq() is called to drain any error Q -
2679 *
2680 * startup()
2681 *   startup_end()
2682 *     error_init()
2683 *       cpu_error_init()
2684 * errorq_init()
2685 *   errorq_drain()
2686 * start_other_cpus()
2687 *
2688 * The purpose of this routine is to create error-related taskqs.  Taskqs
2689 * are used for this purpose because cpu_lock can't be grabbed from interrupt
2690 * context.
2691 */
2692void
2693cpu_error_init(int items)
2694{
2695	/*
2696	 * Create taskq(s) to reenable CE
2697	 */
2698	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2699	    items, items, TASKQ_PREPOPULATE);
2700}
2701
2702void
2703cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2704{
2705	char unum[UNUM_NAMLEN];
2706	int len;
2707
2708	switch (aflt->flt_class) {
2709	case CPU_FAULT:
2710		cpu_ereport_init(aflt);
2711		if (cpu_async_log_err(aflt, eqep))
2712			cpu_ereport_post(aflt);
2713		break;
2714
2715	case BUS_FAULT:
2716		if (aflt->flt_func != NULL) {
2717			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2718			    unum, UNUM_NAMLEN, &len);
2719			aflt->flt_func(aflt, unum);
2720		}
2721		break;
2722
2723	case RECIRC_CPU_FAULT:
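		/*
		 * A recirculated CPU event has already been through extended
		 * classification, so log and post it directly rather than
		 * going back through cpu_async_log_err().
		 */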
2724		aflt->flt_class = CPU_FAULT;
2725		cpu_log_err(aflt);
2726		cpu_ereport_post(aflt);
2727		break;
2728
2729	case RECIRC_BUS_FAULT:
2730		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2731		/*FALLTHRU*/
2732	default:
2733		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2734		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2735		return;
2736	}
2737}
2738
2739/*
2740 * Scrub and classify a CE.  This function must not modify the
2741 * fault structure passed to it but instead should return the classification
2742 * information.
2743 */
2744
2745static uchar_t
2746cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2747{
2748	uchar_t disp = CE_XDIAG_EXTALG;
2749	on_trap_data_t otd;
2750	uint64_t orig_err;
2751	ch_cpu_logout_t *clop;
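
	/*
	 * In outline: disable CEEN, locate or capture cache logout data,
	 * scrub the AFAR and note whether the scrub logs a matching CE
	 * (CE_XDIAG_CE1), re-read the location and note a second CE
	 * (CE_XDIAG_CE2), then record whether the line was found in the
	 * E$ and in what MOESI state.  The datapoints accumulate in 'disp'.
	 */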
2752
2753	/*
2754	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2755	 * this, but our other callers have not.  Disable preemption to
2756	 * avoid CPU migration so that we restore CEEN on the correct
2757	 * cpu later.
2758	 *
2759	 * CEEN is cleared so that further CEs that our instruction and
	 * data footprint induce do not cause us either to creep down the
	 * kernel stack to the point of overflow, or to spend so much time
	 * on CE notification that little real forward progress is made.
2763	 *
2764	 * NCEEN must not be cleared.  However it is possible that
2765	 * our accesses to the flt_addr may provoke a bus error or timeout
2766	 * if the offending address has just been unconfigured as part of
2767	 * a DR action.  So we must operate under on_trap protection.
2768	 */
2769	kpreempt_disable();
2770	orig_err = get_error_enable();
2771	if (orig_err & EN_REG_CEEN)
2772		set_error_enable(orig_err & ~EN_REG_CEEN);
2773
2774	/*
2775	 * Our classification algorithm includes the line state before
2776	 * the scrub; we'd like this captured after the detection and
2777	 * before the algorithm below - the earlier the better.
2778	 *
2779	 * If we've come from a cpu CE trap then this info already exists
2780	 * in the cpu logout area.
2781	 *
2782	 * For a CE detected by memscrub for which there was no trap
2783	 * (running with CEEN off) cpu_log_and_clear_ce has called
2784	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2785	 * marked the fault structure as incomplete as a flag to later
2786	 * logging code.
2787	 *
2788	 * If called directly from an IO detected CE there has been
2789	 * no line data capture.  In this case we logout to the cpu logout
2790	 * area - that's appropriate since it's the cpu cache data we need
2791	 * for classification.  We thus borrow the cpu logout area for a
	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy
	 * during this time (we will invalidate it again below).
2794	 *
2795	 * If called from the partner check xcall handler then this cpu
2796	 * (the partner) has not necessarily experienced a CE at this
2797	 * address.  But we want to capture line state before its scrub
2798	 * attempt since we use that in our classification.
2799	 */
2800	if (logout_tried == B_FALSE) {
2801		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2802			disp |= CE_XDIAG_NOLOGOUT;
2803	}
2804
2805	/*
2806	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2807	 * no longer be valid (if DR'd since the initial event) so we
2808	 * perform this scrub under on_trap protection.  If this access is
2809	 * ok then further accesses below will also be ok - DR cannot
2810	 * proceed while this thread is active (preemption is disabled);
2811	 * to be safe we'll nonetheless use on_trap again below.
2812	 */
2813	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2814		cpu_scrubphys(ecc);
2815	} else {
2816		no_trap();
2817		if (orig_err & EN_REG_CEEN)
2818			set_error_enable(orig_err);
2819		kpreempt_enable();
2820		return (disp);
2821	}
2822	no_trap();
2823
2824	/*
2825	 * Did the casx read of the scrub log a CE that matches the AFAR?
2826	 * Note that it's quite possible that the read sourced the data from
2827	 * another cpu.
2828	 */
2829	if (clear_ecc(ecc))
2830		disp |= CE_XDIAG_CE1;
2831
2832	/*
2833	 * Read the data again.  This time the read is very likely to
2834	 * come from memory since the scrub induced a writeback to memory.
2835	 */
2836	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2837		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2838	} else {
2839		no_trap();
2840		if (orig_err & EN_REG_CEEN)
2841			set_error_enable(orig_err);
2842		kpreempt_enable();
2843		return (disp);
2844	}
2845	no_trap();
2846
2847	/* Did that read induce a CE that matches the AFAR? */
2848	if (clear_ecc(ecc))
2849		disp |= CE_XDIAG_CE2;
2850
2851	/*
2852	 * Look at the logout information and record whether we found the
2853	 * line in l2/l3 cache.  For Panther we are interested in whether
2854	 * we found it in either cache (it won't reside in both but
2855	 * it is possible to read it that way given the moving target).
2856	 */
2857	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2858	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2859	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2860		int hit, level;
2861		int state;
2862		int totalsize;
2863		ch_ec_data_t *ecp;
2864
2865		/*
2866		 * If hit is nonzero then a match was found and hit will
2867		 * be one greater than the index which hit.  For Panther we
2868		 * also need to pay attention to level to see which of l2$ or
2869		 * l3$ it hit in.
2870		 */
2871		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2872		    0, &level);
2873
2874		if (hit) {
2875			--hit;
2876			disp |= CE_XDIAG_AFARMATCH;
2877
2878			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2879				if (level == 2)
2880					ecp = &clop->clo_data.chd_l2_data[hit];
2881				else
2882					ecp = &clop->clo_data.chd_ec_data[hit];
2883			} else {
2884				ASSERT(level == 2);
2885				ecp = &clop->clo_data.chd_ec_data[hit];
2886			}
2887			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2888			state = cpu_ectag_pa_to_subblk_state(totalsize,
2889			    ecc->flt_addr, ecp->ec_tag);
2890
2891			/*
2892			 * Cheetah variants use different state encodings -
2893			 * the CH_ECSTATE_* defines vary depending on the
2894			 * module we're compiled for.  Translate into our
2895			 * one true version.  Conflate Owner-Shared state
2896			 * of SSM mode with Owner as victimisation of such
2897			 * lines may cause a writeback.
2898			 */
2899			switch (state) {
2900			case CH_ECSTATE_MOD:
2901				disp |= EC_STATE_M;
2902				break;
2903
2904			case CH_ECSTATE_OWN:
2905			case CH_ECSTATE_OWS:
2906				disp |= EC_STATE_O;
2907				break;
2908
2909			case CH_ECSTATE_EXL:
2910				disp |= EC_STATE_E;
2911				break;
2912
2913			case CH_ECSTATE_SHR:
2914				disp |= EC_STATE_S;
2915				break;
2916
2917			default:
2918				disp |= EC_STATE_I;
2919				break;
2920			}
2921		}
2922
2923		/*
2924		 * If we initiated the delayed logout then we are responsible
2925		 * for invalidating the logout area.
2926		 */
2927		if (logout_tried == B_FALSE) {
2928			bzero(clop, sizeof (ch_cpu_logout_t));
2929			clop->clo_data.chd_afar = LOGOUT_INVALID;
2930		}
2931	}
2932
2933	/*
2934	 * Re-enable CEEN if we turned it off.
2935	 */
2936	if (orig_err & EN_REG_CEEN)
2937		set_error_enable(orig_err);
2938	kpreempt_enable();
2939
2940	return (disp);
2941}
2942
2943/*
2944 * Scrub a correctable memory error and collect data for classification
 * of CE type.  This function is called in the detection path, i.e. TL=0
 * handling of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2947 */
2948void
2949cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2950{
2951	/*
2952	 * Cheetah CE classification does not set any bits in flt_status.
2953	 * Instead we will record classification datapoints in flt_disp.
2954	 */
2955	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2956
2957	/*
2958	 * To check if the error detected by IO is persistent, sticky or
2959	 * intermittent.  This is noticed by clear_ecc().
2960	 */
2961	if (ecc->flt_status & ECC_IOBUS)
2962		ecc->flt_stat = C_AFSR_MEMORY;
2963
2964	/*
2965	 * Record information from this first part of the algorithm in
2966	 * flt_disp.
2967	 */
2968	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2969}
2970
2971/*
2972 * Select a partner to perform a further CE classification check from.
2973 * Must be called with kernel preemption disabled (to stop the cpu list
2974 * from changing).  The detecting cpu we are partnering has cpuid
2975 * aflt->flt_inst; we might not be running on the detecting cpu.
2976 *
2977 * Restrict choice to active cpus in the same cpu partition as ourselves in
2978 * an effort to stop bad cpus in one partition causing other partitions to
2979 * perform excessive diagnostic activity.  Actually since the errorq drain
2980 * is run from a softint most of the time and that is a global mechanism
2981 * this isolation is only partial.  Return NULL if we fail to find a
2982 * suitable partner.
2983 *
2984 * We prefer a partner that is in a different latency group to ourselves as
2985 * we will share fewer datapaths.  If such a partner is unavailable then
2986 * choose one in the same lgroup but prefer a different chip and only allow
2987 * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2988 * flags includes PTNR_SELFOK then permit selection of the original detector.
2989 *
2990 * We keep a cache of the last partner selected for a cpu, and we'll try to
2991 * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2992 * have passed since that selection was made.  This provides the benefit
2993 * of the point-of-view of different partners over time but without
2994 * requiring frequent cpu list traversals.
2995 */
2996
2997#define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2998#define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
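
/*
 * For example, the lazy re-check path (ce_lkychk_cb) selects with
 * PTNR_SIBLINGOK | PTNR_SELFOK, while the immediate partner check in
 * ce_scrub_xdiag_recirc() passes 0 and so will not select the detector
 * itself or one of its sibling cores.
 */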
2999
3000static cpu_t *
3001ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
3002{
3003	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
3004	hrtime_t lasttime, thistime;
3005
3006	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
3007
3008	dtcr = cpu[aflt->flt_inst];
3009
3010	/*
3011	 * Short-circuit for the following cases:
3012	 *	. the dtcr is not flagged active
3013	 *	. there is just one cpu present
3014	 *	. the detector has disappeared
3015	 *	. we were given a bad flt_inst cpuid; this should not happen
3016	 *	  (eg PCI code now fills flt_inst) but if it does it is no
3017	 *	  reason to panic.
3018	 *	. there is just one cpu left online in the cpu partition
3019	 *
3020	 * If we return NULL after this point then we do not update the
3021	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
3022	 * again next time; this is the case where the only other cpu online
3023	 * in the detector's partition is on the same chip as the detector
3024	 * and since CEEN re-enable is throttled even that case should not
3025	 * hurt performance.
3026	 */
3027	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
3028		return (NULL);
3029	}
3030	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
3031		if (flags & PTNR_SELFOK) {
3032			*typep = CE_XDIAG_PTNR_SELF;
3033			return (dtcr);
3034		} else {
3035			return (NULL);
3036		}
3037	}
3038
3039	thistime = gethrtime();
3040	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
3041
3042	/*
3043	 * Select a starting point.
3044	 */
3045	if (!lasttime) {
3046		/*
3047		 * We've never selected a partner for this detector before.
3048		 * Start the scan at the next online cpu in the same cpu
3049		 * partition.
3050		 */
3051		sp = dtcr->cpu_next_part;
3052	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
3053		/*
3054		 * Our last selection has not aged yet.  If this partner:
3055		 *	. is still a valid cpu,
3056		 *	. is still in the same partition as the detector
3057		 *	. is still marked active
3058		 *	. satisfies the 'flags' argument criteria
3059		 * then select it again without updating the timestamp.
3060		 */
3061		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3062		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3063		    !cpu_flagged_active(sp->cpu_flags) ||
3064		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3065		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3066		    !(flags & PTNR_SIBLINGOK))) {
3067			sp = dtcr->cpu_next_part;
3068		} else {
3069			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3070				*typep = CE_XDIAG_PTNR_REMOTE;
3071			} else if (sp == dtcr) {
3072				*typep = CE_XDIAG_PTNR_SELF;
3073			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3074				*typep = CE_XDIAG_PTNR_SIBLING;
3075			} else {
3076				*typep = CE_XDIAG_PTNR_LOCAL;
3077			}
3078			return (sp);
3079		}
3080	} else {
3081		/*
3082		 * Our last selection has aged.  If it is nonetheless still a
3083		 * valid cpu then start the scan at the next cpu in the
3084		 * partition after our last partner.  If the last selection
3085		 * is no longer a valid cpu then go with our default.  In
3086		 * this way we slowly cycle through possible partners to
3087		 * obtain multiple viewpoints over time.
3088		 */
3089		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3090		if (sp == NULL) {
3091			sp = dtcr->cpu_next_part;
3092		} else {
3093			sp = sp->cpu_next_part;		/* may be dtcr */
3094			if (sp->cpu_part != dtcr->cpu_part)
3095				sp = dtcr;
3096		}
3097	}
3098
3099	/*
3100	 * We have a proposed starting point for our search, but if this
3101	 * cpu is offline then its cpu_next_part will point to itself
3102	 * so we can't use that to iterate over cpus in this partition in
3103	 * the loop below.  We still want to avoid iterating over cpus not
3104	 * in our partition, so in the case that our starting point is offline
3105	 * we will repoint it to be the detector itself;  and if the detector
3106	 * happens to be offline we'll return NULL from the following loop.
3107	 */
3108	if (!cpu_flagged_active(sp->cpu_flags)) {
3109		sp = dtcr;
3110	}
3111
3112	ptnr = sp;
3113	locptnr = NULL;
3114	sibptnr = NULL;
3115	do {
3116		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3117			continue;
3118		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3119			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3120			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3121			*typep = CE_XDIAG_PTNR_REMOTE;
3122			return (ptnr);
3123		}
3124		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3125			if (sibptnr == NULL)
3126				sibptnr = ptnr;
3127			continue;
3128		}
3129		if (locptnr == NULL)
3130			locptnr = ptnr;
3131	} while ((ptnr = ptnr->cpu_next_part) != sp);
3132
3133	/*
3134	 * A foreign partner has already been returned if one was available.
3135	 *
3136	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3137	 * detector, is active, and is not a sibling of the detector.
3138	 *
3139	 * If sibptnr is not NULL it is a sibling of the detector, and is
3140	 * active.
3141	 *
3142	 * If we have to resort to using the detector itself we have already
3143	 * checked that it is active.
3144	 */
3145	if (locptnr) {
3146		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3147		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3148		*typep = CE_XDIAG_PTNR_LOCAL;
3149		return (locptnr);
3150	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3151		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3152		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3153		*typep = CE_XDIAG_PTNR_SIBLING;
3154		return (sibptnr);
3155	} else if (flags & PTNR_SELFOK) {
3156		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3157		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3158		*typep = CE_XDIAG_PTNR_SELF;
3159		return (dtcr);
3160	}
3161
3162	return (NULL);
3163}
3164
3165/*
3166 * Cross call handler that is requested to run on the designated partner of
 * a cpu that experienced a possibly sticky or possibly persistent CE.
3168 */
3169static void
3170ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3171{
3172	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3173}
3174
3175/*
3176 * The associated errorqs are never destroyed so we do not need to deal with
3177 * them disappearing before this timeout fires.  If the affected memory
 * has been DR'd out since the original event the scrub algorithm will catch
3179 * any errors and return null disposition info.  If the original detecting
3180 * cpu has been DR'd out then ereport detector info will not be able to
 * look up the CPU type; with a small timeout this is unlikely.
3182 */
3183static void
3184ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3185{
3186	struct async_flt *aflt = cbarg->lkycb_aflt;
3187	uchar_t disp;
3188	cpu_t *cp;
3189	int ptnrtype;
3190
3191	kpreempt_disable();
	if ((cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
	    &ptnrtype)) != NULL) {
3194		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3195		    (uint64_t)&disp);
3196		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3197		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3198		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3199	} else {
3200		ce_xdiag_lkydrops++;
3201		if (ncpus > 1)
3202			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3203			    CE_XDIAG_SKIP_NOPTNR);
3204	}
3205	kpreempt_enable();
3206
3207	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3208	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3209}
3210
3211/*
3212 * Called from errorq drain code when processing a CE error, both from
3213 * CPU and PCI drain functions.  Decide what further classification actions,
3214 * if any, we will perform.  Perform immediate actions now, and schedule
3215 * delayed actions as required.  Note that we are no longer necessarily running
3216 * on the detecting cpu, and that the async_flt structure will not persist on
3217 * return from this function.
3218 *
 * Calls to this function should aim to be self-throttling in some way.  With
3220 * the delayed re-enable of CEEN the absolute rate of calls should not
3221 * be excessive.  Callers should also avoid performing in-depth classification
3222 * for events in pages that are already known to be suspect.
3223 *
3224 * We return nonzero to indicate that the event has been copied and
3225 * recirculated for further testing.  The caller should not log the event
3226 * in this case - it will be logged when further test results are available.
3227 *
3228 * Our possible contexts are that of errorq_drain: below lock level or from
3229 * panic context.  We can assume that the cpu we are running on is online.
3230 */
3231
3232
3233#ifdef DEBUG
3234static int ce_xdiag_forceaction;
3235#endif
3236
3237int
3238ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3239    errorq_elem_t *eqep, size_t afltoffset)
3240{
3241	ce_dispact_t dispact, action;
3242	cpu_t *cp;
3243	uchar_t dtcrinfo, disp;
3244	int ptnrtype;
3245
3246	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3247		ce_xdiag_drops++;
3248		return (0);
3249	} else if (!aflt->flt_in_memory) {
3250		ce_xdiag_drops++;
3251		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3252		return (0);
3253	}
3254
3255	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
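	/*
	 * dtcrinfo holds the detector's scrub datapoints recorded in
	 * flt_disp by the scrub algorithm: AFAR match, E$ line state and
	 * the CE1/CE2 results that index the disposition table below.
	 */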
3256
3257	/*
3258	 * Some correctable events are not scrubbed/classified, such as those
3259	 * noticed at the tail of cpu_deferred_error.  So if there is no
3260	 * initial detector classification go no further.
3261	 */
3262	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3263		ce_xdiag_drops++;
3264		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3265		return (0);
3266	}
3267
3268	dispact = CE_DISPACT(ce_disp_table,
3269	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3270	    CE_XDIAG_STATE(dtcrinfo),
3271	    CE_XDIAG_CE1SEEN(dtcrinfo),
	    CE_XDIAG_CE2SEEN(dtcrinfo));

3275	action = CE_ACT(dispact);	/* bad lookup caught below */
3276#ifdef DEBUG
3277	if (ce_xdiag_forceaction != 0)
3278		action = ce_xdiag_forceaction;
3279#endif
3280
3281	switch (action) {
3282	case CE_ACT_LKYCHK: {
3283		caddr_t ndata;
3284		errorq_elem_t *neqep;
3285		struct async_flt *ecc;
3286		ce_lkychk_cb_t *cbargp;
3287
3288		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3289			ce_xdiag_lkydrops++;
3290			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3291			    CE_XDIAG_SKIP_DUPFAIL);
3292			break;
3293		}
3294		ecc = (struct async_flt *)(ndata + afltoffset);
3295
3296		ASSERT(ecc->flt_class == CPU_FAULT ||
3297		    ecc->flt_class == BUS_FAULT);
3298		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3299		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3300
3301		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3302		cbargp->lkycb_aflt = ecc;
3303		cbargp->lkycb_eqp = eqp;
3304		cbargp->lkycb_eqep = neqep;
3305
3306		(void) timeout((void (*)(void *))ce_lkychk_cb,
3307		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
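		/*
		 * ce_lkychk_cb() commits the duplicated errorq element and
		 * frees cbargp when the timeout fires.
		 */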
3308		return (1);
3309	}
3310
3311	case CE_ACT_PTNRCHK:
3312		kpreempt_disable();	/* stop cpu list changing */
3313		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3314			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3315			    (uint64_t)aflt, (uint64_t)&disp);
3316			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3317			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3318			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3319		} else if (ncpus > 1) {
3320			ce_xdiag_ptnrdrops++;
3321			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3322			    CE_XDIAG_SKIP_NOPTNR);
3323		} else {
3324			ce_xdiag_ptnrdrops++;
3325			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3326			    CE_XDIAG_SKIP_UNIPROC);
3327		}
3328		kpreempt_enable();
3329		break;
3330
3331	case CE_ACT_DONE:
3332		break;
3333
3334	case CE_ACT(CE_DISP_BAD):
3335	default:
3336#ifdef DEBUG
		cmn_err(CE_PANIC, "ce_scrub_xdiag_recirc: Bad action '%d'",
		    action);
3338#endif
3339		ce_xdiag_bad++;
3340		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3341		break;
3342	}
3343
3344	return (0);
3345}
3346
3347/*
3348 * We route all errors through a single switch statement.
3349 */
3350void
3351cpu_ue_log_err(struct async_flt *aflt)
3352{
3353	switch (aflt->flt_class) {
3354	case CPU_FAULT:
3355		cpu_ereport_init(aflt);
3356		if (cpu_async_log_err(aflt, NULL))
3357			cpu_ereport_post(aflt);
3358		break;
3359
3360	case BUS_FAULT:
3361		bus_async_log_err(aflt);
3362		break;
3363
3364	default:
3365		cmn_err(CE_WARN, "discarding async error %p with invalid "
3366		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3367		return;
3368	}
3369}
3370
3371/*
3372 * Routine for panic hook callback from panic_idle().
3373 */
3374void
3375cpu_async_panic_callb(void)
3376{
3377	ch_async_flt_t ch_flt;
3378	struct async_flt *aflt;
3379	ch_cpu_errors_t cpu_error_regs;
3380	uint64_t afsr_errs;
3381
3382	get_cpu_error_state(&cpu_error_regs);
3383
3384	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3385	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3386
3387	if (afsr_errs) {
3388
3389		bzero(&ch_flt, sizeof (ch_async_flt_t));
3390		aflt = (struct async_flt *)&ch_flt;
3391		aflt->flt_id = gethrtime_waitfree();
3392		aflt->flt_bus_id = getprocessorid();
3393		aflt->flt_inst = CPU->cpu_id;
3394		aflt->flt_stat = cpu_error_regs.afsr;
3395		aflt->flt_addr = cpu_error_regs.afar;
3396		aflt->flt_prot = AFLT_PROT_NONE;
3397		aflt->flt_class = CPU_FAULT;
3398		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3399		aflt->flt_panic = 1;
3400		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3401		ch_flt.afsr_errs = afsr_errs;
3402#if defined(SERRANO)
3403		ch_flt.afar2 = cpu_error_regs.afar2;
3404#endif	/* SERRANO */
3405		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3406	}
3407}
3408
3409/*
3410 * Routine to convert a syndrome into a syndrome code.
3411 */
3412static int
3413synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3414{
3415	if (synd_status == AFLT_STAT_INVALID)
3416		return (-1);
3417
	/*
	 * Use the syndrome to index the appropriate syndrome table
	 * to get the code indicating which bit(s) are bad.
	 */
3422	if (afsr_bit &
3423	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3424		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3425#if defined(JALAPENO) || defined(SERRANO)
3426			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3427				return (-1);
3428			else
3429				return (BPAR0 + synd);
3430#else /* JALAPENO || SERRANO */
3431			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3432				return (-1);
3433			else
3434				return (mtag_syndrome_tab[synd]);
3435#endif /* JALAPENO || SERRANO */
3436		} else {
3437			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3438				return (-1);
3439			else
3440				return (ecc_syndrome_tab[synd]);
3441		}
3442	} else {
3443		return (-1);
3444	}
3445}
3446
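/*
 * The plat_get_mem_* and plat_get_cpu_unum routines used below are
 * optional (weak) platform symbols; taking a routine's address tests
 * whether the platform module provides it, and we return ENOTSUP when
 * it does not.
 */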
3447int
3448cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3449{
3450	if (&plat_get_mem_sid)
3451		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3452	else
3453		return (ENOTSUP);
3454}
3455
3456int
3457cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3458{
3459	if (&plat_get_mem_offset)
3460		return (plat_get_mem_offset(flt_addr, offp));
3461	else
3462		return (ENOTSUP);
3463}
3464
3465int
3466cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3467{
3468	if (&plat_get_mem_addr)
3469		return (plat_get_mem_addr(unum, sid, offset, addrp));
3470	else
3471		return (ENOTSUP);
3472}
3473
3474/*
3475 * Routine to return a string identifying the physical name
3476 * associated with a memory/cache error.
3477 */
3478int
3479cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3480    uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3481    ushort_t flt_status, char *buf, int buflen, int *lenp)
3482{
3483	int synd_code;
3484	int ret;
3485
3486	/*
3487	 * An AFSR of -1 defaults to a memory syndrome.
3488	 */
3489	if (flt_stat == (uint64_t)-1)
3490		flt_stat = C_AFSR_CE;
3491
3492	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3493
3494	/*
3495	 * Syndrome code must be either a single-bit error code
3496	 * (0...143) or -1 for unum lookup.
3497	 */
3498	if (synd_code < 0 || synd_code >= M2)
3499		synd_code = -1;
3500	if (&plat_get_mem_unum) {
3501		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3502		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3503			buf[0] = '\0';
3504			*lenp = 0;
3505		}
3506
3507		return (ret);
3508	}
3509
3510	return (ENOTSUP);
3511}
3512
3513/*
3514 * Wrapper for cpu_get_mem_unum() routine that takes an
3515 * async_flt struct rather than explicit arguments.
3516 */
3517int
3518cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3519    char *buf, int buflen, int *lenp)
3520{
	/*
	 * If we come through here for an IO bus error, aflt->flt_stat will
	 * not be the CPU AFSR, so we pass in a -1 to cpu_get_mem_unum()
	 * to make it interpret this as a memory error.
	 */
3526	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3527	    (aflt->flt_class == BUS_FAULT) ?
3528	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3529	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3530	    aflt->flt_status, buf, buflen, lenp));
3531}
3532
/*
 * Return the unum string for the given synd_code and async_flt into
 * buf, which must be of size UNUM_NAMLEN.
 */
3537static int
3538cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3539{
3540	int ret, len;
3541
3542	/*
3543	 * Syndrome code must be either a single-bit error code
3544	 * (0...143) or -1 for unum lookup.
3545	 */
3546	if (synd_code < 0 || synd_code >= M2)
3547		synd_code = -1;
3548	if (&plat_get_mem_unum) {
3549		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3550		    aflt->flt_bus_id, aflt->flt_in_memory,
3551		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3552			buf[0] = '\0';
3553		}
3554		return (ret);
3555	}
3556
3557	buf[0] = '\0';
3558	return (ENOTSUP);
3559}
3560
/*
 * This routine is a more generic interface to cpu_get_mem_unum()
 * that may be used by other modules (e.g. the 'mm' driver, through
 * the 'MEM_NAME' ioctl, which is used by fmd to resolve unums
 * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
 */
3567int
3568cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3569    char *buf, int buflen, int *lenp)
3570{
3571	int synd_status, flt_in_memory, ret;
3572	ushort_t flt_status = 0;
3573	char unum[UNUM_NAMLEN];
3574	uint64_t t_afsr_errs;
3575
3576	/*
3577	 * Check for an invalid address.
3578	 */
3579	if (afar == (uint64_t)-1)
3580		return (ENXIO);
3581
3582	if (synd == (uint64_t)-1)
3583		synd_status = AFLT_STAT_INVALID;
3584	else
3585		synd_status = AFLT_STAT_VALID;
3586
3587	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3588	    pf_is_memory(afar >> MMU_PAGESHIFT);
3589
3590	/*
3591	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3592	 */
3593	if (*afsr == (uint64_t)-1)
3594		t_afsr_errs = C_AFSR_CE;
3595	else {
3596		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3597#if defined(CHEETAH_PLUS)
3598		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3599			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3600#endif	/* CHEETAH_PLUS */
3601	}
3602
3603	/*
3604	 * Turn on ECC_ECACHE if error type is E$ Data.
3605	 */
3606	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3607		flt_status |= ECC_ECACHE;
3608
3609	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3610	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3611	if (ret != 0)
3612		return (ret);
3613
3614	if (*lenp >= buflen)
3615		return (ENAMETOOLONG);
3616
3617	(void) strncpy(buf, unum, buflen);
3618
3619	return (0);
3620}
3621
3622/*
3623 * Routine to return memory information associated
3624 * with a physical address and syndrome.
3625 */
3626int
3627cpu_get_mem_info(uint64_t synd, uint64_t afar,
3628    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3629    int *segsp, int *banksp, int *mcidp)
3630{
3631	int synd_status, synd_code;
3632
3633	if (afar == (uint64_t)-1)
3634		return (ENXIO);
3635
3636	if (synd == (uint64_t)-1)
3637		synd_status = AFLT_STAT_INVALID;
3638	else
3639		synd_status = AFLT_STAT_VALID;
3640
3641	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3642
3643	if (p2get_mem_info != NULL)
3644		return ((p2get_mem_info)(synd_code, afar,
3645		    mem_sizep, seg_sizep, bank_sizep,
3646		    segsp, banksp, mcidp));
3647	else
3648		return (ENOTSUP);
3649}
3650
3651/*
3652 * Routine to return a string identifying the physical
3653 * name associated with a cpuid.
3654 */
3655int
3656cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3657{
3658	int ret;
3659	char unum[UNUM_NAMLEN];
3660
3661	if (&plat_get_cpu_unum) {
3662		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3663		    != 0)
3664			return (ret);
3665	} else {
3666		return (ENOTSUP);
3667	}
3668
3669	if (*lenp >= buflen)
3670		return (ENAMETOOLONG);
3671
3672	(void) strncpy(buf, unum, buflen);
3673
3674	return (0);
3675}
3676
3677/*
3678 * This routine exports the name buffer size.
3679 */
3680size_t
3681cpu_get_name_bufsize()
3682{
3683	return (UNUM_NAMLEN);
3684}
3685
/*
 * Historical function, apparently not used.
 */
3689/* ARGSUSED */
3690void
3691cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3692{}
3693
3694/*
3695 * Historical function only called for SBus errors in debugging.
3696 */
3697/*ARGSUSED*/
3698void
3699read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3700{}
3701
3702/*
3703 * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3704 * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3705 * an async fault structure argument is passed in, the captured error state
3706 * (AFSR, AFAR) info will be returned in the structure.
3707 */
3708int
3709clear_errors(ch_async_flt_t *ch_flt)
3710{
3711	struct async_flt *aflt = (struct async_flt *)ch_flt;
3712	ch_cpu_errors_t	cpu_error_regs;
3713
3714	get_cpu_error_state(&cpu_error_regs);
3715
3716	if (ch_flt != NULL) {
3717		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3718		aflt->flt_addr = cpu_error_regs.afar;
3719		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3720		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3721		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3722#if defined(SERRANO)
3723		ch_flt->afar2 = cpu_error_regs.afar2;
3724#endif	/* SERRANO */
3725	}
3726
3727	set_cpu_error_state(&cpu_error_regs);
3728
3729	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3730	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3731}
3732
3733/*
3734 * Clear any AFSR error bits, and check for persistence.
3735 *
3736 * It would be desirable to also insist that syndrome match.  PCI handling
3737 * has already filled flt_synd.  For errors trapped by CPU we only fill
3738 * flt_synd when we queue the event, so we do not have a valid flt_synd
3739 * during initial classification (it is valid if we're called as part of
3740 * subsequent low-pil additional classification attempts).  We could try
3741 * to determine which syndrome to use: we know we're only called for
3742 * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3743 * would be esynd/none and esynd/msynd, respectively.  If that is
3744 * implemented then what do we do in the case that we do experience an
3745 * error on the same afar but with different syndrome?  At the very least
 * we should count such occurrences.  Anyway, for now, we'll leave it as
3747 * it has been for ages.
3748 */
3749static int
3750clear_ecc(struct async_flt *aflt)
3751{
3752	ch_cpu_errors_t	cpu_error_regs;
3753
3754	/*
3755	 * Snapshot the AFSR and AFAR and clear any errors
3756	 */
3757	get_cpu_error_state(&cpu_error_regs);
3758	set_cpu_error_state(&cpu_error_regs);
3759
3760	/*
3761	 * If any of the same memory access error bits are still on and
3762	 * the AFAR matches, return that the error is persistent.
3763	 */
3764	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3765	    cpu_error_regs.afar == aflt->flt_addr);
3766}
3767
3768/*
3769 * Turn off all cpu error detection, normally only used for panics.
3770 */
3771void
3772cpu_disable_errors(void)
3773{
3774	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3775
	/*
	 * With error detection now turned off, check the other cpus'
	 * logout areas for any unlogged errors.
	 */
3780	if (enable_check_other_cpus_logout) {
3781		cpu_check_other_cpus_logout();
3782		/*
3783		 * Make a second pass over the logout areas, in case
3784		 * there is a failing CPU in an error-trap loop which
3785		 * will write to the logout area once it is emptied.
3786		 */
3787		cpu_check_other_cpus_logout();
3788	}
3789}
3790
3791/*
3792 * Enable errors.
3793 */
3794void
3795cpu_enable_errors(void)
3796{
3797	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3798}
3799
3800/*
3801 * Flush the entire ecache using displacement flush by reading through a
3802 * physical address range twice as large as the Ecache.
3803 */
3804void
3805cpu_flush_ecache(void)
3806{
3807	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3808	    cpunodes[CPU->cpu_id].ecache_linesize);
3809}
3810
3811/*
3812 * Return CPU E$ set size - E$ size divided by the associativity.
3813 * We use this function in places where the CPU_PRIVATE ptr may not be
3814 * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3815 * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3816 * up before the kernel switches from OBP's to the kernel's trap table, so
 * we don't have to worry about cpunodes being uninitialized.
3818 */
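/*
 * For example, a 4MB Ecache organized as 2-way set-associative has a
 * 2MB set size.
 */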
3819int
3820cpu_ecache_set_size(struct cpu *cp)
3821{
3822	if (CPU_PRIVATE(cp))
3823		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3824
3825	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3826}
3827
3828/*
3829 * Flush Ecache line.
3830 * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3831 * Uses normal displacement flush for Cheetah.
3832 */
3833static void
3834cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3835{
3836	struct async_flt *aflt = (struct async_flt *)ch_flt;
3837	int ec_set_size = cpu_ecache_set_size(CPU);
3838
3839	ecache_flush_line(aflt->flt_addr, ec_set_size);
3840}
3841
/*
 * Scrub physical address.
 * Scrub code is different depending upon whether this is a Cheetah+ with a
 * 2-way Ecache or a direct-mapped Ecache.
 */
3847static void
3848cpu_scrubphys(struct async_flt *aflt)
3849{
3850	int ec_set_size = cpu_ecache_set_size(CPU);
3851
3852	scrubphys(aflt->flt_addr, ec_set_size);
3853}
3854
/*
 * Clear physical address.
 * Scrub code is different depending upon whether this is a Cheetah+ with a
 * 2-way Ecache or a direct-mapped Ecache.
 */
3860void
3861cpu_clearphys(struct async_flt *aflt)
3862{
3863	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3864	int ec_set_size = cpu_ecache_set_size(CPU);
3865
3866
3867	clearphys(aflt->flt_addr, ec_set_size, lsize);
3868}
3869
3870#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Check for a matching valid line in all the ways.
 * If found, return way# + 1. Otherwise return 0.
 */
3875static int
3876cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3877{
3878	struct async_flt *aflt = (struct async_flt *)ch_flt;
3879	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3880	int ec_set_size = cpu_ecache_set_size(CPU);
3881	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3882	int nway = cpu_ecache_nway();
3883	int i;
3884
3885	for (i = 0; i < nway; i++, ecp++) {
3886		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3887		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3888		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3889			return (i+1);
3890	}
3891	return (0);
3892}
3893#endif /* CPU_IMP_ECACHE_ASSOC */
3894
/*
 * Check whether a line in the given logout info matches the specified
 * fault address.  If reqval is set then the line must not be Invalid.
 * Returns 0 on failure; on success (way + 1) is returned and *level is
 * set to 2 for l2$ or 3 for l3$.
 */
3901static int
3902cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3903{
3904	ch_diag_data_t *cdp = data;
3905	ch_ec_data_t *ecp;
3906	int totalsize, ec_set_size;
3907	int i, ways;
3908	int match = 0;
3909	int tagvalid;
3910	uint64_t addr, tagpa;
3911	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3912
3913	/*
3914	 * Check the l2$ logout data
3915	 */
3916	if (ispanther) {
3917		ecp = &cdp->chd_l2_data[0];
3918		ec_set_size = PN_L2_SET_SIZE;
3919		ways = PN_L2_NWAYS;
3920	} else {
3921		ecp = &cdp->chd_ec_data[0];
3922		ec_set_size = cpu_ecache_set_size(CPU);
3923		ways = cpu_ecache_nway();
3924		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3925	}
3926	/* remove low order PA bits from fault address not used in PA tag */
3927	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3928	for (i = 0; i < ways; i++, ecp++) {
3929		if (ispanther) {
3930			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3931			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3932		} else {
3933			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3934			tagvalid = !cpu_ectag_line_invalid(totalsize,
3935			    ecp->ec_tag);
3936		}
3937		if (tagpa == addr && (!reqval || tagvalid)) {
3938			match = i + 1;
3939			*level = 2;
3940			break;
3941		}
3942	}
3943
3944	if (match || !ispanther)
3945		return (match);
3946
3947	/* For Panther we also check the l3$ */
3948	ecp = &cdp->chd_ec_data[0];
3949	ec_set_size = PN_L3_SET_SIZE;
3950	ways = PN_L3_NWAYS;
3951	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3952
3953	for (i = 0; i < ways; i++, ecp++) {
3954		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3955		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3956			match = i + 1;
3957			*level = 3;
3958			break;
3959		}
3960	}
3961
3962	return (match);
3963}
3964
3965#if defined(CPU_IMP_L1_CACHE_PARITY)
/*
 * Record information related to the source of a Dcache Parity Error.
 */
3969static void
3970cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3971{
3972	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3973	int index;
3974
3975	/*
3976	 * Since instruction decode cannot be done at high PIL
3977	 * just examine the entire Dcache to locate the error.
3978	 */
3979	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3980		ch_flt->parity_data.dpe.cpl_way = -1;
3981		ch_flt->parity_data.dpe.cpl_off = -1;
3982	}
3983	for (index = 0; index < dc_set_size; index += dcache_linesize)
3984		cpu_dcache_parity_check(ch_flt, index);
3985}
3986
3987/*
3988 * Check all ways of the Dcache at a specified index for good parity.
3989 */
3990static void
3991cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3992{
3993	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3994	uint64_t parity_bits, pbits, data_word;
3995	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3996	int way, word, data_byte;
3997	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3998	ch_dc_data_t tmp_dcp;
3999
4000	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
4001		/*
4002		 * Perform diagnostic read.
4003		 */
4004		get_dcache_dtag(index + way * dc_set_size,
4005		    (uint64_t *)&tmp_dcp);
4006
4007		/*
4008		 * Check tag for even parity.
4009		 * Sum of 1 bits (including parity bit) should be even.
4010		 */
4011		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
			/*
			 * If this is the first error, log detailed information
			 * about it and check the snoop tag. Otherwise just
			 * record the fact that we found another error.
			 */
4017			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4018				ch_flt->parity_data.dpe.cpl_way = way;
4019				ch_flt->parity_data.dpe.cpl_cache =
4020				    CPU_DC_PARITY;
4021				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
4022
4023				if (popc64(tmp_dcp.dc_sntag &
4024				    CHP_DCSNTAG_PARMASK) & 1) {
4025					ch_flt->parity_data.dpe.cpl_tag |=
4026					    CHP_DC_SNTAG;
4027					ch_flt->parity_data.dpe.cpl_lcnt++;
4028				}
4029
4030				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
4031			}
4032
4033			ch_flt->parity_data.dpe.cpl_lcnt++;
4034		}
4035
4036		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4037			/*
4038			 * Panther has more parity bits than the other
4039			 * processors for covering dcache data and so each
4040			 * byte of data in each word has its own parity bit.
4041			 */
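			/*
			 * The byte loop below walks one byte at a time:
			 * each pass compares the parity of the masked
			 * data_word against the low bit of pbits, then
			 * shifts data_word by a byte and pbits by a bit.
			 */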
4042			parity_bits = tmp_dcp.dc_pn_data_parity;
4043			for (word = 0; word < 4; word++) {
4044				data_word = tmp_dcp.dc_data[word];
4045				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
4046				for (data_byte = 0; data_byte < 8;
4047				    data_byte++) {
4048					if (((popc64(data_word &
4049					    PN_DC_DATA_PARITY_MASK)) & 1) ^
4050					    (pbits & 1)) {
4051						cpu_record_dc_data_parity(
4052						    ch_flt, dcp, &tmp_dcp, way,
4053						    word);
4054					}
4055					pbits >>= 1;
4056					data_word >>= 8;
4057				}
4058				parity_bits >>= 8;
4059			}
4060		} else {
4061			/*
4062			 * Check data array for even parity.
4063			 * The 8 parity bits are grouped into 4 pairs each
4064			 * of which covers a 64-bit word.  The endianness is
4065			 * reversed -- the low-order parity bits cover the
4066			 * high-order data words.
4067			 */
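			/*
			 * For example, word 0 is checked against parity-bit
			 * pair 7:6 of the extracted byte (shift of 6 below)
			 * and word 3 against pair 1:0; parity_bits_popc[]
			 * holds the parity (popcount mod 2) of each possible
			 * 2-bit pair value.
			 */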
4068			parity_bits = tmp_dcp.dc_utag >> 8;
4069			for (word = 0; word < 4; word++) {
4070				pbits = (parity_bits >> (6 - word * 2)) & 3;
4071				if ((popc64(tmp_dcp.dc_data[word]) +
4072				    parity_bits_popc[pbits]) & 1) {
4073					cpu_record_dc_data_parity(ch_flt, dcp,
4074					    &tmp_dcp, way, word);
4075				}
4076			}
4077		}
4078	}
4079}
4080
4081static void
4082cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4083    ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4084{
	/*
	 * If this is the first error, log detailed information about it.
	 * Otherwise just record the fact that we found another error.
	 */
4089	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4090		ch_flt->parity_data.dpe.cpl_way = way;
4091		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4092		ch_flt->parity_data.dpe.cpl_off = word * 8;
4093		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4094	}
4095	ch_flt->parity_data.dpe.cpl_lcnt++;
4096}
4097
4098/*
4099 * Record information related to the source of an Icache Parity Error.
4100 *
4101 * Called with the Icache disabled so any diagnostic accesses are safe.
4102 */
4103static void
4104cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4105{
4106	int	ic_set_size;
4107	int	ic_linesize;
4108	int	index;
4109
4110	if (CPU_PRIVATE(CPU)) {
4111		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4112		    CH_ICACHE_NWAY;
4113		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4114	} else {
4115		ic_set_size = icache_size / CH_ICACHE_NWAY;
4116		ic_linesize = icache_linesize;
4117	}
4118
4119	ch_flt->parity_data.ipe.cpl_way = -1;
4120	ch_flt->parity_data.ipe.cpl_off = -1;
4121
4122	for (index = 0; index < ic_set_size; index += ic_linesize)
4123		cpu_icache_parity_check(ch_flt, index);
4124}
4125
4126/*
4127 * Check all ways of the Icache at a specified index for good parity.
4128 */
4129static void
4130cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4131{
4132	uint64_t parmask, pn_inst_parity;
4133	int ic_set_size;
4134	int ic_linesize;
4135	int flt_index, way, instr, num_instr;
4136	struct async_flt *aflt = (struct async_flt *)ch_flt;
4137	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4138	ch_ic_data_t tmp_icp;
4139
4140	if (CPU_PRIVATE(CPU)) {
4141		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4142		    CH_ICACHE_NWAY;
4143		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4144	} else {
4145		ic_set_size = icache_size / CH_ICACHE_NWAY;
4146		ic_linesize = icache_linesize;
4147	}
4148
4149	/*
4150	 * Panther has twice as many instructions per icache line and the
4151	 * instruction parity bit is in a different location.
4152	 */
4153	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4154		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4155		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4156	} else {
4157		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4158		pn_inst_parity = 0;
4159	}
4160
4161	/*
4162	 * Index at which we expect to find the parity error.
4163	 */
4164	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4165
4166	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4167		/*
4168		 * Diagnostic reads expect address argument in ASI format.
4169		 */
4170		get_icache_dtag(2 * (index + way * ic_set_size),
4171		    (uint64_t *)&tmp_icp);
4172
		/*
		 * If this is the index at which we expect to find the
		 * error, log detailed information about each of the ways.
		 * This information will be displayed later if we can't
		 * determine the exact way in which the error is located.
		 */
4179		if (flt_index == index)
4180			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4181
4182		/*
4183		 * Check tag for even parity.
4184		 * Sum of 1 bits (including parity bit) should be even.
4185		 */
4186		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
			/*
			 * If this way is the one in which we expected
			 * to find the error, record the way and check the
			 * snoop tag. Otherwise just record the fact that we
			 * found another error.
			 */
4193			if (flt_index == index) {
4194				ch_flt->parity_data.ipe.cpl_way = way;
4195				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4196
4197				if (popc64(tmp_icp.ic_sntag &
4198				    CHP_ICSNTAG_PARMASK) & 1) {
4199					ch_flt->parity_data.ipe.cpl_tag |=
4200					    CHP_IC_SNTAG;
4201					ch_flt->parity_data.ipe.cpl_lcnt++;
4202				}
4203
4204			}
4205			ch_flt->parity_data.ipe.cpl_lcnt++;
4206			continue;
4207		}
4208
4209		/*
4210		 * Check instruction data for even parity.
4211		 * Bits participating in parity differ for PC-relative
4212		 * versus non-PC-relative instructions.
4213		 */
4214		for (instr = 0; instr < num_instr; instr++) {
4215			parmask = (tmp_icp.ic_data[instr] &
4216			    CH_ICDATA_PRED_ISPCREL) ?
4217			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4218			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4219			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
				/*
				 * If this way is the one in which we expected
				 * to find the error, record the way and
				 * offset. Otherwise just log the fact we
				 * found another error.
				 */
4226				if (flt_index == index) {
4227					ch_flt->parity_data.ipe.cpl_way = way;
4228					ch_flt->parity_data.ipe.cpl_off =
4229					    instr * 4;
4230				}
4231				ch_flt->parity_data.ipe.cpl_lcnt++;
4232				continue;
4233			}
4234		}
4235	}
4236}
4237
/*
 * Record information related to the source of a Pcache Parity Error.
 */
4241static void
4242cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4243{
4244	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4245	int index;
4246
4247	/*
4248	 * Since instruction decode cannot be done at high PIL just
4249	 * examine the entire Pcache to check for any parity errors.
4250	 */
4251	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4252		ch_flt->parity_data.dpe.cpl_way = -1;
4253		ch_flt->parity_data.dpe.cpl_off = -1;
4254	}
4255	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4256		cpu_pcache_parity_check(ch_flt, index);
4257}
4258
4259/*
4260 * Check all ways of the Pcache at a specified index for good parity.
4261 */
4262static void
4263cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4264{
4265	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4266	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4267	int way, word, pbit, parity_bits;
4268	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4269	ch_pc_data_t tmp_pcp;
4270
4271	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4272		/*
4273		 * Perform diagnostic read.
4274		 */
4275		get_pcache_dtag(index + way * pc_set_size,
4276		    (uint64_t *)&tmp_pcp);
4277		/*
4278		 * Check data array for odd parity. There are 8 parity
4279		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4280		 * of those bits covers exactly 8 bytes of the data
4281		 * array:
4282		 *
4283		 *	parity bit	P$ data bytes covered
4284		 *	----------	---------------------
4285		 *	50		63:56
4286		 *	51		55:48
4287		 *	52		47:40
4288		 *	53		39:32
4289		 *	54		31:24
4290		 *	55		23:16
4291		 *	56		15:8
4292		 *	57		7:0
4293		 */
4294		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4295		for (word = 0; word < pc_data_words; word++) {
4296			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4297			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
				/*
				 * If this is the first error, log detailed
				 * information about it. Otherwise just record
				 * the fact that we found another error.
				 */
4303				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4304					ch_flt->parity_data.dpe.cpl_way = way;
4305					ch_flt->parity_data.dpe.cpl_cache =
4306					    CPU_PC_PARITY;
4307					ch_flt->parity_data.dpe.cpl_off =
4308					    word * sizeof (uint64_t);
4309					bcopy(&tmp_pcp, pcp,
4310					    sizeof (ch_pc_data_t));
4311				}
4312				ch_flt->parity_data.dpe.cpl_lcnt++;
4313			}
4314		}
4315	}
4316}
4317
4318
4319/*
4320 * Add L1 Data cache data to the ereport payload.
4321 */
4322static void
4323cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4324{
4325	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4326	ch_dc_data_t *dcp;
4327	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4328	uint_t nelem;
4329	int i, ways_to_check, ways_logged = 0;
4330
	/*
	 * If this is a D$ fault then there may be multiple
	 * ways captured in the ch_parity_log_t structure.
	 * Otherwise, there will be at most one way captured
	 * in the ch_diag_data_t struct.
	 * Check each way to see if it should be encoded.
	 */
4338	if (ch_flt->flt_type == CPU_DC_PARITY)
4339		ways_to_check = CH_DCACHE_NWAY;
4340	else
4341		ways_to_check = 1;
4342	for (i = 0; i < ways_to_check; i++) {
4343		if (ch_flt->flt_type == CPU_DC_PARITY)
4344			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4345		else
4346			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4347		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4348			bcopy(dcp, &dcdata[ways_logged],
4349			    sizeof (ch_dc_data_t));
4350			ways_logged++;
4351		}
4352	}
4353
4354	/*
4355	 * Add the dcache data to the payload.
4356	 */
4357	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4358	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4359	if (ways_logged != 0) {
4360		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4361		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4362		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4363	}
4364}
4365
4366/*
4367 * Add L1 Instruction cache data to the ereport payload.
4368 */
4369static void
4370cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4371{
4372	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4373	ch_ic_data_t *icp;
4374	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4375	uint_t nelem;
4376	int i, ways_to_check, ways_logged = 0;
4377
4378	/*
4379	 * If this is an I$ fault then there may be multiple
4380	 * ways captured in the ch_parity_log_t structure.
4381	 * Otherwise, there will be at most one way captured
4382	 * in the ch_diag_data_t struct.
4383	 * Check each way to see if it should be encoded.
4384	 */
4385	if (ch_flt->flt_type == CPU_IC_PARITY)
4386		ways_to_check = CH_ICACHE_NWAY;
4387	else
4388		ways_to_check = 1;
4389	for (i = 0; i < ways_to_check; i++) {
4390		if (ch_flt->flt_type == CPU_IC_PARITY)
4391			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4392		else
4393			icp = &ch_flt->flt_diag_data.chd_ic_data;
4394		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4395			bcopy(icp, &icdata[ways_logged],
4396			    sizeof (ch_ic_data_t));
4397			ways_logged++;
4398		}
4399	}
4400
4401	/*
4402	 * Add the icache data to the payload.
4403	 */
4404	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4405	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4406	if (ways_logged != 0) {
4407		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4408		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4409		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4410	}
4411}
4412
4413#endif	/* CPU_IMP_L1_CACHE_PARITY */
4414
4415/*
4416 * Add ecache data to payload.
4417 */
4418static void
4419cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4420{
4421	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4422	ch_ec_data_t *ecp;
4423	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4424	uint_t nelem;
4425	int i, ways_logged = 0;
4426
	/*
	 * Check each way to see if it should be encoded
	 * and concatenate it into a temporary buffer.
	 */
4431	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4432		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4433		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4434			bcopy(ecp, &ecdata[ways_logged],
4435			    sizeof (ch_ec_data_t));
4436			ways_logged++;
4437		}
4438	}
4439
4440	/*
4441	 * Panther CPUs have an additional level of cache and so
4442	 * what we just collected was the L3 (ecache) and not the
4443	 * L2 cache.
4444	 */
4445	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4446		/*
4447		 * Add the L3 (ecache) data to the payload.
4448		 */
4449		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4450		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4451		if (ways_logged != 0) {
4452			nelem = sizeof (ch_ec_data_t) /
4453			    sizeof (uint64_t) * ways_logged;
4454			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4455			    DATA_TYPE_UINT64_ARRAY, nelem,
4456			    (uint64_t *)ecdata, NULL);
4457		}
4458
4459		/*
4460		 * Now collect the L2 cache.
4461		 */
4462		ways_logged = 0;
4463		for (i = 0; i < PN_L2_NWAYS; i++) {
4464			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4465			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4466				bcopy(ecp, &ecdata[ways_logged],
4467				    sizeof (ch_ec_data_t));
4468				ways_logged++;
4469			}
4470		}
4471	}
4472
4473	/*
4474	 * Add the L2 cache data to the payload.
4475	 */
4476	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4477	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4478	if (ways_logged != 0) {
4479		nelem = sizeof (ch_ec_data_t) /
4480		    sizeof (uint64_t) * ways_logged;
4481		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4482		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4483	}
4484}
4485
4486/*
4487 * Initialize cpu scheme for specified cpu.
4488 */
4489static void
4490cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4491{
	char sbuf[21]; /* room for UINT64_MAX in decimal, plus '\0' */
4493	uint8_t mask;
4494
4495	mask = cpunodes[cpuid].version;
4496	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4497	    (u_longlong_t)cpunodes[cpuid].device_id);
4498	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4499	    cpuid, &mask, (const char *)sbuf);
4500}
4501
4502/*
4503 * Returns ereport resource type.
4504 */
4505static int
4506cpu_error_to_resource_type(struct async_flt *aflt)
4507{
4508	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4509
4510	switch (ch_flt->flt_type) {
4511
4512	case CPU_CE_ECACHE:
4513	case CPU_UE_ECACHE:
4514	case CPU_UE_ECACHE_RETIRE:
4515	case CPU_ORPH:
4516		/*
4517		 * If AFSR error bit indicates L2$ Data for Cheetah,
4518		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4519		 * E$ Data type, otherwise, return CPU type.
4520		 */
4521		if (cpu_error_is_ecache_data(aflt->flt_inst,
4522		    ch_flt->flt_bit))
4523			return (ERRTYPE_ECACHE_DATA);
4524		return (ERRTYPE_CPU);
4525
4526	case CPU_CE:
4527	case CPU_UE:
4528	case CPU_EMC:
4529	case CPU_DUE:
4530	case CPU_RCE:
4531	case CPU_RUE:
4532	case CPU_FRC:
4533	case CPU_FRU:
4534		return (ERRTYPE_MEMORY);
4535
4536	case CPU_IC_PARITY:
4537	case CPU_DC_PARITY:
4538	case CPU_FPUERR:
4539	case CPU_PC_PARITY:
4540	case CPU_ITLB_PARITY:
4541	case CPU_DTLB_PARITY:
4542		return (ERRTYPE_CPU);
4543	}
4544	return (ERRTYPE_UNKNOWN);
4545}
4546
4547/*
4548 * Encode the data saved in the ch_async_flt_t struct into
4549 * the FM ereport payload.
4550 */
4551static void
4552cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4553    nvlist_t *resource, int *afar_status, int *synd_status)
4554{
4555	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4556	*synd_status = AFLT_STAT_INVALID;
4557	*afar_status = AFLT_STAT_INVALID;
4558
4559	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4560		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4561		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4562	}
4563
4564	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4565	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4566		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4567		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4568	}
4569
4570	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4571		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4572		    ch_flt->flt_bit);
4573		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4574		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4575	}
4576
4577	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4578		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4579		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4580	}
4581
4582	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4583		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4584		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4585	}
4586
4587	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4588		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4589		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4590	}
4591
4592	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4593		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4594		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4595	}
4596
4597	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4598		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4599		    DATA_TYPE_BOOLEAN_VALUE,
4600		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4601	}
4602
4603	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4604		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4605		    DATA_TYPE_BOOLEAN_VALUE,
4606		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4607	}
4608
4609	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4610		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4611		    ch_flt->afsr_errs, ch_flt->flt_bit);
4612		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4613		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4614	}
4615
4616	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4617		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4618		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4619	}
4620
4621	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4622		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4623		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4624	}
4625
4626	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4627		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4628		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4629	}
4630
4631	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4632		cpu_payload_add_ecache(aflt, payload);
4633
4634	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4635		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4636		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4637	}
4638
4639	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4640		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4641		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4642	}
4643
4644	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4645		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4646		    DATA_TYPE_UINT32_ARRAY, 16,
4647		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4648	}
4649
4650#if defined(CPU_IMP_L1_CACHE_PARITY)
4651	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4652		cpu_payload_add_dcache(aflt, payload);
4653	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4654		cpu_payload_add_icache(aflt, payload);
4655#endif	/* CPU_IMP_L1_CACHE_PARITY */
4656
4657#if defined(CHEETAH_PLUS)
4658	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4659		cpu_payload_add_pcache(aflt, payload);
4660	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4661		cpu_payload_add_tlb(aflt, payload);
4662#endif	/* CHEETAH_PLUS */
4663	/*
4664	 * Create the FMRI that goes into the payload
4665	 * and contains the unum info if necessary.
4666	 */
4667	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4668		char unum[UNUM_NAMLEN] = "";
4669		char sid[DIMM_SERIAL_ID_LEN] = "";
4670		int len, ret, rtype, synd_code;
4671		uint64_t offset = (uint64_t)-1;
4672
4673		rtype = cpu_error_to_resource_type(aflt);
4674		switch (rtype) {
4675
4676		case ERRTYPE_MEMORY:
4677		case ERRTYPE_ECACHE_DATA:
4678
4679			/*
4680			 * Memory errors, do unum lookup
4681			 */
4682			if (*afar_status == AFLT_STAT_INVALID)
4683				break;
4684
4685			if (rtype == ERRTYPE_ECACHE_DATA)
4686				aflt->flt_status |= ECC_ECACHE;
4687			else
4688				aflt->flt_status &= ~ECC_ECACHE;
4689
4690			synd_code = synd_to_synd_code(*synd_status,
4691			    aflt->flt_synd, ch_flt->flt_bit);
4692
4693			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4694				break;
4695
4696			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4697			    &len);
4698
4699			if (ret == 0) {
4700				(void) cpu_get_mem_offset(aflt->flt_addr,
4701				    &offset);
4702			}
4703
4704			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4705			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4706			fm_payload_set(payload,
4707			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4708			    DATA_TYPE_NVLIST, resource, NULL);
4709			break;
4710
4711		case ERRTYPE_CPU:
4712			/*
4713			 * On-board processor array error, add cpu resource.
4714			 */
4715			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4716			fm_payload_set(payload,
4717			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4718			    DATA_TYPE_NVLIST, resource, NULL);
4719			break;
4720		}
4721	}
4722}
4723
4724/*
4725 * Initialize the way info if necessary.
4726 */
4727void
4728cpu_ereport_init(struct async_flt *aflt)
4729{
4730	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4731	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4732	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4733	int i;
4734
4735	/*
4736	 * Initialize the info in the CPU logout structure.
4737	 * The I$/D$ way information is not initialized here
4738	 * since it is captured in the logout assembly code.
4739	 */
4740	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4741		(ecp + i)->ec_way = i;
4742
4743	for (i = 0; i < PN_L2_NWAYS; i++)
4744		(l2p + i)->ec_way = i;
4745}
4746
4747/*
4748 * Returns whether fault address is valid for this error bit and
4749 * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4750 */
4751int
4752cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4753{
4754	struct async_flt *aflt = (struct async_flt *)ch_flt;
4755
4756	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4757	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4758	    AFLT_STAT_VALID &&
4759	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4760}
4761
4762/*
4763 * Returns whether fault address is valid based on the error bit for the
4764 * one event being queued and whether the address is "in memory".
4765 */
4766static int
4767cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4768{
4769	struct async_flt *aflt = (struct async_flt *)ch_flt;
4770	int afar_status;
4771	uint64_t afsr_errs, afsr_ow, *ow_bits;
4772
4773	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4774	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4775		return (0);
4776
4777	afsr_errs = ch_flt->afsr_errs;
4778	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4779
4780	switch (afar_status) {
4781	case AFLT_STAT_VALID:
4782		return (1);
4783
4784	case AFLT_STAT_AMBIGUOUS:
		/*
		 * Status is ambiguous since another error bit (or bits)
		 * of equal priority to the specified bit is on in the afsr,
		 * so check those bits. Return 1 only if the bits on in the
		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
		 * Otherwise not all the equal priority bits are for memory
		 * errors, so return 0.
		 */
4793		ow_bits = afar_overwrite;
4794		while ((afsr_ow = *ow_bits++) != 0) {
4795			/*
4796			 * Get other bits that are on in t_afsr_bit's priority
4797			 * class to check for Memory Error bits only.
4798			 */
4799			if (afsr_ow & t_afsr_bit) {
4800				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4801					return (0);
4802				else
4803					return (1);
4804			}
4805		}
4806		/*FALLTHRU*/
4807
4808	default:
4809		return (0);
4810	}
4811}
4812
4813static void
4814cpu_log_diag_info(ch_async_flt_t *ch_flt)
4815{
4816	struct async_flt *aflt = (struct async_flt *)ch_flt;
4817	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4818	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4819	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4820#if defined(CPU_IMP_ECACHE_ASSOC)
4821	int i, nway;
4822#endif /* CPU_IMP_ECACHE_ASSOC */
4823
4824	/*
4825	 * Check if the CPU log out captured was valid.
4826	 */
4827	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4828	    ch_flt->flt_data_incomplete)
4829		return;
4830
4831#if defined(CPU_IMP_ECACHE_ASSOC)
4832	nway = cpu_ecache_nway();
4833	i =  cpu_ecache_line_valid(ch_flt);
4834	if (i == 0 || i > nway) {
4835		for (i = 0; i < nway; i++)
4836			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4837	} else
4838		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4839#else /* CPU_IMP_ECACHE_ASSOC */
4840	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4841#endif /* CPU_IMP_ECACHE_ASSOC */
4842
4843#if defined(CHEETAH_PLUS)
4844	pn_cpu_log_diag_l2_info(ch_flt);
4845#endif /* CHEETAH_PLUS */
4846
4847	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4848		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4849		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4850	}
4851
4852	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4853		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4854			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4855		else
4856			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4857		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4858	}
4859}
4860
4861/*
4862 * Cheetah ECC calculation.
4863 *
4864 * We only need to do the calculation on the data bits and can ignore check
4865 * bit and Mtag bit terms in the calculation.
4866 */
4867static uint64_t ch_ecc_table[9][2] = {
4868	/*
4869	 * low order 64-bits   high-order 64-bits
4870	 */
4871	{ 0x46bffffeccd1177f, 0x488800022100014c },
4872	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4873	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4874	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4875	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4876	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4877	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4878	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4879	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4880};
4881
4882/*
4883 * 64-bit population count, use well-known popcnt trick.
4884 * We could use the UltraSPARC V9 POPC instruction, but some
4885 * CPUs including Cheetahplus and Jaguar do not support that
4886 * instruction.
4887 */
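/*
 * For example, val = 0x58 (binary 1011000): val &= val - 1 clears the
 * lowest set bit on each iteration, 0x58 -> 0x50 -> 0x40 -> 0, so the
 * function returns 3.
 */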
4888int
4889popc64(uint64_t val)
4890{
4891	int cnt;
4892
4893	for (cnt = 0; val != 0; val &= val - 1)
4894		cnt++;
4895	return (cnt);
4896}
4897
4898/*
4899 * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4900 * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4901 * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4902 * instead of doing all the xor's.
4903 */
4904uint32_t
4905us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4906{
4907	int bitno, s;
4908	int synd = 0;
4909
4910	for (bitno = 0; bitno < 9; bitno++) {
4911		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4912		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4913		synd |= (s << bitno);
4914	}
4915	return (synd);
4916
4917}
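
/*
 * An illustrative sketch of how these check bits relate to the syndrome
 * used elsewhere in this file, assuming stored_ecc holds the 9 check
 * bits read back from hardware: XORing freshly generated check bits
 * with the stored check bits yields the syndrome that would index
 * ecc_syndrome_tab[], e.g.
 *
 *	uint32_t synd = us3_gen_ecc(data_lo, data_hi) ^ stored_ecc;
 *	int synd_code = (synd != 0 && synd < ESYND_TBL_SIZE) ?
 *	    ecc_syndrome_tab[synd] : -1;
 */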
4918
4919/*
4920 * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4921 * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4922 * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4923 */
4924static void
4925cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4926    ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4927{
4928	struct async_flt *aflt = (struct async_flt *)ch_flt;
4929
4930	if (reason &&
4931	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4932		(void) strcat(reason, eccp->ec_reason);
4933	}
4934
4935	ch_flt->flt_bit = eccp->ec_afsr_bit;
4936	ch_flt->flt_type = eccp->ec_flt_type;
4937	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4938		ch_flt->flt_diag_data = *cdp;
4939	else
4940		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4941	aflt->flt_in_memory =
4942	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4943
4944	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4945		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4946	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4947		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4948	else
4949		aflt->flt_synd = 0;
4950
4951	aflt->flt_payload = eccp->ec_err_payload;
4952
4953	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4954	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4955		cpu_errorq_dispatch(eccp->ec_err_class,
4956		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4957		    aflt->flt_panic);
4958	else
4959		cpu_errorq_dispatch(eccp->ec_err_class,
4960		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4961		    aflt->flt_panic);
4962}
4963
4964/*
4965 * Queue events on async event queue one event per error bit.  First we
4966 * queue the events that we "expect" for the given trap, then we queue events
4967 * that we may not expect.  Return number of events queued.
4968 */
4969int
4970cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4971    ch_cpu_logout_t *clop)
4972{
4973	struct async_flt *aflt = (struct async_flt *)ch_flt;
4974	ecc_type_to_info_t *eccp;
4975	int nevents = 0;
4976	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4977#if defined(CHEETAH_PLUS)
4978	uint64_t orig_t_afsr_errs;
4979#endif
4980	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4981	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4982	ch_diag_data_t *cdp = NULL;
4983
4984	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4985
4986#if defined(CHEETAH_PLUS)
4987	orig_t_afsr_errs = t_afsr_errs;
4988
4989	/*
4990	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4991	 */
4992	if (clop != NULL) {
4993		/*
4994		 * Set the AFSR and AFAR fields to the shadow registers.  The
4995		 * flt_addr and flt_stat fields will be reset to the primaries
4996		 * below, but the sdw_addr and sdw_stat will stay as the
4997		 * secondaries.
4998		 */
4999		cdp = &clop->clo_sdw_data;
5000		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
5001		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
5002		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
5003		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
5004		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
5005
5006		/*
5007		 * If the primary and shadow AFSR differ, tag the shadow as
5008		 * the first fault.
5009		 */
5010		if ((primary_afar != cdp->chd_afar) ||
5011		    (primary_afsr_errs != ch_flt->afsr_errs)) {
5012			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
5013		}
5014
5015		/*
5016		 * Check AFSR bits as well as AFSR_EXT bits in order of
5017		 * the AFAR overwrite priority. Our stored AFSR_EXT value
5018		 * is expected to be zero for those CPUs which do not have
5019		 * an AFSR_EXT register.
5020		 */
5021		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
5022			if ((eccp->ec_afsr_bit &
5023			    (ch_flt->afsr_errs & t_afsr_errs)) &&
5024			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
5025				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5026				cdp = NULL;
5027				t_afsr_errs &= ~eccp->ec_afsr_bit;
5028				nevents++;
5029			}
5030		}
5031
		/*
		 * If the ME bit is on in the primary AFSR, turn on again
		 * all the error bits that may set the ME bit, to make
		 * sure we see the ME AFSR error logs.
		 */
5037		if ((primary_afsr & C_AFSR_ME) != 0)
5038			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
5039	}
5040#endif	/* CHEETAH_PLUS */
5041
5042	if (clop != NULL)
5043		cdp = &clop->clo_data;
5044
5045	/*
5046	 * Queue expected errors, error bit and fault type must match
5047	 * in the ecc_type_to_info table.
5048	 */
5049	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5050	    eccp++) {
5051		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
5052		    (eccp->ec_flags & aflt->flt_status) != 0) {
5053#if defined(SERRANO)
5054			/*
5055			 * For FRC/FRU errors on Serrano the afar2 captures
5056			 * the address and the associated data is
5057			 * in the shadow logout area.
5058			 */
5059			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5060				if (clop != NULL)
5061					cdp = &clop->clo_sdw_data;
5062				aflt->flt_addr = ch_flt->afar2;
5063			} else {
5064				if (clop != NULL)
5065					cdp = &clop->clo_data;
5066				aflt->flt_addr = primary_afar;
5067			}
5068#else	/* SERRANO */
5069			aflt->flt_addr = primary_afar;
5070#endif	/* SERRANO */
5071			aflt->flt_stat = primary_afsr;
5072			ch_flt->afsr_ext = primary_afsr_ext;
5073			ch_flt->afsr_errs = primary_afsr_errs;
5074			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5075			cdp = NULL;
5076			t_afsr_errs &= ~eccp->ec_afsr_bit;
5077			nevents++;
5078		}
5079	}
5080
	/*
	 * Queue unexpected errors; only the error bit needs to match.
	 */
5084	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5085	    eccp++) {
5086		if (eccp->ec_afsr_bit & t_afsr_errs) {
5087#if defined(SERRANO)
5088			/*
5089			 * For FRC/FRU errors on Serrano the afar2 captures
5090			 * the address and the associated data is
5091			 * in the shadow logout area.
5092			 */
5093			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5094				if (clop != NULL)
5095					cdp = &clop->clo_sdw_data;
5096				aflt->flt_addr = ch_flt->afar2;
5097			} else {
5098				if (clop != NULL)
5099					cdp = &clop->clo_data;
5100				aflt->flt_addr = primary_afar;
5101			}
5102#else	/* SERRANO */
5103			aflt->flt_addr = primary_afar;
5104#endif	/* SERRANO */
5105			aflt->flt_stat = primary_afsr;
5106			ch_flt->afsr_ext = primary_afsr_ext;
5107			ch_flt->afsr_errs = primary_afsr_errs;
5108			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5109			cdp = NULL;
5110			t_afsr_errs &= ~eccp->ec_afsr_bit;
5111			nevents++;
5112		}
5113	}
5114	return (nevents);
5115}
5116
5117/*
5118 * Return trap type number.
5119 */
5120uint8_t
5121flt_to_trap_type(struct async_flt *aflt)
5122{
5123	if (aflt->flt_status & ECC_I_TRAP)
5124		return (TRAP_TYPE_ECC_I);
5125	if (aflt->flt_status & ECC_D_TRAP)
5126		return (TRAP_TYPE_ECC_D);
5127	if (aflt->flt_status & ECC_F_TRAP)
5128		return (TRAP_TYPE_ECC_F);
5129	if (aflt->flt_status & ECC_C_TRAP)
5130		return (TRAP_TYPE_ECC_C);
5131	if (aflt->flt_status & ECC_DP_TRAP)
5132		return (TRAP_TYPE_ECC_DP);
5133	if (aflt->flt_status & ECC_IP_TRAP)
5134		return (TRAP_TYPE_ECC_IP);
5135	if (aflt->flt_status & ECC_ITLB_TRAP)
5136		return (TRAP_TYPE_ECC_ITLB);
5137	if (aflt->flt_status & ECC_DTLB_TRAP)
5138		return (TRAP_TYPE_ECC_DTLB);
5139	return (TRAP_TYPE_UNKNOWN);
5140}
5141
5142/*
5143 * Decide an error type based on detector and leaky/partner tests.
5144 * The following array is used for quick translation - it must
5145 * stay in sync with ce_dispact_t.
5146 */
5147
5148static char *cetypes[] = {
5149	CE_DISP_DESC_U,
5150	CE_DISP_DESC_I,
5151	CE_DISP_DESC_PP,
5152	CE_DISP_DESC_P,
5153	CE_DISP_DESC_L,
5154	CE_DISP_DESC_PS,
5155	CE_DISP_DESC_S
5156};
5157
5158char *
5159flt_to_error_type(struct async_flt *aflt)
5160{
5161	ce_dispact_t dispact, disp;
5162	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5163
5164	/*
5165	 * The memory payload bundle is shared by some events that do
5166	 * not perform any classification.  For those flt_disp will be
5167	 * 0 and we will return "unknown".
5168	 */
5169	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5170		return (cetypes[CE_DISP_UNKNOWN]);
5171
5172	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5173
	/*
	 * It is also possible that no scrub/classification was performed
	 * by the detector, for instance where a disrupting error was logged
	 * in the AFSR while CEEN was off in cpu_deferred_error.
	 */
5179	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5180		return (cetypes[CE_DISP_UNKNOWN]);
5181
5182	/*
5183	 * Lookup type in initial classification/action table
5184	 */
5185	dispact = CE_DISPACT(ce_disp_table,
5186	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5187	    CE_XDIAG_STATE(dtcrinfo),
5188	    CE_XDIAG_CE1SEEN(dtcrinfo),
5189	    CE_XDIAG_CE2SEEN(dtcrinfo));
5190
5191	/*
5192	 * A bad lookup is not something to panic production systems for.
5193	 */
5194	ASSERT(dispact != CE_DISP_BAD);
5195	if (dispact == CE_DISP_BAD)
5196		return (cetypes[CE_DISP_UNKNOWN]);
5197
5198	disp = CE_DISP(dispact);
5199
5200	switch (disp) {
5201	case CE_DISP_UNKNOWN:
5202	case CE_DISP_INTERMITTENT:
5203		break;
5204
5205	case CE_DISP_POSS_PERS:
5206		/*
5207		 * "Possible persistent" errors to which we have applied a valid
5208		 * leaky test can be separated into "persistent" or "leaky".
5209		 */
5210		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5211		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5212			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5213			    CE_XDIAG_CE2SEEN(lkyinfo))
5214				disp = CE_DISP_LEAKY;
5215			else
5216				disp = CE_DISP_PERS;
5217		}
5218		break;
5219
5220	case CE_DISP_POSS_STICKY:
5221		/*
5222		 * Promote "possible sticky" results that have been
5223		 * confirmed by a partner test to "sticky".  Unconfirmed
5224		 * "possible sticky" events are left at that status - we do not
5225		 * guess at any bad reader/writer etc status here.
5226		 */
5227		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5228		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5229		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5230			disp = CE_DISP_STICKY;
5231
5232		/*
5233		 * Promote "possible sticky" results on a uniprocessor
5234		 * to "sticky"
5235		 */
		if (disp == CE_DISP_POSS_STICKY &&
		    CE_XDIAG_SKIPCODE(ptnrinfo) == CE_XDIAG_SKIP_UNIPROC)
5238			disp = CE_DISP_STICKY;
5239		break;
5240
5241	default:
5242		disp = CE_DISP_UNKNOWN;
5243		break;
5244	}
5245
5246	return (cetypes[disp]);
5247}
5248
5249/*
5250 * Given the entire afsr, the specific bit to check and a prioritized list of
5251 * error bits, determine the validity of the various overwrite priority
5252 * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5253 * different overwrite priorities.
5254 *
5255 * Given a specific afsr error bit and the entire afsr, there are three cases:
5256 *   INVALID:	The specified bit is lower overwrite priority than some other
5257 *		error bit which is on in the afsr (or IVU/IVC).
5258 *   VALID:	The specified bit is higher priority than all other error bits
5259 *		which are on in the afsr.
5260 *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5261 *		bit is on in the afsr.
5262 */
5263int
5264afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5265{
5266	uint64_t afsr_ow;
5267
5268	while ((afsr_ow = *ow_bits++) != 0) {
5269		/*
5270		 * If bit is in the priority class, check to see if another
5271		 * bit in the same class is on => ambiguous.  Otherwise,
5272		 * the value is valid.  If the bit is not on at this priority
5273		 * class, but a higher priority bit is on, then the value is
5274		 * invalid.
5275		 */
5276		if (afsr_ow & afsr_bit) {
5277			/*
5278			 * If equal pri bit is on, ambiguous.
5279			 */
5280			if (afsr & (afsr_ow & ~afsr_bit))
5281				return (AFLT_STAT_AMBIGUOUS);
5282			return (AFLT_STAT_VALID);
5283		} else if (afsr & afsr_ow)
5284			break;
5285	}
5286
5287	/*
5288	 * We didn't find a match or a higher priority bit was on.  Not
5289	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5290	 */
5291	return (AFLT_STAT_INVALID);
5292}
5293
5294static int
5295afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5296{
5297#if defined(SERRANO)
5298	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5299		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5300	else
5301#endif	/* SERRANO */
5302		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5303}
5304
5305static int
5306afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5307{
5308	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5309}
5310
5311static int
5312afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5313{
5314	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5315}
5316
5317static int
5318afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5319{
5320#ifdef lint
5321	cpuid = cpuid;
5322#endif
5323#if defined(CHEETAH_PLUS)
5324	/*
5325	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5326	 * policy for Cheetah+ and separate for Panther CPUs.
5327	 */
5328	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5329		if (IS_PANTHER(cpunodes[cpuid].implementation))
5330			return (afsr_to_msynd_status(afsr, afsr_bit));
5331		else
5332			return (afsr_to_esynd_status(afsr, afsr_bit));
5333	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5334		if (IS_PANTHER(cpunodes[cpuid].implementation))
5335			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5336		else
5337			return (afsr_to_esynd_status(afsr, afsr_bit));
5338#else /* CHEETAH_PLUS */
5339	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5340		return (afsr_to_msynd_status(afsr, afsr_bit));
5341	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5342		return (afsr_to_esynd_status(afsr, afsr_bit));
5343#endif /* CHEETAH_PLUS */
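	/*
	 * Note that the "} else" below closes the "else if" block opened in
	 * whichever of the two preprocessor variants above was compiled in.
	 */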
5344	} else {
5345		return (AFLT_STAT_INVALID);
5346	}
5347}
5348
5349/*
5350 * Slave CPU stick synchronization.
5351 */
5352void
5353sticksync_slave(void)
5354{
5355	int		i;
5356	int		tries = 0;
5357	int64_t		tskew;
5358	int64_t		av_tskew;
5359
5360	kpreempt_disable();
5361	/* wait for the master side */
5362	while (stick_sync_cmd != SLAVE_START)
5363		;
5364	/*
	 * Synchronization should take only a few tries at most.  But in the
	 * odd case where the cpu isn't cooperating we'll keep trying.  A cpu
	 * without its stick synchronized wouldn't be a good citizen.
5368	 */
5369	while (slave_done == 0) {
5370		/*
5371		 * Time skew calculation.
5372		 */
5373		av_tskew = tskew = 0;
5374
5375		for (i = 0; i < stick_iter; i++) {
5376			/* make location hot */
5377			timestamp[EV_A_START] = 0;
5378			stick_timestamp(&timestamp[EV_A_START]);
5379
5380			/* tell the master we're ready */
5381			stick_sync_cmd = MASTER_START;
5382
5383			/* and wait */
5384			while (stick_sync_cmd != SLAVE_CONT)
5385				;
5386			/* Event B end */
5387			stick_timestamp(&timestamp[EV_B_END]);
5388
5389			/* calculate time skew */
5390			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5391			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5392			    / 2;
5393
5394			/* keep running count */
5395			av_tskew += tskew;
5396		} /* for */
5397
5398		/*
5399		 * Adjust stick for time skew if not within the max allowed;
5400		 * otherwise we're all done.
5401		 */
5402		if (stick_iter != 0)
5403			av_tskew = av_tskew/stick_iter;
5404		if (ABS(av_tskew) > stick_tsk) {
5405			/*
5406			 * If the skew is 1 (the slave's STICK register
5407			 * is 1 STICK ahead of the master's), stick_adj
5408			 * could fail to adjust the slave's STICK register
5409			 * if the STICK read on the slave happens to
5410			 * align with the increment of the STICK.
5411			 * Therefore, we increment the skew to 2.
5412			 */
5413			if (av_tskew == 1)
5414				av_tskew++;
5415			stick_adj(-av_tskew);
5416		} else
5417			slave_done = 1;
5418#ifdef DEBUG
5419		if (tries < DSYNC_ATTEMPTS)
5420			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5421			    av_tskew;
5422		++tries;
5423#endif /* DEBUG */
5424#ifdef lint
5425		tries = tries;
5426#endif
5427
5428	} /* while */
5429
5430	/* allow the master to finish */
5431	stick_sync_cmd = EVENT_NULL;
5432	kpreempt_enable();
5433}
5434
5435/*
5436 * Master CPU side of stick synchronization.
5437 *  - timestamp end of Event A
5438 *  - timestamp beginning of Event B
5439 */
5440void
5441sticksync_master(void)
5442{
5443	int		i;
5444
5445	kpreempt_disable();
5446	/* tell the slave we've started */
5447	slave_done = 0;
5448	stick_sync_cmd = SLAVE_START;
5449
5450	while (slave_done == 0) {
5451		for (i = 0; i < stick_iter; i++) {
5452			/* wait for the slave */
5453			while (stick_sync_cmd != MASTER_START)
5454				;
5455			/* Event A end */
5456			stick_timestamp(&timestamp[EV_A_END]);
5457
5458			/* make location hot */
5459			timestamp[EV_B_START] = 0;
5460			stick_timestamp(&timestamp[EV_B_START]);
5461
5462			/* tell the slave to continue */
5463			stick_sync_cmd = SLAVE_CONT;
5464		} /* for */
5465
5466		/* wait while slave calculates time skew */
5467		while (stick_sync_cmd == SLAVE_CONT)
5468			;
5469	} /* while */
5470	kpreempt_enable();
5471}
5472
5473/*
 * Cheetah/Cheetah+ report copyback errors with a disrupting trap, so we don't
 * need the Spitfire hack of xcall'ing all the cpus to ask them to check for
 * such errors.  Also, in cpu_async_panic_callb, each cpu checks for CPU
 * events on its way to panic idle.
5478 */
5479/*ARGSUSED*/
5480void
5481cpu_check_allcpus(struct async_flt *aflt)
5482{}
5483
5484struct kmem_cache *ch_private_cache;
5485
5486/*
 * Cpu private uninitialization.  Uninitialize the Ecache scrubber and
5488 * deallocate the scrubber data structures and cpu_private data structure.
5489 */
5490void
5491cpu_uninit_private(struct cpu *cp)
5492{
5493	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5494
5495	ASSERT(chprp);
5496	cpu_uninit_ecache_scrub_dr(cp);
5497	CPU_PRIVATE(cp) = NULL;
5498	ch_err_tl1_paddrs[cp->cpu_id] = 0;
5499	kmem_cache_free(ch_private_cache, chprp);
5500	cmp_delete_cpu(cp->cpu_id);
5501
5502}
5503
5504/*
5505 * Cheetah Cache Scrubbing
5506 *
5507 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5508 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5509 * protected by either parity or ECC.
5510 *
5511 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
5513 * logic works with respect to the ASI used to scrub I$ lines, the entire
5514 * I$ is scanned at once.
5515 */
5516
5517/*
5518 * Tuneables to enable and disable the scrubbing of the caches, and to tune
5519 * scrubbing behavior.  These may be changed via /etc/system or using mdb
5520 * on a running system.
5521 */
5522int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5523
5524/*
5525 * The following are the PIL levels that the softints/cross traps will fire at.
5526 */
5527uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5528uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5529uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5530
5531#if defined(JALAPENO)
5532
5533/*
5534 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5535 * on Jalapeno.
5536 */
5537int ecache_scrub_enable = 0;
5538
5539#else	/* JALAPENO */
5540
5541/*
5542 * With all other cpu types, E$ scrubbing is on by default
5543 */
5544int ecache_scrub_enable = 1;
5545
5546#endif	/* JALAPENO */
5547
5548
5549#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5550
5551/*
5552 * The I$ scrubber tends to cause latency problems for real-time SW, so it
5553 * is disabled by default on non-Cheetah systems
5554 */
5555int icache_scrub_enable = 0;
5556
5557/*
5558 * Tuneables specifying the scrub calls per second and the scan rate
5559 * for each cache
5560 *
5561 * The cyclic times are set during boot based on the following values.
5562 * Changing these values in mdb after this time will have no effect.  If
5563 * a different value is desired, it must be set in /etc/system before a
5564 * reboot.
5565 */
5566int ecache_calls_a_sec = 1;
5567int dcache_calls_a_sec = 2;
5568int icache_calls_a_sec = 2;
5569
5570int ecache_scan_rate_idle = 1;
5571int ecache_scan_rate_busy = 1;
5572int dcache_scan_rate_idle = 1;
5573int dcache_scan_rate_busy = 1;
5574int icache_scan_rate_idle = 1;
5575int icache_scan_rate_busy = 1;
5576
5577#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5578
5579int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5580
int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
int icache_calls_a_sec = 100;		/* I$ scrub calls per second */
5584
5585int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5586int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5587int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5588int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5589int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5590int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5591
5592#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5593
5594/*
5595 * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5596 * increment the outstanding request counter and schedule a softint to run
5597 * the scrubber.
5598 */
5599extern xcfunc_t cache_scrubreq_tl1;
5600
5601/*
5602 * These are the softint functions for each cache scrubber
5603 */
5604static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5605static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5606static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5607
5608/*
5609 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of similar code across the different caches.
5612 *
5613 * This table keeps a copy of the value in the calls per second variable
5614 * (?cache_calls_a_sec).  This makes it much more difficult for someone
5615 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5616 * mdb in a misguided attempt to disable the scrubber).
5617 */
5618struct scrub_info {
5619	int		*csi_enable;	/* scrubber enable flag */
5620	int		csi_freq;	/* scrubber calls per second */
5621	int		csi_index;	/* index to chsm_outstanding[] */
5622	uint64_t	csi_inum;	/* scrubber interrupt number */
5623	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5624	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5625	char		csi_name[3];	/* cache name for this scrub entry */
5626} cache_scrub_info[] = {
5627{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5628{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5629{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5630};
5631
5632/*
5633 * If scrubbing is enabled, increment the outstanding request counter.  If it
5634 * is 1 (meaning there were no previous requests outstanding), call
5635 * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5636 * a self trap.
5637 */
5638static void
5639do_scrub(struct scrub_info *csi)
5640{
5641	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5642	int index = csi->csi_index;
5643	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5644
5645	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5646		if (atomic_inc_32_nv(outstanding) == 1) {
5647			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5648			    csi->csi_inum, 0);
5649		}
5650	}
5651}
5652
5653/*
5654 * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5655 * cross-trap the offline cpus.
5656 */
5657static void
5658do_scrub_offline(struct scrub_info *csi)
5659{
5660	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5661
5662	if (CPUSET_ISNULL(cpu_offline_set)) {
5663		/*
5664		 * No offline cpus - nothing to do
5665		 */
5666		return;
5667	}
5668
5669	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5670		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5671		    csi->csi_index);
5672	}
5673}
5674
5675/*
5676 * This is the initial setup for the scrubber cyclics - it sets the
5677 * interrupt level, frequency, and function to call.
5678 */
5679/*ARGSUSED*/
5680static void
5681cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5682    cyc_time_t *when)
5683{
5684	struct scrub_info *csi = (struct scrub_info *)arg;
5685
5686	ASSERT(csi != NULL);
5687	hdlr->cyh_func = (cyc_func_t)do_scrub;
5688	hdlr->cyh_level = CY_LOW_LEVEL;
5689	hdlr->cyh_arg = arg;
5690
5691	when->cyt_when = 0;	/* Start immediately */
5692	when->cyt_interval = NANOSEC / csi->csi_freq;
5693}
5694
5695/*
5696 * Initialization for cache scrubbing.
5697 * This routine is called AFTER all cpus have had cpu_init_private called
5698 * to initialize their private data areas.
5699 */
5700void
5701cpu_init_cache_scrub(void)
5702{
5703	int i;
5704	struct scrub_info *csi;
5705	cyc_omni_handler_t omni_hdlr;
5706	cyc_handler_t offline_hdlr;
5707	cyc_time_t when;
5708
5709	/*
5710	 * save away the maximum number of lines for the D$
5711	 */
5712	dcache_nlines = dcache_size / dcache_linesize;
5713
5714	/*
5715	 * register the softints for the cache scrubbing
5716	 */
5717	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5718	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5719	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5720	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5721
5722	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5723	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5724	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5725	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5726
5727	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5728	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5729	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5730	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5731
5732	/*
5733	 * start the scrubbing for all the caches
5734	 */
5735	mutex_enter(&cpu_lock);
5736	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5737
5738		csi = &cache_scrub_info[i];
5739
5740		if (!(*csi->csi_enable))
5741			continue;
5742
5743		/*
5744		 * force the following to be true:
5745		 *	1 <= calls_a_sec <= hz
5746		 */
5747		if (csi->csi_freq > hz) {
5748			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5749			    "(%d); resetting to hz (%d)", csi->csi_name,
5750			    csi->csi_freq, hz);
5751			csi->csi_freq = hz;
5752		} else if (csi->csi_freq < 1) {
5753			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5754			    "(%d); resetting to 1", csi->csi_name,
5755			    csi->csi_freq);
5756			csi->csi_freq = 1;
5757		}
5758
5759		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5760		omni_hdlr.cyo_offline = NULL;
5761		omni_hdlr.cyo_arg = (void *)csi;
5762
5763		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5764		offline_hdlr.cyh_arg = (void *)csi;
5765		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5766
5767		when.cyt_when = 0;	/* Start immediately */
5768		when.cyt_interval = NANOSEC / csi->csi_freq;
5769
5770		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5771		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5772	}
5773	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5774	mutex_exit(&cpu_lock);
5775}
5776
5777/*
5778 * Indicate that the specified cpu is idle.
5779 */
5780void
5781cpu_idle_ecache_scrub(struct cpu *cp)
5782{
5783	if (CPU_PRIVATE(cp) != NULL) {
5784		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5785		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5786	}
5787}
5788
5789/*
5790 * Indicate that the specified cpu is busy.
5791 */
5792void
5793cpu_busy_ecache_scrub(struct cpu *cp)
5794{
5795	if (CPU_PRIVATE(cp) != NULL) {
5796		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5797		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5798	}
5799}
5800
5801/*
5802 * Initialization for cache scrubbing for the specified cpu.
5803 */
5804void
5805cpu_init_ecache_scrub_dr(struct cpu *cp)
5806{
5807	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5808	int cpuid = cp->cpu_id;
5809
5810	/* initialize the number of lines in the caches */
5811	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5812	    cpunodes[cpuid].ecache_linesize;
5813	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5814	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5815
5816	/*
5817	 * do_scrub() and do_scrub_offline() check both the global
5818	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5819	 * check this value before scrubbing.  Currently, we use it to
5820	 * disable the E$ scrubber on multi-core cpus or while running at
5821	 * slowed speed.  For now, just turn everything on and allow
5822	 * cpu_init_private() to change it if necessary.
5823	 */
5824	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5825	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5826	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5827
5828	cpu_busy_ecache_scrub(cp);
5829}
5830
5831/*
5832 * Un-initialization for cache scrubbing for the specified cpu.
5833 */
5834static void
5835cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5836{
5837	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5838
5839	/*
5840	 * un-initialize bookkeeping for cache scrubbing
5841	 */
5842	bzero(csmp, sizeof (ch_scrub_misc_t));
5843
5844	cpu_idle_ecache_scrub(cp);
5845}
5846
5847/*
5848 * Called periodically on each CPU to scrub the D$.
5849 */
5850static void
5851scrub_dcache(int how_many)
5852{
5853	int i;
5854	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5855	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5856
5857	/*
5858	 * scrub the desired number of lines
5859	 */
5860	for (i = 0; i < how_many; i++) {
5861		/*
5862		 * scrub a D$ line
5863		 */
5864		dcache_inval_line(index);
5865
5866		/*
5867		 * calculate the next D$ line to scrub, assumes
5868		 * that dcache_nlines is a power of 2
5869		 */
5870		index = (index + 1) & (dcache_nlines - 1);
5871	}
5872
5873	/*
5874	 * set the scrub index for the next visit
5875	 */
5876	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5877}
5878
5879/*
5880 * Handler for D$ scrub inum softint. Call scrub_dcache until
5881 * we decrement the outstanding request count to zero.
5882 */
5883/*ARGSUSED*/
5884static uint_t
5885scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5886{
5887	int i;
5888	int how_many;
5889	int outstanding;
5890	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5891	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5892	struct scrub_info *csi = (struct scrub_info *)arg1;
5893	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5894	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5895
5896	/*
5897	 * The scan rates are expressed in units of tenths of a
5898	 * percent.  A scan rate of 1000 (100%) means the whole
5899	 * cache is scanned every second.
5900	 */
5901	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
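
	/*
	 * For example, a 64KB D$ with 32-byte lines has dcache_nlines = 2048;
	 * with scan_rate = 100 (10%) and csi_freq = 100 calls/sec (the plain
	 * Cheetah defaults) this works out to (2048 * 100) / (1000 * 100) =
	 * 2 lines per softint, or roughly 10% of the cache per second.
	 */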
5902
5903	do {
5904		outstanding = *countp;
5905		for (i = 0; i < outstanding; i++) {
5906			scrub_dcache(how_many);
5907		}
5908	} while (atomic_add_32_nv(countp, -outstanding));
5909
5910	return (DDI_INTR_CLAIMED);
5911}
5912
5913/*
5914 * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5915 * by invalidating lines. Due to the characteristics of the ASI which
5916 * is used to invalidate an I$ line, the entire I$ must be invalidated
 * rather than an individual I$ line.
5918 */
5919static void
5920scrub_icache(int how_many)
5921{
5922	int i;
5923	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5924	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5925	int icache_nlines = csmp->chsm_icache_nlines;
5926
5927	/*
5928	 * scrub the desired number of lines
5929	 */
5930	for (i = 0; i < how_many; i++) {
5931		/*
5932		 * since the entire I$ must be scrubbed at once,
5933		 * wait until the index wraps to zero to invalidate
5934		 * the entire I$
5935		 */
5936		if (index == 0) {
5937			icache_inval_all();
5938		}
5939
5940		/*
5941		 * calculate the next I$ line to scrub, assumes
5942		 * that chsm_icache_nlines is a power of 2
5943		 */
5944		index = (index + 1) & (icache_nlines - 1);
5945	}
5946
5947	/*
5948	 * set the scrub index for the next visit
5949	 */
5950	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5951}
5952
5953/*
5954 * Handler for I$ scrub inum softint. Call scrub_icache until
5955 * we decrement the outstanding request count to zero.
5956 */
5957/*ARGSUSED*/
5958static uint_t
5959scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5960{
5961	int i;
5962	int how_many;
5963	int outstanding;
5964	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5965	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5966	struct scrub_info *csi = (struct scrub_info *)arg1;
5967	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5968	    icache_scan_rate_idle : icache_scan_rate_busy;
5969	int icache_nlines = csmp->chsm_icache_nlines;
5970
5971	/*
5972	 * The scan rates are expressed in units of tenths of a
5973	 * percent.  A scan rate of 1000 (100%) means the whole
5974	 * cache is scanned every second.
5975	 */
5976	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5977
5978	do {
5979		outstanding = *countp;
5980		for (i = 0; i < outstanding; i++) {
5981			scrub_icache(how_many);
5982		}
5983	} while (atomic_add_32_nv(countp, -outstanding));
5984
5985	return (DDI_INTR_CLAIMED);
5986}
5987
5988/*
5989 * Called periodically on each CPU to scrub the E$.
5990 */
5991static void
5992scrub_ecache(int how_many)
5993{
5994	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5995	int i;
5996	int cpuid = CPU->cpu_id;
5997	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5998	int nlines = csmp->chsm_ecache_nlines;
5999	int linesize = cpunodes[cpuid].ecache_linesize;
6000	int ec_set_size = cpu_ecache_set_size(CPU);
6001
6002	/*
6003	 * scrub the desired number of lines
6004	 */
6005	for (i = 0; i < how_many; i++) {
6006		/*
6007		 * scrub the E$ line
6008		 */
6009		ecache_flush_line(ecache_flushaddr + (index * linesize),
6010		    ec_set_size);
6011
6012		/*
6013		 * calculate the next E$ line to scrub based on twice
6014		 * the number of E$ lines (to displace lines containing
6015		 * flush area data), assumes that the number of lines
6016		 * is a power of 2
6017		 */
6018		index = (index + 1) & ((nlines << 1) - 1);
6019	}
6020
6021	/*
6022	 * set the ecache scrub index for the next visit
6023	 */
6024	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
6025}
6026
6027/*
6028 * Handler for E$ scrub inum softint. Call the E$ scrubber until
6029 * we decrement the outstanding request count to zero.
6030 *
6031 * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
6032 * become negative after the atomic_add_32_nv().  This is not a problem, as
6033 * the next trip around the loop won't scrub anything, and the next add will
6034 * reset the count back to zero.
6035 */
6036/*ARGSUSED*/
6037static uint_t
6038scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
6039{
6040	int i;
6041	int how_many;
6042	int outstanding;
6043	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
6044	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
6045	struct scrub_info *csi = (struct scrub_info *)arg1;
6046	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
6047	    ecache_scan_rate_idle : ecache_scan_rate_busy;
6048	int ecache_nlines = csmp->chsm_ecache_nlines;
6049
6050	/*
6051	 * The scan rates are expressed in units of tenths of a
6052	 * percent.  A scan rate of 1000 (100%) means the whole
6053	 * cache is scanned every second.
6054	 */
6055	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
6056
6057	do {
6058		outstanding = *countp;
6059		for (i = 0; i < outstanding; i++) {
6060			scrub_ecache(how_many);
6061		}
6062	} while (atomic_add_32_nv(countp, -outstanding));
6063
6064	return (DDI_INTR_CLAIMED);
6065}
6066
6067/*
6068 * Timeout function to reenable CE
6069 */
6070static void
6071cpu_delayed_check_ce_errors(void *arg)
6072{
6073	if (taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6074	    TQ_NOSLEEP) == TASKQID_INVALID) {
6075		(void) timeout(cpu_delayed_check_ce_errors, arg,
6076		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6077	}
6078}
6079
6080/*
6081 * CE Deferred Re-enable after trap.
6082 *
6083 * When the CPU gets a disrupting trap for any of the errors
6084 * controlled by the CEEN bit, CEEN is disabled in the trap handler
6085 * immediately. To eliminate the possibility of multiple CEs causing
6086 * recursive stack overflow in the trap handler, we cannot
6087 * reenable CEEN while still running in the trap handler. Instead,
6088 * after a CE is logged on a CPU, we schedule a timeout function,
6089 * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6090 * seconds. This function will check whether any further CEs
6091 * have occurred on that CPU, and if none have, will reenable CEEN.
6092 *
6093 * If further CEs have occurred while CEEN is disabled, another
6094 * timeout will be scheduled. This is to ensure that the CPU can
6095 * make progress in the face of CE 'storms', and that it does not
6096 * spend all its time logging CE errors.
6097 */
6098static void
6099cpu_check_ce_errors(void *arg)
6100{
6101	int	cpuid = (int)(uintptr_t)arg;
6102	cpu_t	*cp;
6103
6104	/*
6105	 * We acquire cpu_lock.
6106	 */
6107	ASSERT(curthread->t_pil == 0);
6108
6109	/*
	 * verify that the cpu is still around; DR
	 * could have gotten there first.
6112	 */
6113	mutex_enter(&cpu_lock);
6114	cp = cpu_get(cpuid);
6115	if (cp == NULL) {
6116		mutex_exit(&cpu_lock);
6117		return;
6118	}
6119	/*
6120	 * make sure we don't migrate across CPUs
6121	 * while checking our CE status.
6122	 */
6123	kpreempt_disable();
6124
6125	/*
6126	 * If we are running on the CPU that got the
6127	 * CE, we can do the checks directly.
6128	 */
6129	if (cp->cpu_id == CPU->cpu_id) {
6130		mutex_exit(&cpu_lock);
6131		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6132		kpreempt_enable();
6133		return;
6134	}
6135	kpreempt_enable();
6136
6137	/*
6138	 * send an x-call to get the CPU that originally
6139	 * got the CE to do the necessary checks. If we can't
6140	 * send the x-call, reschedule the timeout, otherwise we
6141	 * lose CEEN forever on that CPU.
6142	 */
6143	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6144		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6145		    TIMEOUT_CEEN_CHECK, 0);
6146		mutex_exit(&cpu_lock);
6147	} else {
6148		/*
6149		 * When the CPU is not accepting xcalls, or
6150		 * the processor is offlined, we don't want to
6151		 * incur the extra overhead of trying to schedule the
6152		 * CE timeout indefinitely. However, we don't want to lose
6153		 * CE checking forever.
6154		 *
6155		 * Keep rescheduling the timeout, accepting the additional
6156		 * overhead as the cost of correctness in the case where we get
		 * a CE, disable CEEN, offline the CPU during the
		 * timeout interval, and then online it at some
6159		 * point in the future. This is unlikely given the short
6160		 * cpu_ceen_delay_secs.
6161		 */
6162		mutex_exit(&cpu_lock);
6163		(void) timeout(cpu_delayed_check_ce_errors,
6164		    (void *)(uintptr_t)cp->cpu_id,
6165		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6166	}
6167}
6168
6169/*
6170 * This routine will check whether CEs have occurred while
6171 * CEEN is disabled. Any CEs detected will be logged and, if
6172 * possible, scrubbed.
6173 *
6174 * The memscrubber will also use this routine to clear any errors
6175 * caused by its scrubbing with CEEN disabled.
6176 *
6177 * flag == SCRUBBER_CEEN_CHECK
6178 *		called from memscrubber, just check/scrub, no reset
6179 *		paddr	physical addr. for start of scrub pages
6180 *		vaddr	virtual addr. for scrub area
6181 *		psz	page size of area to be scrubbed
6182 *
6183 * flag == TIMEOUT_CEEN_CHECK
6184 *		timeout function has triggered, reset timeout or CEEN
6185 *
6186 * Note: We must not migrate cpus during this function.  This can be
6187 * achieved by one of:
6188 *    - invoking as target of an x-call in which case we're at XCALL_PIL
6189 *	The flag value must be first xcall argument.
6190 *    - disabling kernel preemption.  This should be done for very short
6191 *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6192 *	scrub an extended area with cpu_check_block.  The call for
6193 *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6194 *	brief for this case.
6195 *    - binding to a cpu, eg with thread_affinity_set().  This is used
6196 *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6197 *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6198 */
6199void
6200cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6201{
6202	ch_cpu_errors_t	cpu_error_regs;
6203	uint64_t	ec_err_enable;
6204	uint64_t	page_offset;
6205
6206	/* Read AFSR */
6207	get_cpu_error_state(&cpu_error_regs);
6208
6209	/*
6210	 * If no CEEN errors have occurred during the timeout
6211	 * interval, it is safe to re-enable CEEN and exit.
6212	 */
6213	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6214	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6215		if (flag == TIMEOUT_CEEN_CHECK &&
6216		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6217			set_error_enable(ec_err_enable | EN_REG_CEEN);
6218		return;
6219	}
6220
6221	/*
6222	 * Ensure that CEEN was not reenabled (maybe by DR) before
6223	 * we log/clear the error.
6224	 */
6225	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6226		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6227
6228	/*
6229	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6230	 * timeout will be rescheduled when the error is logged.
6231	 */
6232	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6233	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6234		cpu_ce_detected(&cpu_error_regs,
6235		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6236	else
6237		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6238
6239	/*
6240	 * If the memory scrubber runs while CEEN is
6241	 * disabled, (or if CEEN is disabled during the
6242	 * scrub as a result of a CE being triggered by
6243	 * it), the range being scrubbed will not be
6244	 * completely cleaned. If there are multiple CEs
6245	 * in the range at most two of these will be dealt
6246	 * with, (one by the trap handler and one by the
6247	 * timeout). It is also possible that none are dealt
6248	 * with, (CEEN disabled and another CE occurs before
6249	 * the timeout triggers). So to ensure that the
6250	 * memory is actually scrubbed, we have to access each
6251	 * memory location in the range and then check whether
6252	 * that access causes a CE.
6253	 */
6254	if (flag == SCRUBBER_CEEN_CHECK && va) {
6255		if ((cpu_error_regs.afar >= pa) &&
6256		    (cpu_error_regs.afar < (pa + psz))) {
6257			/*
6258			 * Force a load from physical memory for each
6259			 * 64-byte block, then check AFSR to determine
6260			 * whether this access caused an error.
6261			 *
6262			 * This is a slow way to do a scrub, but as it will
6263			 * only be invoked when the memory scrubber actually
6264			 * triggered a CE, it should not happen too
6265			 * frequently.
6266			 *
			 * Cut down what we need to check as the scrubber
			 * has verified up to the AFAR, so get its offset
			 * into the page and start there.
6270			 */
6271			page_offset = (uint64_t)(cpu_error_regs.afar &
6272			    (psz - 1));
6273			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6274			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6275			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6276			    psz);
6277		}
6278	}
6279
6280	/*
6281	 * Reset error enable if this CE is not masked.
6282	 */
6283	if ((flag == TIMEOUT_CEEN_CHECK) &&
6284	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6285		set_error_enable(ec_err_enable | EN_REG_CEEN);
6286
6287}
6288
6289/*
6290 * Attempt a cpu logout for an error that we did not trap for, such
6291 * as a CE noticed with CEEN off.  It is assumed that we are still running
 * on the cpu that took the error and that we cannot migrate.  Returns
 * nonzero on success, otherwise 0.
6294 */
6295static int
6296cpu_ce_delayed_ec_logout(uint64_t afar)
6297{
6298	ch_cpu_logout_t *clop;
6299
6300	if (CPU_PRIVATE(CPU) == NULL)
6301		return (0);
6302
6303	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6304	if (atomic_cas_64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6305	    LOGOUT_INVALID)
6306		return (0);
6307
6308	cpu_delayed_logout(afar, clop);
6309	return (1);
6310}
6311
6312/*
6313 * We got an error while CEEN was disabled. We
6314 * need to clean up after it and log whatever
6315 * information we have on the CE.
6316 */
6317void
6318cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6319{
6320	ch_async_flt_t ch_flt;
6321	struct async_flt *aflt;
6322	char pr_reason[MAX_REASON_STRING];
6323
6324	bzero(&ch_flt, sizeof (ch_async_flt_t));
6325	ch_flt.flt_trapped_ce = flag;
6326	aflt = (struct async_flt *)&ch_flt;
6327	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6328	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6329	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6330	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6331	aflt->flt_addr = cpu_error_regs->afar;
6332#if defined(SERRANO)
6333	ch_flt.afar2 = cpu_error_regs->afar2;
6334#endif	/* SERRANO */
6335	aflt->flt_pc = NULL;
6336	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6337	aflt->flt_tl = 0;
6338	aflt->flt_panic = 0;
6339	cpu_log_and_clear_ce(&ch_flt);
6340
6341	/*
6342	 * check if we caused any errors during cleanup
6343	 */
6344	if (clear_errors(&ch_flt)) {
6345		pr_reason[0] = '\0';
6346		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6347		    NULL);
6348	}
6349}
6350
6351/*
6352 * Log/clear CEEN-controlled disrupting errors
6353 */
6354static void
6355cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6356{
6357	struct async_flt *aflt;
6358	uint64_t afsr, afsr_errs;
6359	ch_cpu_logout_t *clop;
6360	char pr_reason[MAX_REASON_STRING];
6361	on_trap_data_t *otp = curthread->t_ontrap;
6362
6363	aflt = (struct async_flt *)ch_flt;
6364	afsr = aflt->flt_stat;
6365	afsr_errs = ch_flt->afsr_errs;
6366	aflt->flt_id = gethrtime_waitfree();
6367	aflt->flt_bus_id = getprocessorid();
6368	aflt->flt_inst = CPU->cpu_id;
6369	aflt->flt_prot = AFLT_PROT_NONE;
6370	aflt->flt_class = CPU_FAULT;
6371	aflt->flt_status = ECC_C_TRAP;
6372
6373	pr_reason[0] = '\0';
6374	/*
6375	 * Get the CPU log out info for Disrupting Trap.
6376	 */
6377	if (CPU_PRIVATE(CPU) == NULL) {
6378		clop = NULL;
6379		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6380	} else {
6381		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6382	}
6383
6384	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6385		ch_cpu_errors_t cpu_error_regs;
6386
6387		get_cpu_error_state(&cpu_error_regs);
6388		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6389		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6390		clop->clo_data.chd_afar = cpu_error_regs.afar;
6391		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6392		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6393		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6394		clop->clo_sdw_data.chd_afsr_ext =
6395		    cpu_error_regs.shadow_afsr_ext;
6396#if defined(SERRANO)
6397		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6398#endif	/* SERRANO */
6399		ch_flt->flt_data_incomplete = 1;
6400
6401		/*
6402		 * The logging/clear code expects AFSR/AFAR to be cleared.
6403		 * The trap handler does it for CEEN enabled errors
6404		 * so we need to do it here.
6405		 */
6406		set_cpu_error_state(&cpu_error_regs);
6407	}
6408
6409#if defined(JALAPENO) || defined(SERRANO)
6410	/*
6411	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
	 * For Serrano, even though we do have the AFAR, we still do the
6413	 * scrub on the RCE side since that's where the error type can
6414	 * be properly classified as intermittent, persistent, etc.
6415	 *
6416	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6417	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6418	 * the flt_status bits.
6419	 */
6420	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6421	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6422	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6423		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6424	}
6425#else /* JALAPENO || SERRANO */
6426	/*
6427	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6428	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6429	 * the flt_status bits.
6430	 */
6431	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6432		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6433		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6434			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6435		}
6436	}
6437
6438#endif /* JALAPENO || SERRANO */
6439
6440	/*
6441	 * Update flt_prot if this error occurred under on_trap protection.
6442	 */
6443	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6444		aflt->flt_prot = AFLT_PROT_EC;
6445
6446	/*
6447	 * Queue events on the async event queue, one event per error bit.
6448	 */
6449	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6450	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6451		ch_flt->flt_type = CPU_INV_AFSR;
6452		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6453		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6454		    aflt->flt_panic);
6455	}
6456
6457	/*
6458	 * Zero out + invalidate CPU logout.
6459	 */
6460	if (clop) {
6461		bzero(clop, sizeof (ch_cpu_logout_t));
6462		clop->clo_data.chd_afar = LOGOUT_INVALID;
6463	}
6464
6465	/*
6466	 * If either a CPC, WDC or EDC error has occurred while CEEN
6467	 * was disabled, we need to flush either the entire
6468	 * E$ or an E$ line.
6469	 */
6470#if defined(JALAPENO) || defined(SERRANO)
6471	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6472#else	/* JALAPENO || SERRANO */
6473	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6474	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6475#endif	/* JALAPENO || SERRANO */
6476		cpu_error_ecache_flush(ch_flt);
6477
6478}
6479
6480/*
6481 * depending on the error type, we determine whether we
6482 * need to flush the entire ecache or just a line.
6483 */
6484static int
6485cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6486{
6487	struct async_flt *aflt;
6488	uint64_t	afsr;
6489	uint64_t	afsr_errs = ch_flt->afsr_errs;
6490
6491	aflt = (struct async_flt *)ch_flt;
6492	afsr = aflt->flt_stat;
6493
6494	/*
6495	 * If we got multiple errors, no point in trying
6496	 * the individual cases, just flush the whole cache
6497	 */
6498	if (afsr & C_AFSR_ME) {
6499		return (ECACHE_FLUSH_ALL);
6500	}
6501
6502	/*
6503	 * If either a CPC, WDC or EDC error has occurred while CEEN
	 * was disabled, we need to flush the entire E$. We can't just
6505	 * flush the cache line affected as the ME bit
6506	 * is not set when multiple correctable errors of the same
6507	 * type occur, so we might have multiple CPC or EDC errors,
6508	 * with only the first recorded.
6509	 */
6510#if defined(JALAPENO) || defined(SERRANO)
6511	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6512#else	/* JALAPENO || SERRANO */
6513	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6514	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6515#endif	/* JALAPENO || SERRANO */
6516		return (ECACHE_FLUSH_ALL);
6517	}
6518
6519#if defined(JALAPENO) || defined(SERRANO)
6520	/*
6521	 * If only UE or RUE is set, flush the Ecache line, otherwise
6522	 * flush the entire Ecache.
6523	 */
6524	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6525		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6526		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6527			return (ECACHE_FLUSH_LINE);
6528		} else {
6529			return (ECACHE_FLUSH_ALL);
6530		}
6531	}
6532#else /* JALAPENO || SERRANO */
6533	/*
6534	 * If UE only is set, flush the Ecache line, otherwise
6535	 * flush the entire Ecache.
6536	 */
6537	if (afsr_errs & C_AFSR_UE) {
6538		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6539		    C_AFSR_UE) {
6540			return (ECACHE_FLUSH_LINE);
6541		} else {
6542			return (ECACHE_FLUSH_ALL);
6543		}
6544	}
6545#endif /* JALAPENO || SERRANO */
6546
6547	/*
6548	 * EDU: If EDU only is set, flush the ecache line, otherwise
6549	 * flush the entire Ecache.
6550	 */
6551	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6552		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6553		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6554			return (ECACHE_FLUSH_LINE);
6555		} else {
6556			return (ECACHE_FLUSH_ALL);
6557		}
6558	}
6559
6560	/*
6561	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6562	 * flush the entire Ecache.
6563	 */
6564	if (afsr_errs & C_AFSR_BERR) {
6565		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6566			return (ECACHE_FLUSH_LINE);
6567		} else {
6568			return (ECACHE_FLUSH_ALL);
6569		}
6570	}
6571
6572	return (0);
6573}
6574
6575void
6576cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6577{
6578	int	ecache_flush_flag =
6579	    cpu_error_ecache_flush_required(ch_flt);
6580
6581	/*
6582	 * Flush Ecache line or entire Ecache based on above checks.
6583	 */
	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
		cpu_flush_ecache();
	else if (ecache_flush_flag == ECACHE_FLUSH_LINE)
		cpu_flush_ecache_line(ch_flt);
}
6591
6592/*
6593 * Extract the PA portion from the E$ tag.
6594 */
6595uint64_t
6596cpu_ectag_to_pa(int setsize, uint64_t tag)
6597{
6598	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6599		return (JG_ECTAG_TO_PA(setsize, tag));
6600	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6601		return (PN_L3TAG_TO_PA(tag));
6602	else
6603		return (CH_ECTAG_TO_PA(setsize, tag));
6604}
6605
6606/*
6607 * Convert the E$ tag PA into an E$ subblock index.
6608 */
6609int
6610cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6611{
6612	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6613		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6614	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6615		/* Panther has only one subblock per line */
6616		return (0);
6617	else
6618		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6619}
6620
6621/*
6622 * All subblocks in an E$ line must be invalid for
6623 * the line to be invalid.
6624 */
6625int
6626cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6627{
6628	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6629		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6630	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6631		return (PN_L3_LINE_INVALID(tag));
6632	else
6633		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6634}
6635
6636/*
6637 * Extract state bits for a subblock given the tag.  Note that for Panther
6638 * this works on both l2 and l3 tags.
6639 */
6640int
6641cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6642{
6643	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6644		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6645	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6646		return (tag & CH_ECSTATE_MASK);
6647	else
6648		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6649}
6650
6651/*
6652 * Cpu specific initialization.
6653 */
6654void
66