/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/async.h>
#include <sys/ontrap.h>
#include <sys/ddifm.h>
#include <sys/hypervisor_api.h>
#include <sys/errorq.h>
#include <sys/promif.h>
#include <sys/prom_plat.h>
#include <sys/x_call.h>
#include <sys/error.h>
#include <sys/fm/util.h>

#define	MAX_CE_FLTS	10
#define	MAX_ASYNC_FLTS	6

errorq_t *ue_queue;		/* queue of uncorrectable errors */
errorq_t *ce_queue;		/* queue of correctable errors */

/*
 * Used by the memory test driver.
 * ce_verbose_memory - covers CEs in DIMMs
 * ce_verbose_other - covers "others" (ecache, IO, etc.)
 *
 * If the value is 0, nothing is logged.
 * If the value is 1, the error is logged to the log file, but not the console.
 * If the value is 2, the error is logged to both the log file and the console.
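 *
 * As with other kernel tunables, these can be set from /etc/system at
 * boot time, e.g. "set ce_verbose_memory = 2" (an illustrative setting,
 * not a recommendation).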
 */
int ce_verbose_memory = 1;
int ce_verbose_other = 1;

int ce_show_data = 0;
int ce_debug = 0;
int ue_debug = 0;
int reset_debug = 0;

/*
 * Tunables for controlling the handling of asynchronous faults (AFTs).
 * Setting these to non-default values on a non-DEBUG kernel is NOT
 * supported.
 */
int aft_verbose = 0;		/* log AFT messages > 1 to log only */
int aft_panic = 0;		/* panic (not reboot) on fatal usermode AFLT */
int aft_testfatal = 0;		/* force all AFTs to panic immediately */

/*
 * Defined in bus_func.c but initialised in error_init
 */
extern kmutex_t bfd_lock;

static uint32_t rq_overflow_count = 0;	/* counter for rq overflow */

static void cpu_queue_one_event(errh_async_flt_t *);
static uint32_t count_entries_on_queue(uint64_t, uint64_t, uint32_t);
static void errh_page_settoxic(errh_async_flt_t *, uchar_t);
static void errh_page_retire(errh_async_flt_t *);
static int errh_error_protected(struct regs *, struct async_flt *, int *);
static void errh_rq_full(struct async_flt *);
static void ue_drain(void *, struct async_flt *, errorq_elem_t *);
static void ce_drain(void *, struct async_flt *, errorq_elem_t *);

/*ARGSUSED*/
void
process_resumable_error(struct regs *rp, uint32_t head_offset,
    uint32_t tail_offset)
{
        struct machcpu *mcpup;
        struct async_flt *aflt;
        errh_async_flt_t errh_flt;
        errh_er_t *head_va;

        mcpup = &(CPU->cpu_m);

        while (head_offset != tail_offset) {
                /* kernel buffer starts right after the resumable queue */
                head_va = (errh_er_t *)(mcpup->cpu_rq_va + head_offset +
                    CPU_RQ_SIZE);
                /* Copy the error report to local buffer */
                bzero(&errh_flt, sizeof (errh_async_flt_t));
                bcopy((char *)head_va, &(errh_flt.errh_er),
                    sizeof (errh_er_t));

                /* Increment the queue head */
                head_offset += Q_ENTRY_SIZE;
                /* Wrap around */
                head_offset &= (CPU_RQ_SIZE - 1);
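                /*
                 * CPU_RQ_SIZE is assumed to be a power of two, so the mask
                 * above is what wraps the head offset back to the start of
                 * the resumable error queue.
                 */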

                /* Set the error handle to zero so it can hold a new report */
                head_va->ehdl = 0;

                switch (errh_flt.errh_er.desc) {
                case ERRH_DESC_UCOR_RE:
                        break;

                default:
                        cmn_err(CE_WARN, "Error Descriptor 0x%llx "
                            "invalid in resumable error handler",
                            (long long) errh_flt.errh_er.desc);
                        continue;
                }

                aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);
                aflt->flt_id = gethrtime();
                aflt->flt_bus_id = getprocessorid();
                aflt->flt_class = CPU_FAULT;
                aflt->flt_prot = AFLT_PROT_NONE;
                aflt->flt_priv = (((errh_flt.errh_er.attr & ERRH_MODE_MASK)
                    >> ERRH_MODE_SHIFT) == ERRH_MODE_PRIV);

                if (errh_flt.errh_er.attr & ERRH_ATTR_CPU)
                        /* If it is an error on another cpu */
                        aflt->flt_panic = 1;
                else
                        aflt->flt_panic = 0;

                /*
                 * Handle the resumable queue full case.
                 */
                if (errh_flt.errh_er.attr & ERRH_ATTR_RQF) {
                        (void) errh_rq_full(aflt);
                }

                /*
                 * Queue the error on the ce or ue queue depending on
                 * flt_panic. Even if flt_panic is set, the code keeps
                 * processing the remaining elements on the rq until the
                 * panic starts.
                 */
                (void) cpu_queue_one_event(&errh_flt);

                /*
                 * Panic here if aflt->flt_panic has been set.
                 * Enqueued errors will be logged as part of the panic flow.
                 */
                if (aflt->flt_panic) {
                        fm_panic("Unrecoverable error on another CPU");
                }
        }
}

void
process_nonresumable_error(struct regs *rp, uint64_t tl,
    uint32_t head_offset, uint32_t tail_offset)
{
        struct machcpu *mcpup;
        struct async_flt *aflt;
        errh_async_flt_t errh_flt;
        errh_er_t *head_va;
        int trampolined = 0;
        int expected = DDI_FM_ERR_UNEXPECTED;
        uint64_t exec_mode;

        mcpup = &(CPU->cpu_m);

        while (head_offset != tail_offset) {
                /* kernel buffer starts right after the nonresumable queue */
                head_va = (errh_er_t *)(mcpup->cpu_nrq_va + head_offset +
                    CPU_NRQ_SIZE);

                /* Copy the error report to local buffer */
                bzero(&errh_flt, sizeof (errh_async_flt_t));

                bcopy((char *)head_va, &(errh_flt.errh_er),
                    sizeof (errh_er_t));

                /* Increment the queue head */
                head_offset += Q_ENTRY_SIZE;
                /* Wrap around */
                head_offset &= (CPU_NRQ_SIZE - 1);

                /* Set the error handle to zero so it can hold a new report */
                head_va->ehdl = 0;

                aflt = (struct async_flt *)&(errh_flt.cmn_asyncflt);

                trampolined = 0;

                if (errh_flt.errh_er.attr & ERRH_ATTR_PIO)
                        aflt->flt_class = BUS_FAULT;
                else
                        aflt->flt_class = CPU_FAULT;

                aflt->flt_id = gethrtime();
                aflt->flt_bus_id = getprocessorid();
                aflt->flt_pc = (caddr_t)rp->r_pc;
                exec_mode = (errh_flt.errh_er.attr & ERRH_MODE_MASK)
                    >> ERRH_MODE_SHIFT;
                aflt->flt_priv = (exec_mode == ERRH_MODE_PRIV ||
                    exec_mode == ERRH_MODE_UNKNOWN);
                aflt->flt_tl = (uchar_t)tl;
                aflt->flt_prot = AFLT_PROT_NONE;
                aflt->flt_panic = ((aflt->flt_tl != 0) ||
                    (aft_testfatal != 0));

                switch (errh_flt.errh_er.desc) {
                case ERRH_DESC_PR_NRE:
                        /*
                         * Fall through: a precise fault also needs to be
                         * checked to see whether it was protected.
                         */

                case ERRH_DESC_DEF_NRE:
                        /*
                         * If the trap occurred in privileged mode at TL=0,
                         * we need to check to see if we were executing
                         * in kernel under on_trap() or t_lofault
                         * protection. If so, modify the saved registers
                         * so that we return from the trap to the
                         * appropriate trampoline routine.
                         */
                        if (aflt->flt_priv == 1 && aflt->flt_tl == 0)
                                trampolined =
                                    errh_error_protected(rp, aflt, &expected);

                        if (!aflt->flt_priv || aflt->flt_prot ==
                            AFLT_PROT_COPY) {
                                aflt->flt_panic |= aft_panic;
                        } else if (!trampolined &&
                            aflt->flt_class != BUS_FAULT) {
                                aflt->flt_panic = 1;
                        }

                        /*
                         * If this is a PIO error, we need to query the bus
                         * nexus for fatal errors.
                         */
                        if (aflt->flt_class == BUS_FAULT) {
                                aflt->flt_addr = errh_flt.errh_er.ra;
                                errh_cpu_run_bus_error_handlers(aflt,
                                    expected);
                        }

                        break;

                default:
                        cmn_err(CE_WARN, "Error Descriptor 0x%llx "
                            "invalid in nonresumable error handler",
                            (long long) errh_flt.errh_er.desc);
                        continue;
                }

                /*
                 * Queue the error report for further processing. If
                 * flt_panic is set, the code still processes the other
                 * errors in the queue until the panic routine stops the
                 * kernel.
                 */
                (void) cpu_queue_one_event(&errh_flt);

                /*
                 * Panic here if aflt->flt_panic has been set.
                 * Enqueued errors will be logged as part of the panic flow.
                 */
                if (aflt->flt_panic) {
                        fm_panic("Unrecoverable hardware error");
                }

                /*
                 * If it is a memory error, we turn on the PAGE_IS_TOXIC
                 * flag. The page will be retired later and scrubbed when
                 * it is freed.
                 */
                if (errh_flt.errh_er.attr & ERRH_ATTR_MEM)
                        (void) errh_page_settoxic(&errh_flt, PAGE_IS_TOXIC);

                /*
                 * If we queued an error and it was in user mode or
                 * protected by t_lofault, set the AST flag so the queue
                 * will be drained before returning to user mode.
                 */
                if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
                        int pcb_flag = 0;

                        if (aflt->flt_class == CPU_FAULT)
                                pcb_flag |= ASYNC_HWERR;
                        else if (aflt->flt_class == BUS_FAULT)
                                pcb_flag |= ASYNC_BERR;

                        ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
                        aston(curthread);
                }
        }
}

/*
 * For PIO errors, this routine calls the nexus driver's error
 * callback routines. If a callback routine returns fatal, and we are
 * in kernel or unknown mode without any error protection, we need to
 * turn on the panic flag.
 */
void
errh_cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
        int status;
        ddi_fm_error_t de;

        bzero(&de, sizeof (ddi_fm_error_t));

        de.fme_version = DDI_FME_VERSION;
        de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
        de.fme_flag = expected;
        de.fme_bus_specific = (void *)aflt->flt_addr;
        status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

        /*
         * If the error is protected, we will jump to the proper routine
         * to handle it; if it happened at user level, we just kill the
         * user process; if the driver thinks the error is not fatal, we
         * can drive on. If none of the above are true, we panic.
         */
        if ((aflt->flt_prot == AFLT_PROT_NONE) && (aflt->flt_priv == 1) &&
            (status == DDI_FM_FATAL))
                aflt->flt_panic = 1;
}
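
/*
 * For reference, the kernel protection that errh_error_protected() honors
 * is normally established with the usual on_trap() pattern (an
 * illustrative sketch only, not code from this file):
 *
 *      on_trap_data_t otd;
 *
 *      if (!on_trap(&otd, OT_DATA_EC)) {
 *              ... access that may take an asynchronous data error ...
 *      } else {
 *              ... recovery path reached via the trampoline ...
 *      }
 *      no_trap();
 *
 * When such protection (or a t_lofault setting) is in effect, the routine
 * below redirects the saved PC/nPC to the trampoline rather than letting
 * the error become fatal.
 */
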
/*
 * This routine checks to see if we were under any error protection when
 * the error happened. If we were under error protection, we unwind to
 * the protection and indicate the fault.
 */
static int
errh_error_protected(struct regs *rp, struct async_flt *aflt, int *expected)
{
        int trampolined = 0;
        ddi_acc_hdl_t *hp;

        if (curthread->t_ontrap != NULL) {
                on_trap_data_t *otp = curthread->t_ontrap;

                if (otp->ot_prot & OT_DATA_EC) {
                        aflt->flt_prot = AFLT_PROT_EC;
                        otp->ot_trap |= OT_DATA_EC;
                        rp->r_pc = otp->ot_trampoline;
                        rp->r_npc = rp->r_pc + 4;
                        trampolined = 1;
                }

                if (otp->ot_prot & OT_DATA_ACCESS) {
                        aflt->flt_prot = AFLT_PROT_ACCESS;
                        otp->ot_trap |= OT_DATA_ACCESS;
                        rp->r_pc = otp->ot_trampoline;
                        rp->r_npc = rp->r_pc + 4;
                        trampolined = 1;
                        /*
                         * For peeks and cautious-access gets,
                         * errors are expected.
                         */
                        hp = (ddi_acc_hdl_t *)otp->ot_handle;
                        if (!hp)
                                *expected = DDI_FM_ERR_PEEK;
                        else if (hp->ah_acc.devacc_attr_access ==
                            DDI_CAUTIOUS_ACC)
                                *expected = DDI_FM_ERR_EXPECTED;
                }
        } else if (curthread->t_lofault) {
                aflt->flt_prot = AFLT_PROT_COPY;
                rp->r_g1 = EFAULT;
                rp->r_pc = curthread->t_lofault;
                rp->r_npc = rp->r_pc + 4;
                trampolined = 1;
        }

        return (trampolined);
}

/*
 * Queue one event on the correctable or uncorrectable error queue,
 * based on flt_panic.
 */
static void
cpu_queue_one_event(errh_async_flt_t *errh_fltp)
{
        struct async_flt *aflt = (struct async_flt *)errh_fltp;
        errorq_t *eqp;

        if (aflt->flt_panic)
                eqp = ue_queue;
        else
                eqp = ce_queue;

        errorq_dispatch(eqp, errh_fltp, sizeof (errh_async_flt_t),
            aflt->flt_panic);
}

/*
 * The cpu_async_log_err() function is called by the ce/ue_drain() function
 * to handle logging for CPU events that are dequeued.
 * As such, it can be invoked from softint context, from AST processing in
 * the trap() flow, or from the panic flow. We decode the CPU-specific data,
 * and log appropriate messages.
 */
void
cpu_async_log_err(void *flt)
{
        errh_async_flt_t *errh_fltp = (errh_async_flt_t *)flt;
        errh_er_t *errh_erp = (errh_er_t *)&errh_fltp->errh_er;

        switch (errh_erp->desc) {
        case ERRH_DESC_UCOR_RE:
                if (errh_erp->attr & ERRH_ATTR_MEM) {
                        /*
                         * Turn on the PAGE_IS_TOXIC flag. The page will be
                         * scrubbed when it is freed.
                         */
                        (void) errh_page_settoxic(errh_fltp, PAGE_IS_TOXIC);
                }

                break;

        case ERRH_DESC_PR_NRE:
        case ERRH_DESC_DEF_NRE:
                if (errh_erp->attr & ERRH_ATTR_MEM) {
                        /*
                         * For non-resumable memory error, retire
                         * the page here.
                         */
                        errh_page_retire(errh_fltp);
                }
                break;

        default:
                break;
        }
}

/*
 * Called from ce_drain().
 */
void
cpu_ce_log_err(struct async_flt *aflt)
{
        switch (aflt->flt_class) {
        case CPU_FAULT:
                cpu_async_log_err(aflt);
                break;

        case BUS_FAULT:
                cpu_async_log_err(aflt);
                break;

        default:
                break;
        }
}

/*
 * Called from ue_drain().
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
        switch (aflt->flt_class) {
        case CPU_FAULT:
                cpu_async_log_err(aflt);
                break;

        case BUS_FAULT:
                cpu_async_log_err(aflt);
                break;

        default:
                break;
        }
}

/*
 * Turn on the given flag on the page(s) in the error memory region.
 */
static void
errh_page_settoxic(errh_async_flt_t *errh_fltp, uchar_t flag)
{
        page_t *pp;
        uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
        uint64_t flt_real_addr_end = flt_real_addr_start +
            errh_fltp->errh_er.sz - 1;
        int64_t current_addr;

        if (errh_fltp->errh_er.sz == 0)
                return;

        for (current_addr = flt_real_addr_start;
            current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
                pp = page_numtopp_nolock((pfn_t)
                    (current_addr >> MMU_PAGESHIFT));

                if (pp != NULL) {
                        page_settoxic(pp, flag);
                }
        }
}

/*
 * Retire the page(s) indicated in the error report.
 */
static void
errh_page_retire(errh_async_flt_t *errh_fltp)
{
        page_t *pp;
        uint64_t flt_real_addr_start = errh_fltp->errh_er.ra;
        uint64_t flt_real_addr_end = flt_real_addr_start +
            errh_fltp->errh_er.sz - 1;
        int64_t current_addr;

        if (errh_fltp->errh_er.sz == 0)
                return;

        for (current_addr = flt_real_addr_start;
            current_addr < flt_real_addr_end; current_addr += MMU_PAGESIZE) {
                pp = page_numtopp_nolock((pfn_t)
                    (current_addr >> MMU_PAGESHIFT));

                if (pp != NULL) {
                        (void) page_retire(pp, PAGE_IS_TOXIC);
                }
        }
}
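
/*
 * mem_scrub() and mem_sync() below are wrappers around the hypervisor
 * hv_mem_scrub() and hv_mem_sync() calls. The hypervisor reports back how
 * much of the requested range it actually scrubbed or flushed, which is
 * why each routine tracks a running physical address and remaining length
 * rather than assuming the whole range is handled in one call.
 */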

void
mem_scrub(uint64_t paddr, uint64_t len)
{
        uint64_t pa, length, scrubbed_len;
        uint64_t ret = H_EOK;

        pa = paddr;
        length = len;
        scrubbed_len = 0;

        while (ret == H_EOK) {
                ret = hv_mem_scrub(pa, length, &scrubbed_len);

                if (ret == H_EOK || scrubbed_len >= length) {
                        break;
                }

                pa += scrubbed_len;
                length -= scrubbed_len;
        }
}

void
mem_sync(caddr_t va, size_t len)
{
        uint64_t pa, length, flushed;
        uint64_t ret = H_EOK;

        pa = va_to_pa((caddr_t)va);

        if (pa == (uint64_t)-1)
                return;

        length = len;
        flushed = 0;

        while (ret == H_EOK) {
                ret = hv_mem_sync(pa, length, &flushed);

                if (ret == H_EOK || flushed >= length) {
                        break;
                }

                pa += flushed;
                length -= flushed;
        }
}

/*
 * If the resumable queue is full, we need to check whether any cpu is in
 * the error state. If not, we drive on; if so, we need to panic. The
 * hypervisor call hv_cpu_state() is used to check the cpu state.
 */
static void
errh_rq_full(struct async_flt *afltp)
{
        processorid_t who;
        uint64_t cpu_state;
        uint64_t retval;

        for (who = 0; who < NCPU; who++)
                if (CPU_IN_SET(cpu_ready_set, who)) {
                        retval = hv_cpu_state(who, &cpu_state);
                        if (retval != H_EOK || cpu_state == CPU_STATE_ERROR) {
                                afltp->flt_panic = 1;
                                break;
                        }
                }
}

/*
 * Return the size of the processor-specific async error structure.
 */
int
cpu_aflt_size(void)
{
        return (sizeof (errh_async_flt_t));
}

#define	SZ_TO_ETRS_SHIFT	6

/*
 * Called when the resumable error queue has overflowed; count the
 * occurrence.
 */
/*ARGSUSED*/
void
rq_overflow(struct regs *rp, uint64_t head_offset,
    uint64_t tail_offset)
{
        rq_overflow_count++;
}

/*
 * Handler to process a fatal error. This routine can be called from a
 * softint, called from trap()'s AST handling, or called from the panic flow.
 */
/*ARGSUSED*/
static void
ue_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
        cpu_ue_log_err(aflt);
}

/*
 * Handler to process a correctable error. This routine can be called from a
 * softint. We just call the CPU module's logging routine.
 */
/*ARGSUSED*/
static void
ce_drain(void *ignored, struct async_flt *aflt, errorq_elem_t *eqep)
{
        cpu_ce_log_err(aflt);
}

/*
 * Allocate error queue sizes based on max_ncpus. max_ncpus is set just
 * after ncpunode has been determined.
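 * (For example, with MAX_ASYNC_FLTS at 6 and a hypothetical max_ncpus of
 * 64, ue_queue below would be sized at 6 * (64 + 1) = 390 entries.)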
 * ncpus is set in start_other_cpus, which is called after error_init()
 * but may change dynamically.
 */
void
error_init(void)
{
        char tmp_name[MAXSYSNAME];
        dnode_t node;
        size_t size = cpu_aflt_size();

        /*
         * Initialize the correctable and uncorrectable error queues.
         */
        ue_queue = errorq_create("ue_queue", (errorq_func_t)ue_drain, NULL,
            MAX_ASYNC_FLTS * (max_ncpus + 1), size, PIL_2, ERRORQ_VITAL);

        ce_queue = errorq_create("ce_queue", (errorq_func_t)ce_drain, NULL,
            MAX_CE_FLTS * (max_ncpus + 1), size, PIL_1, 0);

        if (ue_queue == NULL || ce_queue == NULL)
                panic("failed to create required system error queue");

        /*
         * Initialize the busfunc list mutex. This must be a PIL_15 spin lock
         * because we will need to acquire it from cpu_async_error().
         */
        mutex_init(&bfd_lock, NULL, MUTEX_SPIN, (void *)PIL_15);

        node = prom_rootnode();
        if ((node == OBP_NONODE) || (node == OBP_BADNODE)) {
                cmn_err(CE_CONT, "error_init: node 0x%x\n", (uint_t)node);
                return;
        }

        if (((size = prom_getproplen(node, "reset-reason")) != -1) &&
            (size <= MAXSYSNAME) &&
            (prom_getprop(node, "reset-reason", tmp_name) != -1)) {
                if (reset_debug) {
                        cmn_err(CE_CONT, "System booting after %s\n",
                            tmp_name);
                } else if (strncmp(tmp_name, "FATAL", 5) == 0) {
                        cmn_err(CE_CONT,
                            "System booting after fatal error %s\n", tmp_name);
                }
        }
}