/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * PX Fault Management Architecture */ #include #include #include #include #include #include #include #include "px_obj.h" extern uint_t px_ranges_phi_mask; #define PX_PCIE_PANIC_BITS \ (PCIE_AER_UCE_DLP | PCIE_AER_UCE_FCP | PCIE_AER_UCE_TO | \ PCIE_AER_UCE_RO | PCIE_AER_UCE_MTLP | PCIE_AER_UCE_ECRC) #define PX_PCIE_NO_PANIC_BITS \ (PCIE_AER_UCE_TRAINING | PCIE_AER_UCE_SD | PCIE_AER_UCE_CA | \ PCIE_AER_UCE_UC | PCIE_AER_UCE_UR) /* * Global panicing state variabled used to control if further error handling * should occur. If the system is already panic'ing or if PX itself has * recommended panic'ing the system, no further error handling should occur to * prevent the system from hanging. 
*/ boolean_t px_panicing = B_FALSE; static int px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs); #if defined(DEBUG) static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs); #else /* DEBUG */ #define px_pcie_log 0 && #endif /* DEBUG */ /* * Initialize px FMA support */ int px_fm_attach(px_t *px_p) { int i; dev_info_t *dip = px_p->px_dip; pcie_bus_t *bus_p; px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; /* * check parents' capability */ ddi_fm_init(dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); /* * parents need to be ereport and error handling capable */ ASSERT(px_p->px_fm_cap && (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); /* * Initialize lock to synchronize fabric error handling */ mutex_init(&px_p->px_fm_mutex, NULL, MUTEX_DRIVER, (void *)px_p->px_fm_ibc); px_p->px_pfd_idx = 0; for (i = 0; i < 5; i++) pcie_rc_init_pfd(dip, &px_p->px_pfd_arr[i]); PCIE_DIP2PFD(dip) = px_p->px_pfd_arr; bus_p = PCIE_DIP2BUS(dip); bus_p->bus_rp_bdf = px_p->px_bdf; bus_p->bus_rp_dip = dip; return (DDI_SUCCESS); } /* * Deregister FMA */ void px_fm_detach(px_t *px_p) { int i; mutex_destroy(&px_p->px_fm_mutex); ddi_fm_fini(px_p->px_dip); for (i = 0; i < 5; i++) pcie_rc_fini_pfd(&px_p->px_pfd_arr[i]); } /* * register error callback in parent */ void px_fm_cb_enable(px_t *px_p) { ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); } void px_fm_cb_disable(px_t *px_p) { ddi_fm_handler_unregister(px_p->px_dip); } /* * Function used to setup access functions depending on level of desired * protection. 
*/ void px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip, pci_regspec_t *rp) { uchar_t fflag; ndi_err_t *errp; ddi_acc_hdl_t *hp; ddi_acc_impl_t *ap; hp = mp->map_handlep; ap = (ddi_acc_impl_t *)hp->ah_platform_private; fflag = ap->ahi_common.ah_acc.devacc_attr_access; if (mp->map_op == DDI_MO_MAP_LOCKED) { ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); switch (fflag) { case DDI_FLAGERR_ACC: ap->ahi_get8 = i_ddi_prot_get8; ap->ahi_get16 = i_ddi_prot_get16; ap->ahi_get32 = i_ddi_prot_get32; ap->ahi_get64 = i_ddi_prot_get64; ap->ahi_put8 = i_ddi_prot_put8; ap->ahi_put16 = i_ddi_prot_put16; ap->ahi_put32 = i_ddi_prot_put32; ap->ahi_put64 = i_ddi_prot_put64; ap->ahi_rep_get8 = i_ddi_prot_rep_get8; ap->ahi_rep_get16 = i_ddi_prot_rep_get16; ap->ahi_rep_get32 = i_ddi_prot_rep_get32; ap->ahi_rep_get64 = i_ddi_prot_rep_get64; ap->ahi_rep_put8 = i_ddi_prot_rep_put8; ap->ahi_rep_put16 = i_ddi_prot_rep_put16; ap->ahi_rep_put32 = i_ddi_prot_rep_put32; ap->ahi_rep_put64 = i_ddi_prot_rep_put64; impl_acc_err_init(hp); errp = ((ddi_acc_impl_t *)hp)->ahi_err; if ((rp->pci_phys_hi & PCI_REG_ADDR_M) == PCI_ADDR_CONFIG) errp->err_cf = px_err_cfg_hdl_check; else errp->err_cf = px_err_pio_hdl_check; break; case DDI_CAUTIOUS_ACC : ap->ahi_get8 = i_ddi_caut_get8; ap->ahi_get16 = i_ddi_caut_get16; ap->ahi_get32 = i_ddi_caut_get32; ap->ahi_get64 = i_ddi_caut_get64; ap->ahi_put8 = i_ddi_caut_put8; ap->ahi_put16 = i_ddi_caut_put16; ap->ahi_put32 = i_ddi_caut_put32; ap->ahi_put64 = i_ddi_caut_put64; ap->ahi_rep_get8 = i_ddi_caut_rep_get8; ap->ahi_rep_get16 = i_ddi_caut_rep_get16; ap->ahi_rep_get32 = i_ddi_caut_rep_get32; ap->ahi_rep_get64 = i_ddi_caut_rep_get64; ap->ahi_rep_put8 = i_ddi_caut_rep_put8; ap->ahi_rep_put16 = i_ddi_caut_rep_put16; ap->ahi_rep_put32 = i_ddi_caut_rep_put32; ap->ahi_rep_put64 = i_ddi_caut_rep_put64; impl_acc_err_init(hp); errp = ((ddi_acc_impl_t *)hp)->ahi_err; if ((rp->pci_phys_hi & PCI_REG_ADDR_M) == PCI_ADDR_CONFIG) errp->err_cf = px_err_cfg_hdl_check; else 
errp->err_cf = px_err_pio_hdl_check; break; default: /* Illegal state, remove the handle from cache */ ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); break; } } else if (mp->map_op == DDI_MO_UNMAP) { ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); } } /* * Function used to initialize FMA for our children nodes. Called * through pci busops when child node calls ddi_fm_init. */ /*ARGSUSED*/ int px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, ddi_iblock_cookie_t *ibc_p) { px_t *px_p = DIP_TO_STATE(dip); ASSERT(ibc_p != NULL); *ibc_p = px_p->px_fm_ibc; return (px_p->px_fm_cap); } /* * lock access for exclusive PCIe access */ void px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) { px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; /* * Exclusive access has been used for cautious put/get, * Both utilize i_ddi_ontrap which, on sparcv9, implements * similar protection as what on_trap() does, and which calls * membar #Sync to flush out all cpu deferred errors * prior to get/put operation, so here we're not calling * membar #Sync - a difference from what's in pci_bus_enter(). 
*/ mutex_enter(&pec_p->pec_pokefault_mutex); pec_p->pec_acc_hdl = handle; } /* * unlock access for exclusive PCIe access */ /* ARGSUSED */ void px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) { px_t *px_p = DIP_TO_STATE(dip); px_pec_t *pec_p = px_p->px_pec_p; pec_p->pec_acc_hdl = NULL; mutex_exit(&pec_p->pec_pokefault_mutex); } static uint64_t px_in_addr_range(dev_info_t *dip, pci_ranges_t *ranges_p, uint64_t addr) { uint64_t addr_low, addr_high; addr_low = (uint64_t)(ranges_p->parent_high & px_ranges_phi_mask) << 32; addr_low |= (uint64_t)ranges_p->parent_low; addr_high = addr_low + ((uint64_t)ranges_p->size_high << 32) + (uint64_t)ranges_p->size_low; DBG(DBG_ERR_INTR, dip, "Addr: 0x%llx high: 0x%llx low: 0x%llx\n", addr, addr_high, addr_low); if ((addr < addr_high) && (addr >= addr_low)) return (addr_low); return (0); } /* * PCI error callback which is registered with our parent to call * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors * and PCI BERR/TO/UE on IO Loads. */ /*ARGSUSED*/ int px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { dev_info_t *pdip = ddi_get_parent(dip); px_t *px_p = (px_t *)impl_data; int i, acc_type = 0; int lookup, rc_err, fab_err; uint64_t addr, base_addr; uint64_t fault_addr = (uint64_t)derr->fme_bus_specific; pcie_req_id_t bdf = PCIE_INVALID_BDF; pci_ranges_t *ranges_p; int range_len; pf_data_t *pfd_p; /* * If the current thread already owns the px_fm_mutex, then we * have encountered an error while processing a previous * error. Attempting to take the mutex again will cause the * system to deadlock. */ if (px_p->px_fm_mutex_owner == curthread) return (DDI_FM_FATAL); i_ddi_fm_handler_exit(pdip); if (px_fm_enter(px_p) != DDI_SUCCESS) { i_ddi_fm_handler_enter(pdip); return (DDI_FM_FATAL); } /* * Make sure this failed load came from this PCIe port. Check by * matching the upper 32 bits of the address with the ranges property. 
*/ range_len = px_p->px_ranges_length / sizeof (pci_ranges_t); i = 0; for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { base_addr = px_in_addr_range(dip, ranges_p, fault_addr); if (base_addr) { switch (ranges_p->child_high & PCI_ADDR_MASK) { case PCI_ADDR_CONFIG: acc_type = PF_ADDR_CFG; addr = 0; bdf = (pcie_req_id_t)((fault_addr >> 12) & 0xFFFF); break; case PCI_ADDR_IO: case PCI_ADDR_MEM64: case PCI_ADDR_MEM32: acc_type = PF_ADDR_PIO; addr = fault_addr - base_addr; bdf = PCIE_INVALID_BDF; break; } break; } } /* This address doesn't belong to this leaf, just return with OK */ if (!acc_type) { px_fm_exit(px_p); i_ddi_fm_handler_enter(pdip); return (DDI_FM_OK); } rc_err = px_err_cmn_intr(px_p, derr, PX_TRAP_CALL, PX_FM_BLOCK_ALL); lookup = pf_hdl_lookup(dip, derr->fme_ena, acc_type, (uint64_t)addr, bdf); pfd_p = px_rp_en_q(px_p, bdf, addr, (PCI_STAT_R_MAST_AB | PCI_STAT_R_TARG_AB)); PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_DATA; /* Update affected info, either addr or bdf is not NULL */ if (addr) { PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_ADDR; } else if (PCIE_CHECK_VALID_BDF(bdf)) { PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = PF_AFFECTED_BDF; PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = bdf; } fab_err = px_scan_fabric(px_p, dip, derr); px_fm_exit(px_p); i_ddi_fm_handler_enter(pdip); if (!px_die) return (DDI_FM_OK); if ((rc_err & (PX_PANIC | PX_PROTECTED)) || (fab_err & PF_ERR_FATAL_FLAGS) || (lookup == PF_HDL_NOTFOUND)) return (DDI_FM_FATAL); else if ((rc_err == PX_NO_ERROR) && (fab_err == PF_ERR_NO_ERROR)) return (DDI_FM_OK); return (DDI_FM_NONFATAL); } /* * px_err_fabric_intr: * Interrupt handler for PCIE fabric block. * o lock * o create derr * o px_err_cmn_intr(leaf, with jbc) * o send ereport(fire fmri, derr, payload = BDF) * o dispatch (leaf) * o unlock * o handle error: fatal? 
fm_panic() : return INTR_CLAIMED) */ /* ARGSUSED */ uint_t px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, pcie_req_id_t rid) { dev_info_t *rpdip = px_p->px_dip; int rc_err, fab_err; ddi_fm_error_t derr; uint32_t rp_status; uint16_t ce_source, ue_source; pf_data_t *pfd_p; if (px_fm_enter(px_p) != DDI_SUCCESS) goto done; /* Create the derr */ bzero(&derr, sizeof (ddi_fm_error_t)); derr.fme_version = DDI_FME_VERSION; derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); derr.fme_flag = DDI_FM_ERR_UNEXPECTED; px_err_safeacc_check(px_p, &derr); if (msg_code == PCIE_MSG_CODE_ERR_COR) { rp_status = PCIE_AER_RE_STS_CE_RCVD; ce_source = rid; ue_source = 0; } else { rp_status = PCIE_AER_RE_STS_FE_NFE_RCVD; ce_source = 0; ue_source = rid; if (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) rp_status |= PCIE_AER_RE_STS_NFE_MSGS_RCVD; else { rp_status |= PCIE_AER_RE_STS_FE_MSGS_RCVD; rp_status |= PCIE_AER_RE_STS_FIRST_UC_FATAL; } } if (derr.fme_flag == DDI_FM_ERR_UNEXPECTED) { ddi_fm_ereport_post(rpdip, PCI_ERROR_SUBCLASS "." PCIEX_FABRIC, derr.fme_ena, DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8, 0, FIRE_PRIMARY, DATA_TYPE_BOOLEAN_VALUE, B_TRUE, "pcie_adv_rp_status", DATA_TYPE_UINT32, rp_status, "pcie_adv_rp_command", DATA_TYPE_UINT32, 0, "pcie_adv_rp_ce_src_id", DATA_TYPE_UINT16, ce_source, "pcie_adv_rp_ue_src_id", DATA_TYPE_UINT16, ue_source, NULL); } /* Ensure that the rid of the fabric message will get scanned. */ pfd_p = px_rp_en_q(px_p, rid, 0, 0); PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_FABRIC; rc_err = px_err_cmn_intr(px_p, &derr, PX_INTR_CALL, PX_FM_BLOCK_PCIE); /* call rootport dispatch */ fab_err = px_scan_fabric(px_p, rpdip, &derr); px_err_panic(rc_err, PX_RC, fab_err, B_TRUE); px_fm_exit(px_p); px_err_panic(rc_err, PX_RC, fab_err, B_FALSE); done: return (DDI_INTR_CLAIMED); } /* * px_scan_fabric: * * Check for drain state and if there is anything to scan. * * Note on pfd: Different interrupts will populate the pfd's differently. 
The * px driver can have a total of 5 different error sources, so it has a queue of * 5 pfds. Each valid PDF is linked together and passed to pf_scan_fabric. * * Each error handling will populate the following info in the pfd * * Root Fault Intr Src Affected BDF * ----------------+---------------+------------ * Callback/CPU Trap Address/BDF |DATA |Lookup Addr * Mondo 62/63 (sun4u) decode error |N/A |N/A * EPKT (sun4v) decode epkt |INTERNAL |decode epkt * Fabric Message fabric payload |FABRIC |NULL * Peek/Poke Address/BDF |NULL |NULL * ----------------+---------------+------------ */ int px_scan_fabric(px_t *px_p, dev_info_t *rpdip, ddi_fm_error_t *derr) { int fab_err = 0; ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); if (!px_lib_is_in_drain_state(px_p) && px_p->px_pfd_idx) { fab_err = pf_scan_fabric(rpdip, derr, px_p->px_pfd_arr); } return (fab_err); } /* * px_err_safeacc_check: * Check to see if a peek/poke and cautious access is currently being * done on a particular leaf. * * Safe access reads induced fire errors will be handled by cpu trap handler * which will call px_fm_callback() which calls this function. In that * case, the derr fields will be set by trap handler with the correct values. * * Safe access writes induced errors will be handled by px interrupt * handlers, this function will fill in the derr fields. * * If a cpu trap does occur, it will quiesce all other interrupts allowing * the cpu trap error handling to finish before Fire receives an interrupt. * * If fire does indeed have an error when a cpu trap occurs as a result of * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. * In which case derr will be initialized as "UNEXPECTED" by the interrupt * handler and this function will need to find if this error occured in the * middle of a safe access operation. 
* * @param px_p leaf in which to check access * @param derr fm err data structure to be updated */ void px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) { px_pec_t *pec_p = px_p->px_pec_p; int acctype = pec_p->pec_safeacc_type; ASSERT(MUTEX_HELD(&px_p->px_fm_mutex)); if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { return; } /* safe access checking */ switch (acctype) { case DDI_FM_ERR_EXPECTED: /* * cautious access protection, protected from all err. */ ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, DDI_FME_VERSION); derr->fme_flag = acctype; derr->fme_acc_handle = pec_p->pec_acc_hdl; break; case DDI_FM_ERR_POKE: /* * ddi_poke protection, check nexus and children for * expected errors. */ membar_sync(); derr->fme_flag = acctype; break; case DDI_FM_ERR_PEEK: derr->fme_flag = acctype; break; } } /* * Suggest panic if any EQ (except CE q) has overflown. */ int px_err_check_eq(dev_info_t *dip) { px_t *px_p = DIP_TO_STATE(dip); px_msiq_state_t *msiq_state_p = &px_p->px_ib_p->ib_msiq_state; px_pec_t *pec_p = px_p->px_pec_p; msiqid_t eq_no = msiq_state_p->msiq_1st_msiq_id; pci_msiq_state_t msiq_state; int i; for (i = 0; i < msiq_state_p->msiq_cnt; i++) { if (i + eq_no == pec_p->pec_corr_msg_msiq_id) /* skip CE q */ continue; if ((px_lib_msiq_getstate(dip, i + eq_no, &msiq_state) != DDI_SUCCESS) || msiq_state == PCI_MSIQ_STATE_ERROR) return (PX_PANIC); } return (PX_NO_PANIC); } /* ARGSUSED */ int px_err_check_pcie(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs, pf_intr_type_t intr_type) { px_t *px_p = DIP_TO_STATE(dip); pf_data_t *pfd_p = px_get_pfd(px_p); int i; pf_pcie_adv_err_regs_t *adv_reg = PCIE_ADV_REG(pfd_p); PCIE_ROOT_EH_SRC(pfd_p)->intr_type = intr_type; /* * set RC s_status in PCI term to coordinate with downstream fabric * errors ananlysis. 
*/ if (regs->primary_ue & PCIE_AER_UCE_UR) PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_MAST_AB; if (regs->primary_ue & PCIE_AER_UCE_CA) PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_R_TARG_AB; if (regs->primary_ue & (PCIE_AER_UCE_PTLP | PCIE_AER_UCE_ECRC)) PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = PCI_STAT_PERROR; if (!regs->primary_ue) goto done; adv_reg->pcie_ce_status = regs->ce_reg; adv_reg->pcie_ue_status = regs->ue_reg | regs->primary_ue; PCIE_ADV_HDR(pfd_p, 0) = regs->rx_hdr1; PCIE_ADV_HDR(pfd_p, 1) = regs->rx_hdr2; PCIE_ADV_HDR(pfd_p, 2) = regs->rx_hdr3; PCIE_ADV_HDR(pfd_p, 3) = regs->rx_hdr4; for (i = regs->primary_ue; i != 1; i = i >> 1) adv_reg->pcie_adv_ctl++; if (regs->primary_ue & (PCIE_AER_UCE_UR | PCIE_AER_UCE_CA)) { if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS) PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg->pcie_ue_tgt_bdf; } else if (regs->primary_ue & PCIE_AER_UCE_PTLP) { if (pf_tlp_decode(PCIE_DIP2BUS(dip), adv_reg) == DDI_SUCCESS) { PCIE_ROOT_FAULT(pfd_p)->scan_bdf = adv_reg->pcie_ue_tgt_bdf; if (adv_reg->pcie_ue_tgt_trans == PF_ADDR_PIO) PCIE_ROOT_FAULT(pfd_p)->scan_addr = adv_reg->pcie_ue_tgt_addr; } /* * Normally for Poisoned Completion TLPs we can look at the * transmit log header for the original request and the original * address, however this doesn't seem to be working. HW BUG. */ } done: px_pcie_log(dip, regs); /* Return No Error here and let the pcie misc module analyse it */ return (PX_NO_ERROR); } #if defined(DEBUG) static void px_pcie_log(dev_info_t *dip, px_err_pcie_t *regs) { DBG(DBG_ERR_INTR, dip, "A PCIe RC error has occured\n" "\tCE: 0x%x UE: 0x%x Primary UE: 0x%x\n" "\tTX Hdr: 0x%x 0x%x 0x%x 0x%x\n\tRX Hdr: 0x%x 0x%x 0x%x 0x%x\n", regs->ce_reg, regs->ue_reg, regs->primary_ue, regs->tx_hdr1, regs->tx_hdr2, regs->tx_hdr3, regs->tx_hdr4, regs->rx_hdr1, regs->rx_hdr2, regs->rx_hdr3, regs->rx_hdr4); } #endif /* * look through poisoned TLP cases and suggest panic/no panic depend on * handle lookup. 
*/
static int
px_pcie_ptlp(dev_info_t *dip, ddi_fm_error_t *derr, px_err_pcie_t *regs)
{
	pf_pcie_adv_err_regs_t	adv_reg;
	pcie_req_id_t		bdf;
	uint64_t		addr;
	uint32_t		trans_type;
	int			tlp_cmd;
	int			lookup = PF_HDL_NOTFOUND;

	/* Anything other than a lone poisoned TLP is not recoverable here. */
	if (regs->primary_ue != PCIE_AER_UCE_PTLP)
		return (PX_PANIC);

	if (!regs->rx_hdr1)
		goto done;

	/*
	 * Start from a zeroed register image so that fields the decoder does
	 * not fill in never carry stack garbage.
	 */
	bzero(&adv_reg, sizeof (adv_reg));
	adv_reg.pcie_ue_hdr[0] = regs->rx_hdr1;
	adv_reg.pcie_ue_hdr[1] = regs->rx_hdr2;
	adv_reg.pcie_ue_hdr[2] = regs->rx_hdr3;
	adv_reg.pcie_ue_hdr[3] = regs->rx_hdr4;

	if (pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg) == DDI_FAILURE)
		goto done;

	tlp_cmd = ((pcie_tlp_hdr_t *)(adv_reg.pcie_ue_hdr))->type;
	bdf = adv_reg.pcie_ue_tgt_bdf;
	addr = adv_reg.pcie_ue_tgt_addr;
	trans_type = adv_reg.pcie_ue_tgt_trans;

	switch (tlp_cmd) {
	case PCIE_TLP_TYPE_CPL:
	case PCIE_TLP_TYPE_CPLLK:
		/*
		 * Usually a PTLP is a CPL with data.  Grab the completer BDF
		 * from the RX TLP, and the original address from the TX TLP.
		 */
		if (regs->tx_hdr1) {
			adv_reg.pcie_ue_hdr[0] = regs->tx_hdr1;
			adv_reg.pcie_ue_hdr[1] = regs->tx_hdr2;
			adv_reg.pcie_ue_hdr[2] = regs->tx_hdr3;
			adv_reg.pcie_ue_hdr[3] = regs->tx_hdr4;

			/*
			 * A failed decode leaves lookup at PF_HDL_NOTFOUND;
			 * the DDI status is deliberately not stored in the
			 * PF_HDL_* result variable.
			 */
			if (pf_tlp_decode(PCIE_DIP2BUS(dip), &adv_reg) !=
			    DDI_SUCCESS)
				break;
			addr = adv_reg.pcie_ue_tgt_addr;
			trans_type = adv_reg.pcie_ue_tgt_trans;
		}
		/* FALLTHRU */
	case PCIE_TLP_TYPE_IO:
	case PCIE_TLP_TYPE_MEM:
	case PCIE_TLP_TYPE_MEMLK:
		lookup = pf_hdl_lookup(dip, derr->fme_ena, trans_type, addr,
		    bdf);
		break;
	default:
		lookup = PF_HDL_NOTFOUND;
	}
done:
	return (lookup == PF_HDL_FOUND ? PX_NO_PANIC : PX_PANIC);
}

/*
 * px_get_pfd automatically allocates a RC pf_data_t and returns a pointer to
 * it.  This function should be used when an error requires a fabric scan.
*/ pf_data_t * px_get_pfd(px_t *px_p) { int idx = px_p->px_pfd_idx++; pf_data_t *pfd_p = &px_p->px_pfd_arr[idx]; /* Clear Old Data */ PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF; PCIE_ROOT_FAULT(pfd_p)->scan_addr = 0; PCIE_ROOT_EH_SRC(pfd_p)->intr_type = PF_INTR_TYPE_NONE; PCIE_ROOT_EH_SRC(pfd_p)->intr_data = NULL; PFD_AFFECTED_DEV(pfd_p)->pe_affected_flags = 0; PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF; PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = 0; PCIE_ADV_REG(pfd_p)->pcie_ce_status = 0; PCIE_ADV_REG(pfd_p)->pcie_ue_status = 0; PCIE_ADV_REG(pfd_p)->pcie_adv_ctl = 0; pfd_p->pe_next = NULL; if (idx > 0) { px_p->px_pfd_arr[idx - 1].pe_next = pfd_p; pfd_p->pe_prev = &px_p->px_pfd_arr[idx - 1]; } else { pfd_p->pe_prev = NULL; } pfd_p->pe_severity_flags = 0; pfd_p->pe_severity_mask = 0; pfd_p->pe_orig_severity_flags = 0; pfd_p->pe_valid = B_TRUE; return (pfd_p); } /* * This function appends a pf_data structure to the error q which is used later * during PCIe fabric scan. It signifies: * o errs rcvd in RC, that may have been propagated to/from the fabric * o the fabric scan code should scan the device path of fault bdf/addr * * scan_bdf: The bdf that caused the fault, which may have error bits set. * scan_addr: The PIO addr that caused the fault, such as failed PIO, but not * failed DMAs. * s_status: Secondary Status equivalent to why the fault occured. * (ie S-TA/MA, R-TA) * Either the scan bdf or addr may be NULL, but not both. */ pf_data_t * px_rp_en_q(px_t *px_p, pcie_req_id_t scan_bdf, uint32_t scan_addr, uint16_t s_status) { pf_data_t *pfd_p; if (!PCIE_CHECK_VALID_BDF(scan_bdf) && !scan_addr) return (NULL); pfd_p = px_get_pfd(px_p); PCIE_ROOT_FAULT(pfd_p)->scan_bdf = scan_bdf; PCIE_ROOT_FAULT(pfd_p)->scan_addr = (uint64_t)scan_addr; PCI_BDG_ERR_REG(pfd_p)->pci_bdg_sec_stat = s_status; return (pfd_p); } /* * Find and Mark CFG Handles as failed associated with the given BDF. 
We should * always know the BDF for CFG accesses, since it is encoded in the address of * the TLP. Since there can be multiple cfg handles, mark them all as failed. */ /* ARGSUSED */ int px_err_cfg_hdl_check(dev_info_t *dip, const void *handle, const void *arg1, const void *arg2) { int status = DDI_FM_FATAL; uint32_t addr = *(uint32_t *)arg1; uint16_t bdf = *(uint16_t *)arg2; pcie_bus_t *bus_p; DBG(DBG_ERR_INTR, dip, "Check CFG Hdl: dip 0x%p addr 0x%x bdf=0x%x\n", dip, addr, bdf); bus_p = PCIE_DIP2BUS(dip); /* * Because CFG and IO Acc Handlers are on the same cache list and both * types of hdls gets called for both types of errors. For this checker * only mark the device as "Non-Fatal" if the addr == NULL and bdf != * NULL. */ status = (!addr && (PCIE_CHECK_VALID_BDF(bdf) && (bus_p->bus_bdf == bdf))) ? DDI_FM_NONFATAL : DDI_FM_FATAL; return (status); } /* * Find and Mark all ACC Handles associated with a give address and BDF as * failed. If the BDF != NULL, then check to see if the device has a ACC Handle * associated with ADDR. If the handle is not found, mark all the handles as * failed. If the BDF == NULL, mark the handle as failed if it is associated * with ADDR. */ int px_err_pio_hdl_check(dev_info_t *dip, const void *handle, const void *arg1, const void *arg2) { dev_info_t *px_dip; px_t *px_p; pci_ranges_t *ranges_p; int range_len; ddi_acc_handle_t ap = (ddi_acc_handle_t)handle; ddi_acc_hdl_t *hp = impl_acc_hdl_get(ap); int i, status = DDI_FM_FATAL; uint64_t fault_addr = *(uint64_t *)arg1; uint16_t bdf = *(uint16_t *)arg2; uint64_t base_addr, range_addr; uint_t size; /* * Find the correct px dip. On system with a real Root Port, it's the * node above the root port. On systems without a real Root Port the px * dip is the bus_rp_dip. 
*/ px_dip = PCIE_DIP2BUS(dip)->bus_rp_dip; if (!PCIE_IS_RC(PCIE_DIP2BUS(px_dip))) px_dip = ddi_get_parent(px_dip); ASSERT(PCIE_IS_RC(PCIE_DIP2BUS(px_dip))); px_p = INST_TO_STATE(ddi_get_instance(px_dip)); DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n", dip, fault_addr, bdf); /* Normalize the base addr to the addr and strip off the HB info. */ base_addr = (hp->ah_pfn << MMU_PAGESHIFT) + hp->ah_offset; range_len = px_p->px_ranges_length / sizeof (pci_ranges_t); i = 0; for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) { range_addr = px_in_addr_range(dip, ranges_p, base_addr); if (range_addr) { switch (ranges_p->child_high & PCI_ADDR_MASK) { case PCI_ADDR_IO: case PCI_ADDR_MEM64: case PCI_ADDR_MEM32: base_addr = base_addr - range_addr; break; } break; } } /* * Mark the handle as failed if the ADDR is mapped, or if we * know the BDF and ADDR == 0. */ size = hp->ah_len; if (((fault_addr >= base_addr) && (fault_addr < (base_addr + size))) || ((fault_addr == 0) && (PCIE_CHECK_VALID_BDF(bdf) && (bdf == PCIE_DIP2BUS(dip)->bus_bdf)))) status = DDI_FM_NONFATAL; return (status); } /* * Find and Mark all DNA Handles associated with a give address and BDF as * failed. If the BDF != NULL, then check to see if the device has a DMA Handle * associated with ADDR. If the handle is not found, mark all the handles as * failed. If the BDF == NULL, mark the handle as failed if it is associated * with ADDR. */ int px_err_dma_hdl_check(dev_info_t *dip, const void *handle, const void *arg1, const void *arg2) { ddi_dma_impl_t *pcie_dp; int status = DDI_FM_FATAL; uint32_t addr = *(uint32_t *)arg1; uint16_t bdf = *(uint16_t *)arg2; uint32_t base_addr; uint_t size; DBG(DBG_ERR_INTR, dip, "Check PIO Hdl: dip 0x%x addr 0x%x bdf=0x%x\n", dip, addr, bdf); pcie_dp = (ddi_dma_impl_t *)handle; base_addr = (uint32_t)pcie_dp->dmai_mapping; size = pcie_dp->dmai_size; /* * Mark the handle as failed if the ADDR is mapped, or if we * know the BDF and ADDR == 0. 
*/ if (((addr >= base_addr) && (addr < (base_addr + size))) || ((addr == 0) && PCIE_CHECK_VALID_BDF(bdf))) status = DDI_FM_NONFATAL; return (status); } int px_fm_enter(px_t *px_p) { if (px_panicing || (px_p->px_fm_mutex_owner == curthread)) return (DDI_FAILURE); mutex_enter(&px_p->px_fm_mutex); /* * In rare cases when trap occurs and in the middle of scanning the * fabric, a PIO will fail in the scan fabric. The CPU error handling * code will correctly panic the system, while a mondo for the failed * PIO may also show up. Normally the mondo will try to grab the mutex * and wait until the callback finishes. But in this rare case, * mutex_enter actually suceeds also continues to scan the fabric. * * This code below is designed specifically to check for this case. If * we successfully grab the px_fm_mutex, the px_fm_mutex_owner better be * NULL. If it isn't that means we are in the rare corner case. Return * DDI_FAILURE, this should prevent PX from doing anymore error * handling. */ if (px_p->px_fm_mutex_owner) { return (DDI_FAILURE); } px_p->px_fm_mutex_owner = curthread; if (px_panicing) { px_fm_exit(px_p); return (DDI_FAILURE); } /* Signal the PCIe error handling module error handling is starting */ pf_eh_enter(PCIE_DIP2BUS(px_p->px_dip)); return (DDI_SUCCESS); } static void px_guest_panic(px_t *px_p) { pf_data_t *root_pfd_p = PCIE_DIP2PFD(px_p->px_dip); pf_data_t *pfd_p; pcie_bus_t *bus_p, *root_bus_p; pcie_req_id_list_t *rl; /* * check if all devices under the root device are unassigned. * this function should quickly return in non-IOV environment. 
*/ root_bus_p = PCIE_PFD2BUS(root_pfd_p); if (PCIE_BDG_IS_UNASSIGNED(root_bus_p)) return; for (pfd_p = root_pfd_p; pfd_p; pfd_p = pfd_p->pe_next) { bus_p = PCIE_PFD2BUS(pfd_p); /* assume all affected devs were in the error Q */ if (!PCIE_BUS2DOM(bus_p)->nfma_panic) continue; if (PCIE_IS_BDG(bus_p)) { rl = PCIE_BDF_LIST_GET(bus_p); while (rl) { px_panic_domain(px_p, rl->bdf); rl = rl->next; } } else { px_panic_domain(px_p, bus_p->bus_bdf); } /* clear panic flag */ PCIE_BUS2DOM(bus_p)->nfma_panic = B_FALSE; } } void px_fm_exit(px_t *px_p) { px_p->px_fm_mutex_owner = NULL; if (px_p->px_pfd_idx == 0) { mutex_exit(&px_p->px_fm_mutex); return; } /* panic the affected domains that are non-fma-capable */ px_guest_panic(px_p); /* Signal the PCIe error handling module error handling is ending */ pf_eh_exit(PCIE_DIP2BUS(px_p->px_dip)); px_p->px_pfd_idx = 0; mutex_exit(&px_p->px_fm_mutex); } /* * Panic if the err tunable is set and that we are not already in the middle * of panic'ing. * * rc_err = Error severity of PX specific errors * msg = Where the error was detected * fabric_err = Error severity of PCIe Fabric errors * isTest = Test if error severity causes panic */ #define MSZ (sizeof (fm_msg) -strlen(fm_msg) - 1) void px_err_panic(int rc_err, int msg, int fabric_err, boolean_t isTest) { char fm_msg[96] = ""; int ferr = PX_NO_ERROR; if (panicstr) { px_panicing = B_TRUE; return; } if (!(rc_err & px_die)) goto fabric; if (msg & PX_RC) (void) strncat(fm_msg, px_panic_rc_msg, MSZ); if (msg & PX_RP) (void) strncat(fm_msg, px_panic_rp_msg, MSZ); if (msg & PX_HB) (void) strncat(fm_msg, px_panic_hb_msg, MSZ); fabric: if (fabric_err & PF_ERR_FATAL_FLAGS) ferr = PX_PANIC; else if (fabric_err & ~(PF_ERR_FATAL_FLAGS | PF_ERR_NO_ERROR)) ferr = PX_NO_PANIC; if (ferr & px_die) { if (strlen(fm_msg)) { (void) strncat(fm_msg, " and", MSZ); } (void) strncat(fm_msg, px_panic_fab_msg, MSZ); } if (strlen(fm_msg)) { px_panicing = B_TRUE; if (!isTest) fm_panic("Fatal error has occured 
in:%s.(0x%x)(0x%x)", fm_msg, rc_err, fabric_err); } }