/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Pointer to platform specific function to initialize a cache of DIMM
 * serial ids.  It is NULL unless a platform installs a handler; it is
 * invoked (if set) from plat_ecc_capability_sc_set().
 */
int (*p2init_sid_cache)(void);

/*
 * This file contains the common code that is used for parsing
 * ecc unum data and logging it appropriately as the platform
 * that calls this code implements.
 */

/*
 * Forward declarations: plat_ecc_dispatch_task() queues a message on the
 * taskq and waits for the result; plat_ecc_send_msg() is the taskq
 * callback that performs the actual mailbox send.
 */
int plat_ecc_dispatch_task(plat_ecc_message_t *);
static void plat_ecc_send_msg(void *);

/*
 * CHECK_UNUM is used while walking a unum string with a local variable
 * named unum_ptr.  It expands to a bare "break", so it must only be used
 * directly inside the switch/loop that should be left when the expected
 * token was not found (unum_ptr == NULL).
 */
#define	CHECK_UNUM \
	if (unum_ptr == NULL) { \
		break; \
	}

/*
 * See plat_ecc_unum.h for the meaning of these variables.
*/ int ecc_log_fruid_enable = ECC_FRUID_ENABLE_DEFAULT; uint32_t plat_ecc_capability_map_domain = PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT; uint32_t plat_ecc_capability_map_sc = PLAT_ECC_CAPABILITY_SC_DEFAULT; uint16_t ecc_error2_mailbox_flags = PLAT_ECC_ERROR2_SEND_DEFAULT; uint16_t ecc_indictment2_mailbox_flags = PLAT_ECC_SEND_INDICT2_DEFAULT; /* * We log all ECC errors using the function that is defined as * plat_send_ecc_mailbox_msg(); We first parse the unum string and * then pass the data to be logged to the plat_send_ecc_mailbox_msg * function for logging. Each platform that uses this code needs to * implement a suitable function for this purpose. */ void plat_log_fruid_error(int synd_code, struct async_flt *ecc, char *unum, uint64_t afsr_bit) { plat_ecc_error_data_t ecc_error_data; enum plat_ecc_type ecc_type = PLAT_ECC_UNKNOWN; int board_num; int proc_position; int invalid_unum = 1; bzero(&ecc_error_data, sizeof (plat_ecc_error_data_t)); ecc_error_data.version = PLAT_ECC_VERSION; switch (afsr_bit) { case C_AFSR_CE: ecc_error_data.error_code = PLAT_ERROR_CODE_CE; break; case C_AFSR_UE: ecc_error_data.error_code = PLAT_ERROR_CODE_UE; break; case C_AFSR_EDC: ecc_error_data.error_code = PLAT_ERROR_CODE_EDC; break; case C_AFSR_EDU: ecc_error_data.error_code = PLAT_ERROR_CODE_EDU; break; case C_AFSR_WDC: ecc_error_data.error_code = PLAT_ERROR_CODE_WDC; break; case C_AFSR_WDU: ecc_error_data.error_code = PLAT_ERROR_CODE_WDU; break; case C_AFSR_CPC: ecc_error_data.error_code = PLAT_ERROR_CODE_CPC; break; case C_AFSR_CPU: ecc_error_data.error_code = PLAT_ERROR_CODE_CPU; break; case C_AFSR_UCC: ecc_error_data.error_code = PLAT_ERROR_CODE_UCC; break; case C_AFSR_UCU: ecc_error_data.error_code = PLAT_ERROR_CODE_UCU; break; case C_AFSR_EMC: ecc_error_data.error_code = PLAT_ERROR_CODE_EMC; break; case C_AFSR_EMU: ecc_error_data.error_code = PLAT_ERROR_CODE_EMU; break; default: /* * Do not send messages with unknown error codes, since * the SC will not be able to tell what 
type of error * occurred. */ return; } ecc_error_data.detecting_proc = ecc->flt_bus_id; if (ecc->flt_in_memory) ecc_type = PLAT_ECC_MEMORY; else if (ecc->flt_status & ECC_ECACHE) ecc_type = PLAT_ECC_ECACHE; switch (ecc_type) { case PLAT_ECC_MEMORY: { /* * The unum string is expected to be in this form: * "/N0/SB12/P0/B0/D2 J13500, ..." * for serengeti. As this code is shared with Starcat * if N is missing then it is set to 0. * From that we will extract the bank number, dimm * number, and Jnumber. */ char *unum_ptr = unum; char *jno_ptr = ecc_error_data.Jnumber; int i; /* * On Serengeti we expect to find 'N' in the unum string * however, on Starcat 'N' does not appear in the unum string. * We do not want this code to break at this point, so the * unum_ptr is reset to the start of unum string if we fail * to find an 'N'. */ unum_ptr = strchr(unum_ptr, 'N'); if (unum_ptr == NULL) { ecc_error_data.node_no = 0; unum_ptr = unum; } else { unum_ptr++; ecc_error_data.node_no = stoi(&unum_ptr); } /* * Now pull out the SB number */ unum_ptr = strstr(unum_ptr, "SB"); CHECK_UNUM; unum_ptr += 2; board_num = stoi(&unum_ptr); /* * Now pull out the Proc position (relative to the board) */ unum_ptr = strchr(unum_ptr, 'P'); CHECK_UNUM; unum_ptr++; proc_position = stoi(&unum_ptr); /* * Using the SB number and Proc position we create a FRU * cpu id. */ ecc_error_data.proc_num = plat_make_fru_cpuid(board_num, 0, proc_position); /* * Now pull out the Memory Bank number */ unum_ptr = strchr(unum_ptr, 'B'); CHECK_UNUM; unum_ptr++; ecc_error_data.bank_no = (stoi(&unum_ptr) & 0x01); /* * Now pull out the Dimm number within the Memory Bank. */ unum_ptr = strchr(unum_ptr, 'D'); CHECK_UNUM; unum_ptr++; ecc_error_data.ecache_dimm_no = (stoi(&unum_ptr) & 0x03); /* * Now pull out the J-number. 
*/ unum_ptr = strchr(unum_ptr, 'J'); CHECK_UNUM; unum_ptr++; for (i = PLAT_ECC_JNUMBER_LENGTH; i > 0 && *unum_ptr >= '0' && *unum_ptr <= '9'; i--) *jno_ptr++ = *unum_ptr++; *jno_ptr = '\0'; /* * If we get here, we can assume the unum is valid */ invalid_unum = 0; break; } case PLAT_ECC_ECACHE: { /* * The unum string is expected to be in this form: * "[/N0/][SB|IO]12/P0/E0 J13500, ..." * for serengeti. As this code is shared with Starcat * if N is missing then it is set to 0. IO may only appear * on Starcats. From that we will extract the bank number, * dimm number, and Jnumber. */ char *unum_ptr = unum; char *jno_ptr = ecc_error_data.Jnumber; int is_maxcat = 0; int i; /* * On Serengeti we expect to find 'N' in the unum string * however, on Starcat 'N' does not appear in the unum string. * We do not want this code to break at this point, so the * unum_ptr is reset to the start of unum string if we fail * to find an 'N'. */ unum_ptr = strchr(unum_ptr, 'N'); if (unum_ptr == NULL) { ecc_error_data.node_no = 0; unum_ptr = unum; } else { unum_ptr++; ecc_error_data.node_no = stoi(&unum_ptr); } /* * Now pull out the SB/IO number */ unum_ptr = strstr(unum_ptr, "SB"); if (unum_ptr == NULL) { /* * Since this is an E$ error, it must have occurred on * either a System Board (represented by "SB" in the * unum string) or a Maxcat board ("IO" in the unum * string). Since we failed the "SB" check, we'll * assume this is a maxcat board. */ is_maxcat = 1; unum_ptr = strstr(unum, "IO"); } CHECK_UNUM; unum_ptr += 2; board_num = stoi(&unum_ptr); /* * Now pull out the Proc position (relative to the board) */ unum_ptr = strchr(unum_ptr, 'P'); CHECK_UNUM; unum_ptr++; proc_position = stoi(&unum_ptr); /* * Using the SB/IO number, slot 0/1 value (is_maxcat), and * proc position, we create the cpu id. 
*/ ecc_error_data.proc_num = plat_make_fru_cpuid(board_num, is_maxcat, proc_position); ecc_error_data.bank_no = 0; /* not used */ unum_ptr = strchr(unum_ptr, 'E'); CHECK_UNUM; unum_ptr++; ecc_error_data.ecache_dimm_no = (stoi(&unum_ptr) & 0x01); unum_ptr = strchr(unum_ptr, 'J'); CHECK_UNUM; unum_ptr++; for (i = PLAT_ECC_JNUMBER_LENGTH; i > 0 && *unum_ptr >= '0' && *unum_ptr <= '9'; i--) *jno_ptr++ = *unum_ptr++; *jno_ptr = '\0'; /* * If we get here, we can assume the unum is valid */ invalid_unum = 0; break; } default: /* * Unknown error */ break; } /* * This is where CHECK_UNUM goes when it finds an error */ if (ECC_SYND_DATA_BEGIN <= synd_code && synd_code < ECC_SYND_ECC_BEGIN) { ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; ecc_error_data.databit_type = PLAT_BIT_TYPE_DATA; ecc_error_data.databit_no = synd_code; } else if (ECC_SYND_ECC_BEGIN <= synd_code && synd_code < ECC_SYND_MTAG_BEGIN) { ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; ecc_error_data.databit_type = PLAT_BIT_TYPE_ECC; ecc_error_data.databit_no = synd_code - ECC_SYND_ECC_BEGIN; } else if (ECC_SYND_MTAG_BEGIN <= synd_code && synd_code < ECC_SYND_MECC_BEGIN) { ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; ecc_error_data.databit_type = PLAT_BIT_TYPE_MTAG_D; ecc_error_data.databit_no = synd_code - ECC_SYND_MTAG_BEGIN; } else if (ECC_SYND_MECC_BEGIN <= synd_code && synd_code < ECC_SYND_M2) { ecc_error_data.error_type = PLAT_ERROR_TYPE_SINGLE; ecc_error_data.databit_type = PLAT_BIT_TYPE_MTAG_E; ecc_error_data.databit_no = synd_code - ECC_SYND_MECC_BEGIN; } else { switch (synd_code) { case ECC_SYND_M2: ecc_error_data.error_type = PLAT_ERROR_TYPE_M2; break; case ECC_SYND_M3: ecc_error_data.error_type = PLAT_ERROR_TYPE_M3; break; case ECC_SYND_M4: ecc_error_data.error_type = PLAT_ERROR_TYPE_M4; break; case ECC_SYND_M: ecc_error_data.error_type = PLAT_ERROR_TYPE_M; break; default: ecc_error_data.error_type = PLAT_ERROR_TYPE_UNK; break; } ecc_error_data.databit_type = PLAT_BIT_TYPE_MULTI; 
ecc_error_data.databit_no = 0; /* not used */ } #ifdef DEBUG if (invalid_unum && (ecc_error_data.error_code != PLAT_ERROR_CODE_UE) && unum && *unum) cmn_err(CE_WARN, "Unexpected unum string format: %s\n", unum); #endif /* * Send this data off as a mailbox message to the SC. */ (void) plat_send_ecc_mailbox_msg(PLAT_ECC_ERROR_MESSAGE, &ecc_error_data); } /* * The unum string for memory is expected to be in this form: * "[/N0/]SB12/P0/B0/D2 [J13500]" * Or if the unum was generated as the result of a UE: * "[/N0/]SB12/P0/B0 [J13500, ...]" * From that we will extract the board number, processor position, * bank number and jnumber. * * Return (1) for an invalid unum string. If the unum is for an * individual DIMM and there is no jnumber, jnumber will be set * to -1 and the caller can decide if the unum is valid. This * is because Serengeti does not have jnumbers for bank unums * which may be used to create DIMM unums (e.g. for acquiring * DIMM serial ids). */ int parse_unum_memory(char *unum, int *board, int *pos, int *bank, int *dimm, int *jnumber) { char *c; if ((c = strstr(unum, "SB")) == NULL) return (1); c += 2; *board = (uint8_t)stoi(&c); if (*c++ != '/' || *c++ != 'P') return (1); *pos = stoi(&c); if (*c++ != '/' || *c++ != 'B') return (1); *bank = stoi(&c); if ((c = strchr(c, 'D')) == NULL) { *dimm = -1; *jnumber = 0; return (0); } c++; *dimm = stoi(&c); if ((c = strchr(c, 'J')) == NULL) { *jnumber = -1; return (0); } c++; *jnumber = (uint16_t)stoi(&c); return (0); } /* * The unum string for ecache is expected to be in this form: * "[/N0/][SB|IO]12/P0/E0 J13500, ..." * From that we will extract the board number, processor position and * junmber. * * return (1) for any invalid unum string. 
*/ static int parse_unum_ecache(char *unum, int *board, int *pos, int *jnumber, int *maxcat) { char *c; if ((c = strstr(unum, "SB")) == NULL) { /* * Since this is an E$ error, it must have occurred on * either a System Board (represented by "SB" in the * unum string) or a Maxcat board ("IO" in the unum * string). */ if ((c = strstr(unum, "IO")) == NULL) return (1); *maxcat = 1; } c += 2; *board = (uint8_t)stoi(&c); if (*c++ != '/' || *c++ != 'P') return (1); *pos = stoi(&c); if ((c = strchr(c, 'J')) == NULL) return (1); c++; *jnumber = (uint16_t)stoi(&c); return (0); } /* The following array maps the error to its corresponding set */ static int plat_ecc_e2d_map[PLAT_ECC_ERROR2_NUMVALS] = { PLAT_ECC_ERROR2_NONE, /* 0x00 */ PLAT_ECC_ERROR2_SEND_L2_XXC, /* 0x01 */ PLAT_ECC_ERROR2_SEND_L2_XXU, /* 0x02 */ PLAT_ECC_ERROR2_SEND_L3_XXC, /* 0x03 */ PLAT_ECC_ERROR2_SEND_L3_XXU, /* 0x04 */ PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x05 */ PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x06 */ PLAT_ECC_ERROR2_SEND_MEM_ERRS, /* 0x07 */ PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x08 */ PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x09 */ PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x0a */ PLAT_ECC_ERROR2_SEND_BUS_ERRS, /* 0x0b */ PLAT_ECC_ERROR2_SEND_L2_TAG_ERRS, /* 0x0c */ PLAT_ECC_ERROR2_SEND_L2_TAG_ERRS, /* 0x0d */ PLAT_ECC_ERROR2_SEND_L3_TAG_ERRS, /* 0x0e */ PLAT_ECC_ERROR2_SEND_L3_TAG_ERRS, /* 0x0f */ PLAT_ECC_ERROR2_SEND_L1_PARITY, /* 0x10 */ PLAT_ECC_ERROR2_SEND_L1_PARITY, /* 0x11 */ PLAT_ECC_ERROR2_SEND_TLB_PARITY, /* 0x12 */ PLAT_ECC_ERROR2_SEND_TLB_PARITY, /* 0x13 */ PLAT_ECC_ERROR2_SEND_IV_ERRS, /* 0x14 */ PLAT_ECC_ERROR2_SEND_IV_ERRS, /* 0x15 */ PLAT_ECC_ERROR2_SEND_MTAG_XXC, /* 0x16 */ PLAT_ECC_ERROR2_SEND_IV_MTAG_XXC, /* 0x17 */ PLAT_ECC_ERROR2_SEND_L3_XXC, /* 0x18 */ PLAT_ECC_ERROR2_SEND_PCACHE /* 0x19 */ }; /* * log enhanced error information to SC. 
*/ void plat_log_fruid_error2(int msg_type, char *unum, struct async_flt *aflt, plat_ecc_ch_async_flt_t *ecc_ch_flt) { plat_ecc_error2_data_t e2d = {0}; int board, pos, bank, dimm, jnumber; int maxcat = 0; uint16_t flags; /* Check the flags */ flags = plat_ecc_e2d_map[msg_type]; if ((ecc_error2_mailbox_flags & flags) == 0) return; /* Fill the header */ e2d.ee2d_major_version = PLAT_ECC_ERROR2_VERSION_MAJOR; e2d.ee2d_minor_version = PLAT_ECC_ERROR2_VERSION_MINOR; e2d.ee2d_msg_type = PLAT_ECC_ERROR2_MESSAGE; e2d.ee2d_msg_length = sizeof (plat_ecc_error2_data_t); /* Fill the data */ if (aflt->flt_in_memory) { if (parse_unum_memory(unum, &board, &pos, &bank, &dimm, &jnumber) || (dimm != -1 && jnumber == -1)) return; /* * Using the SB number and Proc position we create a FRU * cpu id. */ e2d.ee2d_owning_proc = plat_make_fru_cpuid(board, 0, pos); e2d.ee2d_jnumber = jnumber; e2d.ee2d_bank_number = bank; } else if (aflt->flt_status & ECC_ECACHE) { if (parse_unum_ecache(unum, &board, &pos, &jnumber, &maxcat)) return; /* * Using the SB number and Proc position we create a FRU * cpu id. 
*/ e2d.ee2d_owning_proc = plat_make_fru_cpuid(board, maxcat, pos); e2d.ee2d_jnumber = jnumber; e2d.ee2d_bank_number = (uint8_t)-1; } else { /* * L1 Cache */ e2d.ee2d_owning_proc = aflt->flt_bus_id; e2d.ee2d_jnumber = (uint16_t)-1; e2d.ee2d_bank_number = (uint8_t)-1; } e2d.ee2d_type = (uint8_t)msg_type; e2d.ee2d_afar_status = (uint8_t)ecc_ch_flt->ecaf_afar_status; e2d.ee2d_synd_status = (uint8_t)ecc_ch_flt->ecaf_synd_status; e2d.ee2d_detecting_proc = aflt->flt_bus_id; e2d.ee2d_cpu_impl = cpunodes[e2d.ee2d_owning_proc].implementation; e2d.ee2d_timestamp = aflt->flt_id; e2d.ee2d_afsr = aflt->flt_stat; e2d.ee2d_afar = aflt->flt_addr; e2d.ee2d_sdw_afsr = ecc_ch_flt->ecaf_sdw_afsr; e2d.ee2d_sdw_afar = ecc_ch_flt->ecaf_sdw_afar; e2d.ee2d_afsr_ext = ecc_ch_flt->ecaf_afsr_ext; e2d.ee2d_sdw_afsr_ext = ecc_ch_flt->ecaf_sdw_afsr_ext; /* Send the message to SC */ (void) plat_send_ecc_mailbox_msg(PLAT_ECC_ERROR2_MESSAGE, &e2d); } uint8_t ecc_indictment_mailbox_disable = PLAT_ECC_INDICTMENT_OK; uint8_t ecc_indictment_mailbox_flags = PLAT_ECC_SEND_DEFAULT_INDICT; /* * We log all Solaris indictments of failing hardware. We pull the system * board number and jnumber out of the unum string, and calculate the cpuid * from some members of the unum string. The rest of the structure is filled * in through the other arguments. The data structure is then passed to * plat_ecc_dispatch_task(). This function should only be loaded into memory * or called on platforms that define a plat_send_ecc_mailbox_msg() function. 
*/
static int
plat_log_fruid_indictment(int msg_type, struct async_flt *aflt, char *unum)
{
	plat_ecc_message_t *wrapperp;
	plat_ecc_indict_msg_contents_t *contentsp;
	char *unum_ptr;
	int is_maxcat = 0;

	/* Indictments are sent only in the OK and SUSPECT states. */
	switch (ecc_indictment_mailbox_disable) {
	case (PLAT_ECC_INDICTMENT_OK):
	case (PLAT_ECC_INDICTMENT_SUSPECT):
		break;
	case (PLAT_ECC_INDICTMENT_NO_SEND):
	default:
		return (ECONNREFUSED);
	}

	/* The specific indictment type must be enabled as well. */
	switch (msg_type) {
	case (PLAT_ECC_INDICT_DIMM):
		if ((ecc_indictment_mailbox_flags &
		    PLAT_ECC_SEND_DIMM_INDICT) == 0)
			return (ECONNREFUSED);
		break;
	case (PLAT_ECC_INDICT_ECACHE_CORRECTABLES):
		if ((ecc_indictment_mailbox_flags &
		    PLAT_ECC_SEND_ECACHE_XXC_INDICT) == 0)
			return (ECONNREFUSED);
		break;
	case (PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE):
		if ((ecc_indictment_mailbox_flags &
		    PLAT_ECC_SEND_ECACHE_XXU_INDICT) == 0)
			return (ECONNREFUSED);
		break;
	default:
		return (ECONNREFUSED);
	}

	/* LINTED: E_TRUE_LOGICAL_EXPR */
	ASSERT(sizeof (plat_ecc_indictment_data_t) == PLAT_ECC_INDICT_SIZE);

	wrapperp = (plat_ecc_message_t *)
	    kmem_zalloc(sizeof (plat_ecc_message_t), KM_SLEEP);

	wrapperp->ecc_msg_status = PLAT_ECC_NO_MSG_ACTIVE;
	wrapperp->ecc_msg_type = PLAT_ECC_INDICTMENT_MESSAGE;
	wrapperp->ecc_msg_len = sizeof (plat_ecc_indictment_data_t);
	wrapperp->ecc_msg_data = kmem_zalloc(wrapperp->ecc_msg_len, KM_SLEEP);

	contentsp = &(((plat_ecc_indictment_data_t *)
	    wrapperp->ecc_msg_data)->msg_contents);

	/*
	 * Find board_num, jnumber, and proc position from the unum string.
	 * Use the board number, is_maxcat, and proc position to calculate
	 * cpuid.  Any missing token sends us to the common cleanup below.
	 */
	unum_ptr = strstr(unum, "SB");
	if (unum_ptr == NULL) {
		is_maxcat = 1;
		unum_ptr = strstr(unum, "IO");
		if (unum_ptr == NULL)
			goto bad_unum;
	}
	unum_ptr += 2;
	contentsp->board_num = (uint8_t)stoi(&unum_ptr);

	unum_ptr = strchr(unum_ptr, 'P');
	if (unum_ptr == NULL)
		goto bad_unum;
	unum_ptr++;
	contentsp->detecting_proc =
	    (uint16_t)plat_make_fru_cpuid(contentsp->board_num, is_maxcat,
	    stoi(&unum_ptr));

	unum_ptr = strchr(unum_ptr, 'J');
	if (unum_ptr == NULL)
		goto bad_unum;
	unum_ptr++;
	contentsp->jnumber = (uint16_t)stoi(&unum_ptr);

	/*
	 * Fill in the rest of the data
	 */
	contentsp->version = PLAT_ECC_INDICTMENT_VERSION;
	contentsp->indictment_type = msg_type;
	contentsp->indictment_uncertain = ecc_indictment_mailbox_disable;
	contentsp->syndrome = aflt->flt_synd;
	contentsp->afsr = aflt->flt_stat;
	contentsp->afar = aflt->flt_addr;

	/*
	 * Build the solaris_version string:
	 */
	(void) snprintf(contentsp->solaris_version,
	    PLAT_ECC_VERSION_LENGTH, "%s %s", utsname.release, utsname.version);

	/*
	 * Send the data on to the queuing function.  From here on the
	 * message is released by plat_ecc_dispatch_task() or
	 * plat_ecc_send_msg().
	 */
	return (plat_ecc_dispatch_task(wrapperp));

bad_unum:
	/* The unum could not be parsed: release the unsent message. */
	kmem_free(wrapperp->ecc_msg_data, wrapperp->ecc_msg_len);
	kmem_free(wrapperp, sizeof (plat_ecc_message_t));
	return (EINVAL);
}

/*
 * The following array maps the indictment to its corresponding set.  It
 * is indexed by the indictment2 message type (the value shown in the
 * trailing comment of each entry) and yields the flag that is tested
 * against ecc_indictment2_mailbox_flags.
 */
static int plat_ecc_i2d_map[PLAT_ECC_INDICT2_NUMVALS] = {
	PLAT_ECC_INDICT2_NONE,			/* 0x00 */
	PLAT_ECC_SEND_INDICT2_L2_XXU,		/* 0x01 */
	PLAT_ECC_SEND_INDICT2_L2_XXC_SERD,	/* 0x02 */
	PLAT_ECC_SEND_INDICT2_L2_TAG_SERD,	/* 0x03 */
	PLAT_ECC_SEND_INDICT2_L3_XXU,		/* 0x04 */
	PLAT_ECC_SEND_INDICT2_L3_XXC_SERD,	/* 0x05 */
	PLAT_ECC_SEND_INDICT2_L3_TAG_SERD,	/* 0x06 */
	PLAT_ECC_SEND_INDICT2_L1_SERD,		/* 0x07 */
	PLAT_ECC_SEND_INDICT2_L1_SERD,		/* 0x08 */
	PLAT_ECC_SEND_INDICT2_TLB_SERD,		/* 0x09 */
	PLAT_ECC_SEND_INDICT2_TLB_SERD,		/* 0x0a */
	PLAT_ECC_SEND_INDICT2_FPU,		/* 0x0b */
	PLAT_ECC_SEND_INDICT2_PCACHE_SERD	/* 0x0c */
};

/*
 * Build and queue a version 2 indictment message.
 *
 * If unum is non-empty it is parsed as an E$ unum and supplies the
 * arraigned processor, board and jnumber; otherwise aflt->flt_inst names
 * the arraigned processor directly.
 *
 * Returns EINVAL for an unparseable unum, ECONNREFUSED when indictments
 * (or this indictment type) are disabled or msg_type is not a known
 * indictment2 type, otherwise the result of plat_ecc_dispatch_task().
 */
static int
plat_log_fruid_indictment2(int msg_type, struct async_flt *aflt, char *unum)
{
	plat_ecc_message_t *wrapperp;
	plat_ecc_indictment2_data_t *i2d;
	int board, pos, jnumber;
	int maxcat = 0;
	uint16_t flags;

	/*
	 * If the unum is null or empty, skip parsing it
	 */
	if (unum && unum[0] != '\0') {
		if (parse_unum_ecache(unum, &board, &pos, &jnumber, &maxcat))
			return (EINVAL);
	}

	if ((ecc_indictment_mailbox_disable != PLAT_ECC_INDICTMENT_OK) &&
	    (ecc_indictment_mailbox_disable != PLAT_ECC_INDICTMENT_SUSPECT))
		return (ECONNREFUSED);

	/*
	 * msg_type indexes plat_ecc_i2d_map[]; refuse unknown types the
	 * same way plat_log_fruid_indictment() does instead of reading
	 * past the end of the table.
	 */
	if (msg_type < 0 || msg_type >= PLAT_ECC_INDICT2_NUMVALS)
		return (ECONNREFUSED);

	/* Check the flags */
	flags = plat_ecc_i2d_map[msg_type];
	if ((ecc_indictment2_mailbox_flags & flags) == 0)
		return (ECONNREFUSED);

	wrapperp = (plat_ecc_message_t *)
	    kmem_zalloc(sizeof (plat_ecc_message_t), KM_SLEEP);

	/* Initialize the wrapper */
	wrapperp->ecc_msg_status = PLAT_ECC_NO_MSG_ACTIVE;
	wrapperp->ecc_msg_type = PLAT_ECC_INDICTMENT2_MESSAGE;
	wrapperp->ecc_msg_len = sizeof (plat_ecc_indictment2_data_t);
	wrapperp->ecc_msg_data = kmem_zalloc(wrapperp->ecc_msg_len, KM_SLEEP);

	i2d = (plat_ecc_indictment2_data_t *)wrapperp->ecc_msg_data;

	/* Fill the header */
	i2d->ei2d_major_version = PLAT_ECC_INDICT2_MAJOR_VERSION;
	i2d->ei2d_minor_version = PLAT_ECC_INDICT2_MINOR_VERSION;
	i2d->ei2d_msg_type = PLAT_ECC_INDICTMENT2_MESSAGE;
	i2d->ei2d_msg_length = sizeof (plat_ecc_indictment2_data_t);

	/* Fill the data */
	if (unum && unum[0] != '\0') {
		i2d->ei2d_arraigned_proc = plat_make_fru_cpuid(board, maxcat,
		    pos);
		i2d->ei2d_board_num = board;
		i2d->ei2d_jnumber = jnumber;
	} else {
		i2d->ei2d_arraigned_proc = aflt->flt_inst;
		i2d->ei2d_board_num = (uint8_t)
		    plat_make_fru_boardnum(i2d->ei2d_arraigned_proc);
		i2d->ei2d_jnumber = (uint16_t)-1;
	}

	i2d->ei2d_type = msg_type;
	i2d->ei2d_uncertain = ecc_indictment_mailbox_disable;
	i2d->ei2d_cpu_impl = cpunodes[i2d->ei2d_arraigned_proc].implementation;
	i2d->ei2d_timestamp = aflt->flt_id;

	/*
	 * Send the data on to the queuing function
	 */
	return (plat_ecc_dispatch_task(wrapperp));
}

/*
 * Build and queue the capability message.  The message consists of a
 * plat_capability_data_t followed by the "<release> <version>" string.
 * Returns the result of plat_ecc_dispatch_task().
 */
int
plat_ecc_capability_send(void)
{
	plat_ecc_message_t *wrapperp;
	plat_capability_data_t *cap;
	int ver_len;

	wrapperp = kmem_zalloc(sizeof (plat_ecc_message_t), KM_SLEEP);

	/* release + ' ' + version + terminating NUL */
	ver_len = strlen(utsname.release) + strlen(utsname.version) + 2;

	/* Initialize the wrapper */
	wrapperp->ecc_msg_status = PLAT_ECC_NO_MSG_ACTIVE;
	wrapperp->ecc_msg_type = PLAT_ECC_CAPABILITY_MESSAGE;
	wrapperp->ecc_msg_len = sizeof (plat_capability_data_t) + ver_len;
	wrapperp->ecc_msg_data = kmem_zalloc(wrapperp->ecc_msg_len, KM_SLEEP);

	cap = (plat_capability_data_t *)wrapperp->ecc_msg_data;

	/* Fill the header */
	cap->capd_major_version = PLAT_ECC_CAP_VERSION_MAJOR;
	cap->capd_minor_version = PLAT_ECC_CAP_VERSION_MINOR;
	cap->capd_msg_type = PLAT_ECC_CAPABILITY_MESSAGE;
	cap->capd_msg_length = wrapperp->ecc_msg_len;

	/* Set the default domain capability */
	cap->capd_capability = PLAT_ECC_CAPABILITY_DOMAIN_DEFAULT;

	/*
	 * Build the solaris_version string:
	 * utsname.release + " " + utsname.version
	 */
	(void) snprintf(cap->capd_solaris_version, ver_len, "%s %s",
	    utsname.release, utsname.version);

	/*
	 * Send the data on to the queuing function
	 */
	return (plat_ecc_dispatch_task(wrapperp));
}

/*
 * Report whether a message of the given type should be used, based on
 * the capability maps.  Returns 1 if so, 0 otherwise (including for an
 * unknown type).
 */
int
plat_ecc_capability_sc_get(int type)
{
	switch (type) {
	case PLAT_ECC_ERROR_MESSAGE:
		/* Old error message: only while the SC lacks ERROR2. */
		if (ecc_log_fruid_enable &&
		    (!(plat_ecc_capability_map_sc &
		    PLAT_ECC_CAPABILITY_ERROR2)))
			return (1);
		break;
	case PLAT_ECC_ERROR2_MESSAGE:
		if (plat_ecc_capability_map_sc &
		    PLAT_ECC_CAPABILITY_ERROR2)
			return (1);
		break;
	case PLAT_ECC_INDICTMENT_MESSAGE:
		/*
		 * Old indictment: when the SC lacks INDICT2 or the domain
		 * lacks FMA.
		 */
		if (!(plat_ecc_capability_map_sc &
		    PLAT_ECC_CAPABILITY_INDICT2) ||
		    !(plat_ecc_capability_map_domain &
		    PLAT_ECC_CAPABILITY_FMA))
			return (1);
		break;
	case PLAT_ECC_INDICTMENT2_MESSAGE:
		if (plat_ecc_capability_map_sc &
		    PLAT_ECC_CAPABILITY_INDICT2)
			return (1);
		break;
	case PLAT_ECC_DIMM_SID_MESSAGE:
		if (plat_ecc_capability_map_sc &
		    PLAT_ECC_CAPABILITY_DIMM_SID)
			return (1);
		/* FALLTHROUGH */
	default:
		return (0);
	}
	return (0);
}

/* Number of times plat_ecc_capability_sc_set() has been called. */
int plat_ecc_cap_sc_set_cnt = 0;

/*
 * Record the SC capability map.  On the first call only, if the SC
 * advertises DIMM serial id support and a platform handler is installed,
 * initialize the DIMM serial id cache.
 */
void
plat_ecc_capability_sc_set(uint32_t cap)
{
	plat_ecc_capability_map_sc = cap;

	if (plat_ecc_cap_sc_set_cnt == 0 &&
	    (cap & PLAT_ECC_CAPABILITY_DIMM_SID) &&
	    p2init_sid_cache != NULL) {
		/* The handler's return value is deliberately not used. */
		(void) p2init_sid_cache();
	}

	plat_ecc_cap_sc_set_cnt++;
}

/*
 * The following table represents mapping between the indictment1 reason
 * to its type.
 */
static plat_ecc_bl_map_t plat_ecc_bl_map_v1[] = {
	{ "l2cachedata",	PLAT_ECC_INDICT_ECACHE_CORRECTABLES	},
	{ "l3cachedata",	PLAT_ECC_INDICT_ECACHE_CORRECTABLES	},
	{ "l2cachedata",	PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE	},
	{ "l3cachedata",	PLAT_ECC_INDICT_ECACHE_UNCORRECTABLE	}
};

/*
 * The following table represents mapping between the indictment2 reason
 * to its type.
 */
static plat_ecc_bl_map_t plat_ecc_bl_map_v2[] = {
	{ "l2cachedata",	PLAT_ECC_INDICT2_L2_SERD	},
	{ "l3cachedata",	PLAT_ECC_INDICT2_L3_SERD	},
	{ "l2cachedata",	PLAT_ECC_INDICT2_L2_UE		},
	{ "l3cachedata",	PLAT_ECC_INDICT2_L3_UE		},
	{ "l2cachetag",		PLAT_ECC_INDICT2_L2_TAG_SERD	},
	{ "l3cachetag",		PLAT_ECC_INDICT2_L3_TAG_SERD	},
	{ "icache",		PLAT_ECC_INDICT2_ICACHE_SERD	},
	{ "dcache",		PLAT_ECC_INDICT2_DCACHE_SERD	},
	{ "pcache",		PLAT_ECC_INDICT2_PCACHE_SERD	},
	{ "itlb",		PLAT_ECC_INDICT2_ITLB_SERD	},
	{ "dtlb",		PLAT_ECC_INDICT2_DTLB_SERD	},
	{ "fpu",		PLAT_ECC_INDICT2_FPU		}
};

/*
 * The following function returns the indictment type for a given version.
 *
 * fault is expected to look like "fault.cpu.<cputype>.<reason>"; the
 * <reason> is looked up in the version 1 table when indict_version is 0
 * and in the version 2 table otherwise.  Returns PLAT_ECC_INDICT_NONE
 * when the prefix, the '.' after the cpu type, or the reason is not
 * recognized.
 */
static int
flt_name_to_msg_type(const char *fault, int indict_version)
{
	plat_ecc_bl_map_t *mapp;
	char *fltnm = "fault.cpu.";
	size_t fltnm_len = strlen(fltnm);
	int mapsz;
	char *p;
	int i;

	/* Check if it starts with proper fault name */
	if (strncmp(fault, fltnm, fltnm_len) != 0)
		return (PLAT_ECC_INDICT_NONE);

	/* fault now points at e.g. "ultraSPARC-IV.icache" */
	fault += fltnm_len;

	/* Skip the cpu type */
	if ((p = strchr(fault, '.')) == NULL)
		return (PLAT_ECC_INDICT_NONE);

	p++;	/* skip the "." */

	if (indict_version == 0) {
		mapp = plat_ecc_bl_map_v1;
		mapsz = sizeof (plat_ecc_bl_map_v1) /
		    sizeof (plat_ecc_bl_map_t);
	} else {
		mapp = plat_ecc_bl_map_v2;
		mapsz = sizeof (plat_ecc_bl_map_v2) /
		    sizeof (plat_ecc_bl_map_t);
	}

	/*
	 * The first matching reason wins.
	 * NOTE(review): both tables list "l2cachedata" and "l3cachedata"
	 * twice, so the later (uncorrectable/UE) entries can never be
	 * returned by this lookup -- confirm whether that is intended.
	 */
	for (i = 0; i < mapsz; i++) {
		if (strcmp(p, mapp[i].ebm_reason) == 0) {
			return (mapp[i].ebm_type);
		}
	}
	return (PLAT_ECC_INDICT_NONE);
}

/*
 * Blacklisting
 *
 * Only BLIOC_INSERT is supported.  Returns EINVAL for missing arguments
 * or FMRI members, ENOTSUP for unsupported commands, schemes or fault
 * classes, otherwise the result of queuing the indictment message.
 */
int
plat_blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
{
	struct async_flt aflt;
	char *unum;
	int msg_type, is_old_indict;

	if (fmri == NULL)
		return (EINVAL);
	if (cmd != BLIOC_INSERT)
		return (ENOTSUP);

	/* scheme and class are handed to strcmp()/strncmp() below. */
	if (scheme == NULL || class == NULL)
		return (EINVAL);

	/*
	 * We support both the blacklisting of CPUs via mem-schemed
	 * FMRIs that name E$ J-numbers, and CPUs via cpu-schemed FMRIs
	 * that name the cpuid.
	 */
	if (strcmp(scheme, FM_FMRI_SCHEME_MEM) == 0) {
		if (nvlist_lookup_string(fmri, FM_FMRI_MEM_UNUM, &unum))
			return (EINVAL);
		aflt.flt_inst = (uint_t)-1;
	} else if (strcmp(scheme, FM_FMRI_SCHEME_CPU) == 0) {
		if (nvlist_lookup_uint32(fmri, FM_FMRI_CPU_ID, &aflt.flt_inst))
			return (EINVAL);
		unum = NULL;
	} else {
		return (ENOTSUP);
	}

	/*
	 * If the SC cannot handle indictment2, fall back to the old one.
	 * Also if the domain does not support FMA, then send only the old one.
	 */
	is_old_indict = plat_ecc_capability_sc_get(PLAT_ECC_INDICTMENT_MESSAGE);

	if (is_old_indict)
		msg_type = flt_name_to_msg_type(class, 0);
	else
		msg_type = flt_name_to_msg_type(class, 1);

	if (msg_type == PLAT_ECC_INDICT_NONE)
		return (ENOTSUP);

	/*
	 * The current blacklisting interfaces are designed for a world where
	 * the SC is much more involved in the diagnosis and error reporting
	 * process than it is in the FMA world.  As such, the existing
	 * interfaces want all kinds of information about the error that's
	 * triggering the blacklist.  In the FMA world, we don't have access
	 * to any of that information by the time we're doing the blacklist,
	 * so we fake values.
	 */
	aflt.flt_id = gethrtime();
	aflt.flt_addr = -1;
	aflt.flt_stat = -1;
	aflt.flt_synd = (ushort_t)-1;

	if (is_old_indict) {
		/* The old indictment cannot be built without a unum. */
		if (unum && unum[0] != '\0')
			return (plat_log_fruid_indictment(msg_type, &aflt,
			    unum));
		else
			return (ENOTSUP);
	} else {
		return (plat_log_fruid_indictment2(msg_type, &aflt, unum));
	}
}

/*
 * State shared between plat_ecc_dispatch_task() and plat_ecc_send_msg():
 * plat_ecc_mutex protects each message's ecc_msg_status/ecc_msg_ret,
 * plat_ecc_condvar is broadcast when a send completes, and
 * plat_ecc_taskq runs the sends.
 */
static kcondvar_t plat_ecc_condvar;
static kmutex_t plat_ecc_mutex;
static taskq_t *plat_ecc_taskq;

/*
 * plat_ecc_dispatch_task: Dispatch the task on a taskq and wait for the
 * return value.  We use cv_wait_sig to wait for the return values.  If a
 * signal interrupts us, we return EINTR.  Otherwise, we return the value
 * returned by the mailbox functions.
 *
 * To avoid overloading the lower-level mailbox routines, we use a taskq
 * to serialize all messages.  Currently, it is expected that only one
 * process (fmd) will use this ioctl, so the delay caused by the taskq
 * should not have much of an effect.
 *
 * Ownership: msg and msg->ecc_msg_data are freed here unless the wait is
 * interrupted, in which case plat_ecc_send_msg() frees them.
 */
int
plat_ecc_dispatch_task(plat_ecc_message_t *msg)
{
	int ret;

	ASSERT(msg != NULL);
	ASSERT(plat_ecc_taskq != NULL);

	if (taskq_dispatch(plat_ecc_taskq, plat_ecc_send_msg,
	    (void *)msg, TQ_NOSLEEP) == TASKQID_INVALID) {
		/* Never queued, so nobody else will free the message. */
		kmem_free(msg->ecc_msg_data, msg->ecc_msg_len);
		kmem_free(msg, sizeof (plat_ecc_message_t));
		return (ENOMEM);
	}
	mutex_enter(&plat_ecc_mutex);

	/*
	 * It's possible that the taskq function completed before we
	 * acquired the mutex.  Check for this first.  If this did not
	 * happen, we wait for the taskq function to signal us, or an
	 * interrupt.  We also check ecc_msg_status to protect against
	 * spurious wakeups from cv_wait_sig.
	 */
	if (msg->ecc_msg_status == PLAT_ECC_MSG_SENT) {
		ret = msg->ecc_msg_ret;
		kmem_free(msg->ecc_msg_data, msg->ecc_msg_len);
		kmem_free(msg, sizeof (plat_ecc_message_t));
	} else {
		msg->ecc_msg_status = PLAT_ECC_TASK_DISPATCHED;

		/* Loop until interrupted (ret == 0) or the status changes. */
		while ((ret = cv_wait_sig(&plat_ecc_condvar,
		    &plat_ecc_mutex)) != 0 &&
		    msg->ecc_msg_status == PLAT_ECC_TASK_DISPATCHED)
			;

		if ((ret == 0) && (msg->ecc_msg_status != PLAT_ECC_MSG_SENT)) {
			/*
			 * An interrupt was received.  Hand ownership of msg
			 * to plat_ecc_send_msg(), which frees it when it
			 * sees this status.
			 */
			msg->ecc_msg_status = PLAT_ECC_INTERRUPT_RECEIVED;
			ret = EINTR;
		} else {
			ret = msg->ecc_msg_ret;
			kmem_free(msg->ecc_msg_data, msg->ecc_msg_len);
			kmem_free(msg, sizeof (plat_ecc_message_t));
		}
	}
	mutex_exit(&plat_ecc_mutex);
	return (ret);
}

/*
 * Taskq callback: send one queued message to the SC and hand the result
 * back to the waiter in plat_ecc_dispatch_task().
 */
static void
plat_ecc_send_msg(void *arg)
{
	plat_ecc_message_t *msg = arg;
	int ret;

	/*
	 * Send this data off as a mailbox message to the SC.
	 */
	ret = plat_send_ecc_mailbox_msg(msg->ecc_msg_type, msg->ecc_msg_data);

	mutex_enter(&plat_ecc_mutex);

	/*
	 * If the dispatching function received an interrupt, don't bother
	 * signalling it, and throw away the results.  Otherwise, set the
	 * return value and signal the condvar.
	 */
	if (msg->ecc_msg_status == PLAT_ECC_INTERRUPT_RECEIVED) {
		kmem_free(msg->ecc_msg_data, msg->ecc_msg_len);
		kmem_free(msg, sizeof (plat_ecc_message_t));
	} else {
		msg->ecc_msg_ret = ret;
		msg->ecc_msg_status = PLAT_ECC_MSG_SENT;
		cv_broadcast(&plat_ecc_condvar);
	}

	mutex_exit(&plat_ecc_mutex);
}

/*
 * One-time initialization: set up the mutex, condvar and single-threaded
 * taskq used to send messages, and the per-board DIMM serial id locks.
 */
void
plat_ecc_init(void)
{
	int bd;

	mutex_init(&plat_ecc_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&plat_ecc_condvar, NULL, CV_DEFAULT, NULL);
	plat_ecc_taskq = taskq_create("plat_ecc_taskq", 1, minclsyspri,
	    PLAT_ECC_TASKQ_MIN, PLAT_ECC_TASKQ_MAX, TASKQ_PREPOPULATE);
	ASSERT(plat_ecc_taskq != NULL);

	for (bd = 0; bd < plat_max_cpumem_boards(); bd++) {
		mutex_init(&domain_dimm_sids[bd].pdsb_lock,
		    NULL, MUTEX_DEFAULT, NULL);
	}
}