/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * This file contains the environmental PICL plug-in module. */ /* * This plugin sets up the PICLTREE for Chicago WS. * It provides functionality to get/set temperatures and * fan speeds. * * The environmental policy defaults to the auto mode * as programmed by OBP at boot time. 
*/

/*
 * NOTE(review): the header names of the angle-bracket #include directives
 * below are missing from this copy of the file (only the directive keyword
 * is left).  They must be restored from the upstream source before this
 * file can be built.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "envd.h"
#include
#include

int debug_fd;

/*
 * PICL plugin entry points
 */
static void piclenvd_register(void);
static void piclenvd_init(void);
static void piclenvd_fini(void);

/*
 * Env setup routines
 */
extern void env_picl_setup(void);
extern void env_picl_destroy(void);
extern int env_picl_setup_tuneables(void);

static boolean_t has_fan_failed(env_fan_t *fanp);

/*
 * PSU fan fault handling
 */
static boolean_t has_psufan_failed(void);
static int psufan_last_status = FAN_OK;

#pragma init(piclenvd_register)

/*
 * Plugin registration information
 */
static picld_plugin_reg_t my_reg_info = {
	PICLD_PLUGIN_VERSION,
	PICLD_PLUGIN_CRITICAL,
	"SUNW_piclenvd",
	piclenvd_init,
	piclenvd_fini,
};

#define	REGISTER_INFORMATION_STRING_LENGTH	16
static char fan_rpm_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};
static char fan_status_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};

/*
 * Prototypes for the SCSI helpers used by the disk temperature code.
 */
static int scsi_log_sense(env_disk_t *diskp, uchar_t page_code,
    void *pagebuf, uint16_t pagelen, int page_control);
static int scsi_mode_select(env_disk_t *diskp, uchar_t page_code,
    uchar_t *pagebuf, uint16_t pagelen);
static int get_disk_temp(env_disk_t *);

/*
 * ES Segment stuff
 *
 * sensor_ctl[] receives the per-sensor limit blocks copied out of the
 * FRU environmental segment by envd_es_setup(); it is indexed by sensor id.
 */
static es_sensor_blk_t sensor_ctl[MAX_SENSORS];

/*
 * Default limits for sensors, in case ES segment is not present, or has
 * inconsistent information
 *
 * envd_es_default_setup() indexes this table by sensor id.  Each row lists
 * its limits in the order: high power-off, high shutdown, high warning,
 * low warning, low shutdown, low power-off.
 */
static es_sensor_blk_t sensor_default_ctl[MAX_SENSORS] = {
	{
	    CPU0_HIGH_POWER_OFF, CPU0_HIGH_SHUTDOWN, CPU0_HIGH_WARNING,
	    CPU0_LOW_WARNING, CPU0_LOW_SHUTDOWN, CPU0_LOW_POWER_OFF
	},
	{
	    CPU1_HIGH_POWER_OFF, CPU1_HIGH_SHUTDOWN, CPU1_HIGH_WARNING,
	    CPU1_LOW_WARNING, CPU1_LOW_SHUTDOWN, CPU1_LOW_POWER_OFF
	},
	{
	    ADT7462_HIGH_POWER_OFF, ADT7462_HIGH_SHUTDOWN,
	    ADT7462_HIGH_WARNING, ADT7462_LOW_WARNING,
	    ADT7462_LOW_SHUTDOWN, ADT7462_LOW_POWER_OFF
	},
	{
	    MB_HIGH_POWER_OFF, MB_HIGH_SHUTDOWN, MB_HIGH_WARNING,
	    MB_LOW_WARNING, MB_LOW_SHUTDOWN, MB_LOW_POWER_OFF
	},
	{
	    LM95221_HIGH_POWER_OFF, LM95221_HIGH_SHUTDOWN,
	    LM95221_HIGH_WARNING, LM95221_LOW_WARNING,
	    LM95221_LOW_SHUTDOWN, LM95221_LOW_POWER_OFF
	},
	{
	    FIRE_HIGH_POWER_OFF, FIRE_HIGH_SHUTDOWN, FIRE_HIGH_WARNING,
	    FIRE_LOW_WARNING, FIRE_LOW_SHUTDOWN, FIRE_LOW_POWER_OFF
	},
	{
	    LSI1064_HIGH_POWER_OFF, LSI1064_HIGH_SHUTDOWN,
	    LSI1064_HIGH_WARNING, LSI1064_LOW_WARNING,
	    LSI1064_LOW_SHUTDOWN, LSI1064_LOW_POWER_OFF
	},
	{
	    FRONT_PANEL_HIGH_POWER_OFF, FRONT_PANEL_HIGH_SHUTDOWN,
	    FRONT_PANEL_HIGH_WARNING, FRONT_PANEL_LOW_WARNING,
	    FRONT_PANEL_LOW_SHUTDOWN, FRONT_PANEL_LOW_POWER_OFF
	},
	{
	    PSU_HIGH_POWER_OFF, PSU_HIGH_SHUTDOWN, PSU_HIGH_WARNING,
	    PSU_LOW_WARNING, PSU_LOW_SHUTDOWN, PSU_LOW_POWER_OFF
	}
};

/*
 * Env thread variables
 */
static boolean_t system_shutdown_started = B_FALSE;
static boolean_t system_temp_thr_created = B_FALSE;
static pthread_t system_temp_thr_id;
static pthread_attr_t thr_attr;
static boolean_t disk_temp_thr_created = B_FALSE;
static pthread_t disk_temp_thr_id;
static boolean_t fan_thr_created = B_FALSE;
static pthread_t fan_thr_id;

/*
 * PM thread related variables
 */
static pthread_t pmthr_tid;	/* pmthr thread ID */
static int pm_fd = -1;		/* PM device file descriptor */
static boolean_t pmthr_created = B_FALSE;
static int cur_lpstate;		/* cur low power state */

/*
 * Envd plug-in verbose flag set by SUNW_PICLENVD_DEBUG environment var
 * Setting the verbose tuneable also enables debugging for better
 * control
 */
int env_debug = 0;

/*
 * These are debug variables for keeping track of the total number
 * of Fan and Temp sensor retries over the lifetime of the plugin.
*/ static int total_fan_retries = 0; static int total_temp_retries = 0; /* * Fan devices */ static env_fan_t envd_system_fan0 = { ENV_SYSTEM_FAN0, ENV_SYSTEM_FAN0_DEVFS, SYSTEM_FAN0_ID, SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1, }; static env_fan_t envd_system_fan1 = { ENV_SYSTEM_FAN1, ENV_SYSTEM_FAN1_DEVFS, SYSTEM_FAN1_ID, SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1, }; static env_fan_t envd_system_fan2 = { ENV_SYSTEM_FAN2, ENV_SYSTEM_FAN2_DEVFS, SYSTEM_FAN2_ID, SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1, }; static env_fan_t envd_system_fan3 = { ENV_SYSTEM_FAN3, ENV_SYSTEM_FAN3_DEVFS, SYSTEM_FAN3_ID, SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1, }; static env_fan_t envd_system_fan4 = { ENV_SYSTEM_FAN4, ENV_SYSTEM_FAN4_DEVFS, SYSTEM_FAN4_ID, SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1, }; /* * Disk devices */ static env_disk_t envd_disk0 = { ENV_DISK0, ENV_DISK0_DEVFS, DISK0_PHYSPATH, DISK0_NODE_PATH, DISK0_ID, -1, }; static env_disk_t envd_disk1 = { ENV_DISK1, ENV_DISK1_DEVFS, DISK1_PHYSPATH, DISK1_NODE_PATH, DISK1_ID, -1, }; static env_disk_t envd_disk2 = { ENV_DISK2, ENV_DISK2_DEVFS, DISK2_PHYSPATH, DISK2_NODE_PATH, DISK2_ID, -1, }; static env_disk_t envd_disk3 = { ENV_DISK3, ENV_DISK3_DEVFS, DISK3_PHYSPATH, DISK3_NODE_PATH, DISK3_ID, -1, }; /* * Sensors */ static env_sensor_t envd_sensor_cpu0 = { SENSOR_CPU0, SENSOR_CPU0_DEVFS, CPU0_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_cpu1 = { SENSOR_CPU1, SENSOR_CPU1_DEVFS, CPU1_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_adt7462 = { SENSOR_ADT7462, SENSOR_ADT7462_DEVFS, ADT7462_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_mb = { SENSOR_MB, SENSOR_MB_DEVFS, MB_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_lm95221 = { SENSOR_LM95221, SENSOR_LM95221_DEVFS, LM95221_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_fire = { SENSOR_FIRE, SENSOR_FIRE_DEVFS, FIRE_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_lsi1064 = { 
SENSOR_LSI1064, SENSOR_LSI1064_DEVFS, LSI1064_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_front_panel = { SENSOR_FRONT_PANEL, SENSOR_FRONT_PANEL_DEVFS, FRONT_PANEL_SENSOR_ID, -1, NULL, }; static env_sensor_t envd_sensor_psu = { SENSOR_PSU, SENSOR_PSU_DEVFS, PSU_SENSOR_ID, -1, NULL, }; /* * The vendor-id and device-id are the properties associated with * the SCSI controller. This is used to identify a particular controller * like LSI1064. */ #define VENDOR_ID "vendor-id" #define DEVICE_ID "device-id" /* * The implementation for SCSI disk drives to supply info. about * temperature is not mandatory. Hence we first determine if the * temperature page is supported. To do this we need to scan the list * of pages supported. */ #define SUPPORTED_LPAGES 0 #define TEMPERATURE_PAGE 0x0D #define LOGPAGEHDRSIZE 4 /* * NULL terminated array of fans */ static env_fan_t *envd_fans[] = { &envd_system_fan0, &envd_system_fan1, &envd_system_fan2, &envd_system_fan3, &envd_system_fan4, NULL }; /* * NULL terminated array of disks */ static env_disk_t *envd_disks[] = { &envd_disk0, &envd_disk1, &envd_disk2, &envd_disk3, NULL }; /* * NULL terminated array of temperature sensors */ #define N_ENVD_SENSORS 9 static env_sensor_t *envd_sensors[] = { &envd_sensor_cpu0, &envd_sensor_cpu1, &envd_sensor_adt7462, &envd_sensor_mb, &envd_sensor_lm95221, &envd_sensor_fire, &envd_sensor_lsi1064, &envd_sensor_front_panel, &envd_sensor_psu, NULL }; #define NOT_AVAILABLE "NA" /* * Tuneables */ #define ENABLE 1 #define DISABLE 0 static int disk_high_warn_temperature = DISK_HIGH_WARN_TEMPERATURE; static int disk_low_warn_temperature = DISK_LOW_WARN_TEMPERATURE; static int disk_high_shutdown_temperature = DISK_HIGH_SHUTDOWN_TEMPERATURE; static int disk_low_shutdown_temperature = DISK_LOW_SHUTDOWN_TEMPERATURE; static int disk_scan_interval = DISK_SCAN_INTERVAL; static int sensor_scan_interval = SENSOR_SCAN_INTERVAL; static int fan_scan_interval = FAN_SCAN_INTERVAL; static int 
get_int_val(ptree_rarg_t *parg, void *buf); static int set_int_val(ptree_warg_t *parg, const void *buf); static int get_string_val(ptree_rarg_t *parg, void *buf); static int set_string_val(ptree_warg_t *parg, const void *buf); static int shutdown_override = 0; static int sensor_warning_interval = SENSOR_WARNING_INTERVAL; static int sensor_warning_duration = SENSOR_WARNING_DURATION; static int sensor_shutdown_interval = SENSOR_SHUTDOWN_INTERVAL; static int disk_warning_interval = DISK_WARNING_INTERVAL; static int disk_warning_duration = DISK_WARNING_DURATION; static int disk_shutdown_interval = DISK_SHUTDOWN_INTERVAL; static int system_temp_monitor = 1; /* enabled */ static int fan_monitor = 1; /* enabled */ static int pm_monitor = 1; /* enabled */ /* Disable disk temperature monitoring until we have LSI fw support */ int disk_temp_monitor = 0; static char shutdown_cmd[] = SHUTDOWN_CMD; const char *iofru_devname = I2C_DEVFS "/" IOFRU_DEV; env_tuneable_t tuneables[] = { {"system_temp-monitor", PICL_PTYPE_INT, &system_temp_monitor, &get_int_val, &set_int_val, sizeof (int)}, {"fan-monitor", PICL_PTYPE_INT, &fan_monitor, &get_int_val, &set_int_val, sizeof (int)}, {"pm-monitor", PICL_PTYPE_INT, &pm_monitor, &get_int_val, &set_int_val, sizeof (int)}, {"shutdown-override", PICL_PTYPE_INT, &shutdown_override, &get_int_val, &set_int_val, sizeof (int)}, {"sensor-warning-duration", PICL_PTYPE_INT, &sensor_warning_duration, &get_int_val, &set_int_val, sizeof (int)}, {"disk-scan-interval", PICL_PTYPE_INT, &disk_scan_interval, &get_int_val, &set_int_val, sizeof (int)}, {"fan-scan-interval", PICL_PTYPE_INT, &fan_scan_interval, &get_int_val, &set_int_val, sizeof (int)}, {"sensor-scan-interval", PICL_PTYPE_INT, &sensor_scan_interval, &get_int_val, &set_int_val, sizeof (int)}, {"sensor_warning-interval", PICL_PTYPE_INT, &sensor_warning_interval, &get_int_val, &set_int_val, sizeof (int)}, {"sensor_shutdown-interval", PICL_PTYPE_INT, &sensor_shutdown_interval, &get_int_val, 
&set_int_val, sizeof (int)}, {"disk_warning-interval", PICL_PTYPE_INT, &disk_warning_interval, &get_int_val, &set_int_val, sizeof (int)}, {"disk_warning-duration", PICL_PTYPE_INT, &disk_warning_duration, &get_int_val, &set_int_val, sizeof (int)}, {"disk_shutdown-interval", PICL_PTYPE_INT, &disk_shutdown_interval, &get_int_val, &set_int_val, sizeof (int)}, {"shutdown-command", PICL_PTYPE_CHARSTRING, shutdown_cmd, &get_string_val, &set_string_val, sizeof (shutdown_cmd)}, {"monitor-disk-temp", PICL_PTYPE_INT, &disk_temp_monitor, &get_int_val, &set_int_val, sizeof (int)}, {"disk-high-warn-temperature", PICL_PTYPE_INT, &disk_high_warn_temperature, &get_int_val, &set_int_val, sizeof (int)}, {"disk-low-warn-temperature", PICL_PTYPE_INT, &disk_low_warn_temperature, &get_int_val, &set_int_val, sizeof (int)}, {"disk-high-shutdown-temperature", PICL_PTYPE_INT, &disk_high_shutdown_temperature, &get_int_val, &set_int_val, sizeof (int)}, {"disk-low-shutdown-temperature", PICL_PTYPE_INT, &disk_low_shutdown_temperature, &get_int_val, &set_int_val, sizeof (int)}, {"verbose", PICL_PTYPE_INT, &env_debug, &get_int_val, &set_int_val, sizeof (int)} }; /* * We use this to figure out how many tuneables there are * This is variable because the publishing routine needs this info * in piclenvsetup.c */ int ntuneables = (sizeof (tuneables)/sizeof (tuneables[0])); /* * Lookup fan and return a pointer to env_fan_t data structure. */ env_fan_t * fan_lookup(char *name) { int i; env_fan_t *fanp; for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { if (strcmp(fanp->name, name) == 0) return (fanp); } return (NULL); } /* * Lookup sensor and return a pointer to env_sensor_t data structure. */ env_sensor_t * sensor_lookup(char *name) { env_sensor_t *sensorp; int i; for (i = 0; i < N_ENVD_SENSORS; ++i) { sensorp = envd_sensors[i]; if (strcmp(sensorp->name, name) == 0) return (sensorp); } return (NULL); } /* * Lookup disk and return a pointer to env_disk_t data structure. 
*/ env_disk_t * disk_lookup(char *name) { int i; env_disk_t *diskp; for (i = 0; (diskp = envd_disks[i]) != NULL; i++) { if (strncmp(diskp->name, name, strlen(name)) == 0) return (diskp); } return (NULL); } /* * Get current temperature * Returns -1 on error, 0 if successful */ int get_temperature(env_sensor_t *sensorp, tempr_t *temp) { int fd = sensorp->fd; int retval = 0; if (fd == -1) retval = -1; else if (ioctl(fd, PIC_GET_TEMPERATURE, temp) != 0) { retval = -1; sensorp->error++; if (sensorp->error == MAX_SENSOR_RETRIES) { envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_FAIL, sensorp->name, errno, strerror(errno)); } total_temp_retries++; (void) sleep(1); } else if (sensorp->error != 0) { if (sensorp->error >= MAX_SENSOR_RETRIES) { envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_OK, sensorp->name); } sensorp->error = 0; if (total_temp_retries && env_debug) { envd_log(LOG_WARNING, "Total retries for sensors = %d", total_temp_retries); } } return (retval); } /* * Get current disk temperature * Returns -1 on error, 0 if successful */ int disk_temperature(env_disk_t *diskp, tempr_t *temp) { int retval = 0; if (diskp == NULL) retval = -1; else *temp = diskp->current_temp; return (retval); } /* * Get current fan speed * This function returns a RPM value for fanspeed * in fanspeedp. 
* Returns -1 on error, 0 if successful */ int get_fan_speed(env_fan_t *fanp, fanspeed_t *fanspeedp) { uint8_t tach; int real_tach; int retries; if (fanp->fd == -1) return (-1); if (has_fan_failed(fanp)) { *fanspeedp = 0; return (0); } /* try to read the fan information */ for (retries = 0; retries < MAX_FAN_RETRIES; retries++) { if (ioctl(fanp->fd, PIC_GET_FAN_SPEED, &tach) == 0) break; (void) sleep(1); } total_fan_retries += retries; if (retries >= MAX_FAN_RETRIES) return (-1); if (total_fan_retries && env_debug) { envd_log(LOG_WARNING, "total retries for fan = %d", total_fan_retries); } real_tach = tach << 8; *fanspeedp = TACH_TO_RPM(real_tach); return (0); } /* * Set fan speed * This function accepts a percentage of fan speed * from 0-100 and programs the HW monitor fans to the corresponding * fanspeed value. * Returns -1 on error, -2 on invalid args passed, 0 if successful */ int set_fan_speed(env_fan_t *fanp, fanspeed_t fanspeed) { uint8_t speed; if (fanp->fd == -1) return (-1); if (fanspeed < 0 || fanspeed > 100) return (-2); speed = fanspeed; if (ioctl(fanp->fd, PIC_SET_FAN_SPEED, &speed) != 0) return (-1); return (0); } /* * close all fan devices */ static void envd_close_fans(void) { int i; env_fan_t *fanp; for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { if (fanp->fd != -1) { (void) close(fanp->fd); fanp->fd = -1; } } } /* * Close sensor devices and freeup resources */ static void envd_close_sensors(void) { env_sensor_t *sensorp; int i; for (i = 0; i < N_ENVD_SENSORS; ++i) { sensorp = envd_sensors[i]; if (sensorp->fd != -1) { (void) close(sensorp->fd); sensorp->fd = -1; } } } /* * Open fan devices and initialize per fan data structure. 
*/ static int envd_setup_fans(void) { int i, fd; env_fan_t *fanp; int fancnt = 0; picl_nodehdl_t tnodeh; for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { fanp->last_status = FAN_OK; /* Make sure cpu0/1 present for validating cpu fans */ if (fanp->id == CPU0_FAN_ID) { if (ptree_get_node_by_path(CPU0_PATH, &tnodeh) != PICL_SUCCESS) { if (env_debug) { envd_log(LOG_ERR, "get node by path failed for %s\n", CPU0_PATH); } fanp->present = B_FALSE; continue; } } if (fanp->id == CPU1_FAN_ID) { if (ptree_get_node_by_path(CPU1_PATH, &tnodeh) != PICL_SUCCESS) { if (env_debug) { envd_log(LOG_ERR, "get node by path failed for %s\n", CPU0_PATH); } fanp->present = B_FALSE; continue; } } if ((fd = open(fanp->devfs_path, O_RDWR)) == -1) { envd_log(LOG_CRIT, ENV_FAN_OPEN_FAIL, fanp->name, fanp->devfs_path, errno, strerror(errno)); fanp->present = B_FALSE; continue; } fanp->fd = fd; fanp->present = B_TRUE; fancnt++; } if (fancnt == 0) return (-1); return (0); } static int envd_setup_disks(void) { int ret, i, page_index, page_len; picl_nodehdl_t tnodeh; env_disk_t *diskp; uint_t vendor_id; uint_t device_id; uchar_t log_page[256]; if (ptree_get_node_by_path(SCSI_CONTROLLER_NODE_PATH, &tnodeh) != PICL_SUCCESS) { if (env_debug) { envd_log(LOG_ERR, "On-Board SCSI controller %s " "not found in the system.\n", SCSI_CONTROLLER_NODE_PATH); } return (-1); } if ((ret = ptree_get_propval_by_name(tnodeh, VENDOR_ID, &vendor_id, sizeof (vendor_id))) != 0) { if (env_debug) { envd_log(LOG_ERR, "Error in getting vendor-id " "for SCSI controller. ret = %d errno = 0x%d\n", ret, errno); } return (-1); } if ((ret = ptree_get_propval_by_name(tnodeh, DEVICE_ID, &device_id, sizeof (device_id))) != 0) { if (env_debug) { envd_log(LOG_ERR, "Error in getting device-id " "for SCSI controller. ret = %d errno = 0x%d\n", ret, errno); } return (-1); } /* * We have found LSI1064 SCSi controller onboard. 
*/ for (i = 0; (diskp = envd_disks[i]) != NULL; i++) { if (ptree_get_node_by_path(diskp->nodepath, &tnodeh) != PICL_SUCCESS) { diskp->present = B_FALSE; if (env_debug) { envd_log(LOG_ERR, "DISK %d: %s not found in the system.\n", diskp->id, diskp->nodepath); } continue; } if ((diskp->fd = open(diskp->devfs_path, O_RDONLY)) == -1) { diskp->present = B_FALSE; if (env_debug) { envd_log(LOG_ERR, "Error in opening %s errno = 0x%x\n", diskp->devfs_path, errno); } continue; } diskp->present = B_TRUE; diskp->tpage_supported = B_FALSE; diskp->smart_supported = B_FALSE; diskp->warning_tstamp = 0; diskp->shutdown_tstamp = 0; diskp->high_warning = disk_high_warn_temperature; diskp->low_warning = disk_low_warn_temperature; diskp->high_shutdown = disk_high_shutdown_temperature; diskp->low_shutdown = disk_low_shutdown_temperature; /* * Find out if the Temperature page is supported by the disk. */ if (scsi_log_sense(diskp, SUPPORTED_LPAGES, log_page, sizeof (log_page), 1) == 0) { page_len = ((log_page[2] << 8) & 0xFF00) | log_page[3]; for (page_index = LOGPAGEHDRSIZE; page_index < page_len + LOGPAGEHDRSIZE; page_index++) { if (log_page[page_index] != TEMPERATURE_PAGE) continue; diskp->tpage_supported = B_TRUE; if (env_debug) { envd_log(LOG_ERR, "tpage supported for %s\n", diskp->nodepath); } } } /* * If the temp log page failed, we can check if this is * a SATA drive and attempt to read the temperature * using the SMART interface. 
*/ if (diskp->tpage_supported != B_TRUE) { uchar_t iec_page[IEC_PAGE_SIZE]; if (env_debug) envd_log(LOG_ERR, "Turning on SMART\n"); (void) memset(iec_page, 0, sizeof (iec_page)); iec_page[0] = IEC_PAGE; /* SMART PAGE */ iec_page[1] = 0xa; /* length */ /* Notification, only when requested */ iec_page[3] = REPORT_ON_REQUEST; ret = scsi_mode_select(diskp, IEC_PAGE, iec_page, sizeof (iec_page)); /* * Since we know this is a SMART capable * drive, we will try to set the page and * determine if the drive is not capable * of reading the TEMP page when we * try to read the temperature and disable * it then. We do not fail when reading * or writing this page because we will * determine the SMART capabilities * when reading the temperature. */ if ((ret != 0) && (env_debug)) { envd_log(LOG_ERR, "Failed to set mode page"); } diskp->smart_supported = B_TRUE; diskp->tpage_supported = B_TRUE; } if (get_disk_temp(diskp) < 0) { envd_log(LOG_ERR, " error reading temperature of:%s\n", diskp->name); } else if (env_debug) { envd_log(LOG_ERR, "%s: temperature = %d\n", diskp->name, diskp->current_temp); } } return (0); } static int envd_es_setup(void) { seeprom_scn_t scn_hdr; seeprom_seg_t seg_hdr; es_data_t *envseg; es_sensor_t *sensorp; int i, fd, id; int envseg_len, esd_len; char *envsegp; /* * Open the front io fru */ if ((fd = open(iofru_devname, O_RDONLY)) == -1) { envd_log(LOG_ERR, ENV_FRU_OPEN_FAIL, iofru_devname, errno); return (-1); } /* * Read section header from the fru SEEPROM */ if (lseek(fd, SSCN_OFFSET, SEEK_SET) == (off_t)-1 || read(fd, &scn_hdr, sizeof (scn_hdr)) != sizeof (scn_hdr)) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); (void) close(fd); return (-1); } if ((scn_hdr.sscn_tag != SSCN_TAG) || (GET_UNALIGN16(&scn_hdr.sscn_ver) != SSCN_VER)) { envd_log(LOG_ERR, ENV_FRU_BAD_SCNHDR, scn_hdr.sscn_tag, GET_UNALIGN16(&scn_hdr.sscn_ver)); (void) close(fd); return (-1); } /* * Locate environmental segment */ for (i = 0; i < scn_hdr.sscn_nsegs; i++) { if (read(fd, 
&seg_hdr, sizeof (seg_hdr)) != sizeof (seg_hdr)) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); (void) close(fd); return (-1); } if (env_debug) { envd_log(LOG_INFO, "Seg name: %x off:%x len:%x\n", GET_UNALIGN16(&seg_hdr.sseg_name), GET_UNALIGN16(&seg_hdr.sseg_off), GET_UNALIGN16(&seg_hdr.sseg_len)); } if (GET_UNALIGN16(&seg_hdr.sseg_name) == ENVSEG_NAME) break; } if (i == scn_hdr.sscn_nsegs) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); (void) close(fd); return (-1); } /* * Read environmental segment */ envseg_len = GET_UNALIGN16(&seg_hdr.sseg_len); if ((envseg = malloc(envseg_len)) == NULL) { envd_log(LOG_ERR, ENV_FRU_NOMEM_FOR_SEG, envseg_len); (void) close(fd); return (-1); } if (lseek(fd, (off_t)GET_UNALIGN16(&seg_hdr.sseg_off), SEEK_SET) == (off_t)-1 || read(fd, envseg, envseg_len) != envseg_len) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); free(envseg); (void) close(fd); return (-1); } /* * Check environmental segment data for consistency */ esd_len = sizeof (*envseg) + (envseg->esd_nsensors - 1) * sizeof (envseg->esd_sensors[0]); if (envseg->esd_ver != ENVSEG_VERSION || envseg_len < esd_len) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); free(envseg); (void) close(fd); return (-1); } /* * Process environmental segment data */ if (envseg->esd_nsensors > MAX_SENSORS) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); free(envseg); (void) close(fd); return (-1); } sensorp = &(envseg->esd_sensors[0]); envsegp = (char *)envseg; for (i = 0; i < envseg->esd_nsensors; i++) { uint32_t ess_id; (void) memcpy(&ess_id, sensorp->ess_id, sizeof (sensorp->ess_id)); if (env_debug) { envd_log(LOG_INFO, "\n Sensor Id %x offset %x", ess_id, sensorp->ess_off); } if (ess_id >= MAX_SENSORS) { envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); free(envseg); (void) close(fd); return (-1); } (void) memcpy(&sensor_ctl[ess_id], &envsegp[sensorp->ess_off], sizeof (es_sensor_blk_t)); sensorp++; } /* * Match sensor/ES id and point to correct 
data based on IDs */ for (i = 0; i < N_ENVD_SENSORS; i++) { id = envd_sensors[i]->id; envd_sensors[i]->es = &sensor_ctl[id]; } /* * Cleanup and return */ free(envseg); (void) close(fd); return (0); } static void envd_es_default_setup(void) { int i, id; for (i = 0; i < N_ENVD_SENSORS; i++) { id = envd_sensors[i]->id; envd_sensors[i]->es = &sensor_default_ctl[id]; } } /* * Open temperature sensor devices and initialize per sensor data structure. */ static int envd_setup_sensors(void) { env_sensor_t *sensorp; int sensorcnt = 0; int i; picl_nodehdl_t tnodeh; for (i = 0; i < N_ENVD_SENSORS; i++) { if (env_debug) envd_log(LOG_ERR, "scanning sensor %d\n", i); sensorp = envd_sensors[i]; /* Initialize sensor's initial state */ sensorp->shutdown_initiated = B_FALSE; sensorp->warning_tstamp = 0; sensorp->shutdown_tstamp = 0; sensorp->error = 0; /* Make sure cpu0/1 sensors are present */ if (sensorp->id == CPU0_SENSOR_ID) { if (ptree_get_node_by_path(CPU0_PATH, &tnodeh) != PICL_SUCCESS) { if (env_debug) { envd_log(LOG_ERR, "get node by path failed for %s\n", CPU0_PATH); } sensorp->present = B_FALSE; continue; } } if (sensorp->id == CPU1_SENSOR_ID) { if (ptree_get_node_by_path(CPU1_PATH, &tnodeh) != PICL_SUCCESS) { if (env_debug) { envd_log(LOG_ERR, "get node by path failed for %s\n", CPU1_PATH); } sensorp->present = B_FALSE; continue; } } sensorp->fd = open(sensorp->devfs_path, O_RDWR); if (sensorp->fd == -1) { if (env_debug) { envd_log(LOG_ERR, ENV_SENSOR_OPEN_FAIL, sensorp->name, sensorp->devfs_path, errno, strerror(errno)); } sensorp->present = B_FALSE; continue; } /* * Determine if the front panel is attached, we want the * information if it exists, but should not shut down * the system if it is removed. 
*/ if (sensorp->id == FRONT_PANEL_SENSOR_ID) { tempr_t temp; int tries; for (tries = 0; tries < MAX_SENSOR_RETRIES; tries++) { if (ioctl(sensorp->fd, PIC_GET_TEMPERATURE, &temp) == 0) { break; } (void) sleep(1); } if (tries == MAX_SENSOR_RETRIES) sensorp->present = B_FALSE; } sensorp->present = B_TRUE; sensorcnt++; } if (sensorcnt == 0) return (-1); return (0); } /* ARGSUSED */ static void * pmthr(void *args) { pm_state_change_t pmstate; char physpath[PATH_MAX]; int pre_lpstate; uint8_t estar_state; int env_monitor_fd; pmstate.physpath = physpath; pmstate.size = sizeof (physpath); cur_lpstate = 0; pre_lpstate = 1; pm_fd = open(PM_DEVICE, O_RDWR); if (pm_fd == -1) { envd_log(LOG_ERR, PM_THREAD_EXITING, errno, strerror(errno)); return (NULL); } for (;;) { /* * Get PM state change events to check if the system * is in lowest power state and inform PIC which controls * fan speeds. * * To minimize polling, we use the blocking interface * to get the power state change event here. */ if (ioctl(pm_fd, PM_GET_STATE_CHANGE_WAIT, &pmstate) != 0) { if (errno != EINTR) break; continue; } do { if (env_debug) { envd_log(LOG_INFO, "pmstate event:0x%x flags:%x" "comp:%d oldval:%d newval:%d path:%s\n", pmstate.event, pmstate.flags, pmstate.component, pmstate.old_level, pmstate.new_level, pmstate.physpath); } cur_lpstate = (pmstate.flags & PSC_ALL_LOWEST) ? 
1 : 0; } while (ioctl(pm_fd, PM_GET_STATE_CHANGE, &pmstate) == 0); if (pre_lpstate != cur_lpstate) { pre_lpstate = cur_lpstate; estar_state = (cur_lpstate & 0x1); if (env_debug) envd_log(LOG_ERR, "setting PIC ESTAR SATE to %x\n", estar_state); env_monitor_fd = open(ENV_MONITOR_DEVFS, O_RDWR); if (env_monitor_fd != -1) { if (ioctl(env_monitor_fd, PIC_SET_ESTAR_MODE, &estar_state) < 0) { if (env_debug) envd_log(LOG_ERR, "unable to set ESTAR_MODE in PIC\n"); } (void) close(env_monitor_fd); } else { if (env_debug) envd_log(LOG_ERR, "Failed to open %s\n", ENV_MONITOR_DEVFS); } } } /*NOTREACHED*/ return (NULL); } /* * This is env thread which monitors the current temperature when * warning threshold is exceeded. The job is to make sure it does * not execced/decrease shutdown threshold. If it does it will start * forced shutdown to avoid reaching hardware poweroff via THERM interrupt. */ /*ARGSUSED*/ static void * system_temp_thr(void *args) { char syscmd[BUFSIZ]; char msgbuf[BUFSIZ]; timespec_t to; int ret, i; env_sensor_t *sensorp; pthread_mutex_t env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t env_monitor_cv = PTHREAD_COND_INITIALIZER; time_t ct; tempr_t temp; for (;;) { /* * Sleep for specified seconds before issuing IOCTL * again. */ (void) pthread_mutex_lock(&env_monitor_mutex); ret = pthread_cond_reltimedwait_np(&env_monitor_cv, &env_monitor_mutex, &to); to.tv_sec = sensor_scan_interval; to.tv_nsec = 0; if (ret != ETIMEDOUT) { (void) pthread_mutex_unlock(&env_monitor_mutex); continue; } (void) pthread_mutex_unlock(&env_monitor_mutex); for (i = 0; i < N_ENVD_SENSORS; i++) { sensorp = envd_sensors[i]; if (sensorp->present == B_FALSE) continue; if (get_temperature(sensorp, &temp) == -1) continue; sensorp->cur_temp = temp; if (env_debug) { envd_log(LOG_ERR, "%s temp = %d", sensorp->name, sensorp->cur_temp); } /* * If this sensor already triggered system shutdown, * don't log any more shutdown/warning messages for it. 
*/ if (sensorp->shutdown_initiated) continue; /* * Check for the temperature in warning and shutdown * range and take appropriate action. */ if (SENSOR_TEMP_IN_WARNING_RANGE(sensorp->cur_temp, sensorp)) { /* * Check if the temperature has been in * warning range during last * sensor_warning_duration interval. If so, * the temperature is truly in warning range * and we need to log a warning message, but * no more than once every * sensor_warning_interval seconds. */ time_t wtstamp = sensorp->warning_tstamp; ct = (time_t)(gethrtime() / NANOSEC); if (sensorp->warning_start == 0) sensorp->warning_start = ct; if (((ct - sensorp->warning_start) >= sensor_warning_duration) && (wtstamp == 0 || (ct - wtstamp) >= sensor_warning_interval)) { envd_log(LOG_CRIT, ENV_WARNING_MSG, sensorp->name, sensorp->cur_temp, (int8_t) sensorp->es->esb_low_warning, (int8_t) sensorp->es->esb_high_warning); sensorp->warning_tstamp = ct; } } else if (sensorp->warning_start != 0) sensorp->warning_start = 0; if (!shutdown_override && SENSOR_TEMP_IN_SHUTDOWN_RANGE(sensorp->cur_temp, sensorp)) { ct = (time_t)(gethrtime() / NANOSEC); if (sensorp->shutdown_tstamp == 0) sensorp->shutdown_tstamp = ct; /* * Shutdown the system if the temperature * remains in the shutdown range for over * sensor_shutdown_interval seconds. 
*/ if ((ct - sensorp->shutdown_tstamp) >= sensor_shutdown_interval) { /* * Log error */ sensorp->shutdown_initiated = B_TRUE; (void) snprintf(msgbuf, sizeof (msgbuf), ENV_SHUTDOWN_MSG, sensorp->name, sensorp->cur_temp, (int8_t) sensorp->es->esb_low_shutdown, (int8_t) sensorp->es->esb_high_shutdown); envd_log(LOG_ALERT, msgbuf); /* * Shutdown the system (only once) */ if (system_shutdown_started == B_FALSE) { (void) snprintf(syscmd, sizeof (syscmd), "%s \"%s\"", shutdown_cmd, msgbuf); envd_log(LOG_ALERT, syscmd); system_shutdown_started = B_TRUE; (void) system(syscmd); } } } else if (sensorp->shutdown_tstamp != 0) sensorp->shutdown_tstamp = 0; } } /* end of forever loop */ /*NOTREACHED*/ return (NULL); } static int scsi_log_sense(env_disk_t *diskp, uchar_t page_code, void *pagebuf, uint16_t pagelen, int page_control) { struct uscsi_cmd ucmd_buf; uchar_t cdb_buf[CDB_GROUP1]; struct scsi_extended_sense sense_buf; int ret_val; bzero(&cdb_buf, sizeof (cdb_buf)); bzero(&ucmd_buf, sizeof (ucmd_buf)); bzero(&sense_buf, sizeof (sense_buf)); cdb_buf[0] = SCMD_LOG_SENSE_G1; /* * For SATA we need to have the current threshold value set. * For SAS drives we can use the current cumulative value. * This is set for non-SMART drives, by passing a non-zero * page_control. 
*/ if (page_control) cdb_buf[2] = (0x01 << 6) | page_code; else cdb_buf[2] = page_code; cdb_buf[7] = (uchar_t)((pagelen & 0xFF00) >> 8); cdb_buf[8] = (uchar_t)(pagelen & 0x00FF); ucmd_buf.uscsi_cdb = (char *)cdb_buf; ucmd_buf.uscsi_cdblen = sizeof (cdb_buf); ucmd_buf.uscsi_bufaddr = (caddr_t)pagebuf; ucmd_buf.uscsi_buflen = pagelen; ucmd_buf.uscsi_rqbuf = (caddr_t)&sense_buf; ucmd_buf.uscsi_rqlen = sizeof (struct scsi_extended_sense); ucmd_buf.uscsi_flags = USCSI_RQENABLE | USCSI_READ | USCSI_SILENT; ucmd_buf.uscsi_timeout = DEFAULT_SCSI_TIMEOUT; ret_val = ioctl(diskp->fd, USCSICMD, ucmd_buf); if ((ret_val == 0) && (ucmd_buf.uscsi_status == 0)) { if (env_debug) envd_log(LOG_ERR, "log sense command for page_code 0x%x succeeded\n", page_code); return (ret_val); } if (env_debug) envd_log(LOG_ERR, "log sense command for %s failed. " "page_code 0x%x ret_val = 0x%x " "status = 0x%x errno = 0x%x\n", diskp->name, page_code, ret_val, ucmd_buf.uscsi_status, errno); return (1); } static int get_disk_temp(env_disk_t *diskp) { int ret; uchar_t tpage[256]; if (diskp->smart_supported == B_TRUE) { smart_structure smartpage; smart_attribute *temp_attrib = NULL; uint8_t checksum; uint8_t *index; int i; bzero(&smartpage, sizeof (smartpage)); ret = scsi_log_sense(diskp, GET_SMART_INFO, &smartpage, sizeof (smartpage), 0); if (ret != 0) { diskp->current_temp = DISK_INVALID_TEMP; diskp->ref_temp = DISK_INVALID_TEMP; return (-1); } /* * verify the checksum of the data. A 2's compliment * of the result addition of the is stored in the * last byte. The sum of all the checksum should be * 0. If the checksum is bad, return an error for * this iteration. */ index = (uint8_t *)&smartpage; for (i = checksum = 0; i < 512; i++) checksum += index[i]; if ((checksum != 0) && env_debug) { envd_log(LOG_ERR, "SMART checksum error! 0x%x\n", checksum); /* * We got bad data back from the drive, fail this * time around and picl will retry again. 
If this * continues to fail picl will give this drive a * failed status. */ diskp->current_temp = DISK_INVALID_TEMP; diskp->ref_temp = DISK_INVALID_TEMP; return (-1); } /* * Scan through the various SMART data and look for * the complete drive temp. */ for (i = 0; (i < SMART_FIELDS) && (smartpage.attribute[i].id != 0) && (temp_attrib == NULL); i++) { if (smartpage.attribute[i].id == HDA_TEMP) { temp_attrib = &smartpage.attribute[i]; } } /* * If we dont find any temp SMART attributes, this drive * does not support this page, disable temp checking * for this drive. */ if (temp_attrib == NULL) { /* * If the checksum is valid, the temp. attributes are * not supported, disable this drive from temp. * checking. */ if (env_debug) envd_log(LOG_ERR, "Temp ATTRIBUTE not supported\n"); diskp->smart_supported = B_FALSE; diskp->tpage_supported = B_FALSE; diskp->current_temp = DISK_INVALID_TEMP; diskp->ref_temp = DISK_INVALID_TEMP; return (-1); } if (env_debug) { envd_log(LOG_ERR, "flags = 0x%x%x,curr = 0x%x," "data = 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", temp_attrib->flags[0], temp_attrib->flags[1], temp_attrib->raw_data[0], temp_attrib->raw_data[1], temp_attrib->raw_data[2], temp_attrib->raw_data[3], temp_attrib->raw_data[4], temp_attrib->raw_data[5], temp_attrib->raw_data[6], temp_attrib->raw_data[7]); } if (temp_attrib->raw_data[1] != 0xFF) { diskp->current_temp = temp_attrib->raw_data[2]; diskp->ref_temp = temp_attrib->raw_data[2]; } else { diskp->ref_temp = DISK_INVALID_TEMP; diskp->current_temp = DISK_INVALID_TEMP; return (-1); } } else { ret = scsi_log_sense(diskp, TEMPERATURE_PAGE, tpage, sizeof (tpage), 1); if (ret != 0) { diskp->current_temp = DISK_INVALID_TEMP; diskp->ref_temp = DISK_INVALID_TEMP; return (-1); } /* * For the current temperature verify that the parameter * length is 0x02 and the parameter code is 0x00 * Temperature value of 255(0xFF) is considered INVALID. 
*/ if ((tpage[7] == 0x02) && (tpage[4] == 0x00) && (tpage[5] == 0x00)) { if (tpage[9] == 0xFF) { diskp->current_temp = DISK_INVALID_TEMP; return (-1); } else { diskp->current_temp = tpage[9]; } } /* * For the reference temperature verify that the parameter * length is 0x02 and the parameter code is 0x01 * Temperature value of 255(0xFF) is considered INVALID. */ if ((tpage[13] == 0x02) && (tpage[10] == 0x00) && (tpage[11] == 0x01)) { if (tpage[15] == 0xFF) { diskp->ref_temp = DISK_INVALID_TEMP; } else { diskp->ref_temp = tpage[15]; } } } return (0); } /* ARGSUSED */ static void * disk_temp_thr(void *args) { char syscmd[BUFSIZ]; char msgbuf[BUFSIZ]; timespec_t to; int ret, i; env_disk_t *diskp; pthread_mutex_t env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t env_monitor_cv = PTHREAD_COND_INITIALIZER; pm_state_change_t pmstate; int idle_time; int disk_pm_fd; time_t ct; if ((disk_pm_fd = open(PM_DEVICE, O_RDWR)) == -1) { envd_log(LOG_ERR, DISK_TEMP_THREAD_EXITING, errno, strerror(errno)); return (NULL); } for (;;) { /* * Sleep for specified seconds before issuing IOCTL * again. */ (void) pthread_mutex_lock(&env_monitor_mutex); ret = pthread_cond_reltimedwait_np(&env_monitor_cv, &env_monitor_mutex, &to); to.tv_sec = disk_scan_interval; to.tv_nsec = 0; if (ret != ETIMEDOUT) { (void) pthread_mutex_unlock( &env_monitor_mutex); continue; } (void) pthread_mutex_unlock(&env_monitor_mutex); for (i = 0; (diskp = envd_disks[i]) != NULL; i++) { if (diskp->present == B_FALSE) continue; if (diskp->tpage_supported == B_FALSE) continue; /* * If the disk temperature is above the warning threshold * continue monitoring until the temperature drops below * warning threshold. * if the temperature is in the NORMAL range monitor only * when the disk is BUSY. * We do not want to read the disk temperature if the disk is * is idling. The reason for this is disk will never get into * lowest power mode if we scan the disk temperature * peridoically. 
To avoid this situation we first determine * the idle_time of the disk. If the disk has been IDLE since * we scanned the temperature last time we will not read the * temperature. */ if (!DISK_TEMP_IN_WARNING_RANGE(diskp->current_temp, diskp)) { pmstate.physpath = diskp->physpath; pmstate.size = strlen(diskp->physpath); pmstate.component = 0; if ((idle_time = ioctl(disk_pm_fd, PM_GET_TIME_IDLE, &pmstate)) == -1) { if (errno != EINTR) { if (env_debug) envd_log(LOG_ERR, "ioctl PM_GET_TIME_IDLE failed for DISK0. errno=0x%x\n", errno); continue; } continue; } if (idle_time >= (disk_scan_interval/2)) { if (env_debug) { envd_log(LOG_ERR, "%s idle time = %d\n", diskp->name, idle_time); } continue; } } ret = get_disk_temp(diskp); if (ret != 0) continue; if (env_debug) { envd_log(LOG_ERR, "%s temp = %d ref. temp = %d\n", diskp->name, diskp->current_temp, diskp->ref_temp); } /* * If this disk already triggered system shutdown, don't * log any more shutdown/warning messages for it. */ if (diskp->shutdown_initiated) continue; /* * Check for the temperature in warning and shutdown range * and take appropriate action. */ if (DISK_TEMP_IN_WARNING_RANGE(diskp->current_temp, diskp)) { /* * Check if the temperature has been in warning * range during last disk_warning_duration interval. * If so, the temperature is truly in warning * range and we need to log a warning message, * but no more than once every disk_warning_interval * seconds. 
*/ time_t wtstamp = diskp->warning_tstamp; ct = (time_t)(gethrtime() / NANOSEC); if (diskp->warning_start == 0) diskp->warning_start = ct; if (((ct - diskp->warning_start) >= disk_warning_duration) && (wtstamp == 0 || (ct - wtstamp) >= disk_warning_interval)) { envd_log(LOG_CRIT, ENV_WARNING_MSG, diskp->name, diskp->current_temp, diskp->low_warning, diskp->high_warning); diskp->warning_tstamp = ct; } } else if (diskp->warning_start != 0) diskp->warning_start = 0; if (!shutdown_override && DISK_TEMP_IN_SHUTDOWN_RANGE(diskp->current_temp, diskp)) { ct = (time_t)(gethrtime() / NANOSEC); if (diskp->shutdown_tstamp == 0) diskp->shutdown_tstamp = ct; /* * Shutdown the system if the temperature remains * in the shutdown range for over disk_shutdown_interval * seconds. */ if ((ct - diskp->shutdown_tstamp) >= disk_shutdown_interval) { /* log error */ diskp->shutdown_initiated = B_TRUE; (void) snprintf(msgbuf, sizeof (msgbuf), ENV_SHUTDOWN_MSG, diskp->name, diskp->current_temp, diskp->low_shutdown, diskp->high_shutdown); envd_log(LOG_ALERT, msgbuf); /* shutdown the system (only once) */ if (system_shutdown_started == B_FALSE) { (void) snprintf(syscmd, sizeof (syscmd), "%s \"%s\"", shutdown_cmd, msgbuf); envd_log(LOG_ALERT, syscmd); system_shutdown_started = B_TRUE; (void) system(syscmd); } } } else if (diskp->shutdown_tstamp != 0) diskp->shutdown_tstamp = 0; } } /* end of forever loop */ } static void * fan_thr(void *args) { char msgbuf[BUFSIZ]; timespec_t to; int ret, i; pthread_mutex_t env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t env_monitor_cv = PTHREAD_COND_INITIALIZER; env_fan_t *fanp; #ifdef __lint args = args; #endif for (;;) { /* * Sleep for specified seconds before issuing IOCTL * again. 
*/ (void) pthread_mutex_lock(&env_monitor_mutex); ret = pthread_cond_reltimedwait_np(&env_monitor_cv, &env_monitor_mutex, &to); to.tv_sec = fan_scan_interval; to.tv_nsec = 0; if (ret != ETIMEDOUT) { (void) pthread_mutex_unlock(&env_monitor_mutex); continue; } (void) pthread_mutex_unlock(&env_monitor_mutex); for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { if (fanp->present == B_FALSE) continue; if (has_fan_failed(fanp) == B_TRUE) { if (fanp->last_status == FAN_FAILED) continue; fanp->last_status = FAN_FAILED; (void) snprintf(msgbuf, sizeof (msgbuf), ENV_FAN_FAILURE_WARNING_MSG, fanp->name, fan_rpm_string, fan_status_string); envd_log(LOG_ALERT, msgbuf); } else { if (fanp->last_status == FAN_OK) continue; fanp->last_status = FAN_OK; (void) snprintf(msgbuf, sizeof (msgbuf), ENV_FAN_OK_MSG, fanp->name); envd_log(LOG_ALERT, msgbuf); } } if (has_psufan_failed() == B_TRUE) { if (psufan_last_status == FAN_FAILED) continue; psufan_last_status = FAN_FAILED; (void) snprintf(msgbuf, sizeof (msgbuf), ENV_FAN_FAILURE_WARNING_MSG, SENSOR_PSU, fan_rpm_string, fan_status_string); envd_log(LOG_ALERT, msgbuf); } else { if (psufan_last_status == FAN_OK) continue; psufan_last_status = FAN_OK; (void) snprintf(msgbuf, sizeof (msgbuf), ENV_FAN_OK_MSG, SENSOR_PSU); envd_log(LOG_ALERT, msgbuf); } } /*NOTREACHED*/ return (NULL); } /* * Setup envrionmental monitor state and start threads to monitor * temperature, fan, disk and power management state. * Returns -1 on error, 0 if successful. */ static int envd_setup(void) { if (getenv("SUNW_piclenvd_debug") != NULL) env_debug = 1; if (pthread_attr_init(&thr_attr) != 0 || pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM) != 0) { return (-1); } /* * If ES segment is not present or has inconsistent information, we * use default values for sensor limits. For the sake of simplicity, * we still store these limits internally in the 'es' member in the * structure. 
*/ if (envd_es_setup() < 0) { envd_log(LOG_WARNING, ENV_DEFAULT_LIMITS); envd_es_default_setup(); } if (envd_setup_sensors() < 0) { if (env_debug) envd_log(LOG_ERR, "Failed to setup sensors\n"); system_temp_monitor = 0; } if (envd_setup_fans() < 0) { if (env_debug) envd_log(LOG_ERR, "Failed to setup fans\n"); fan_monitor = 0; pm_monitor = 0; } /* * Disable disk temperature monitoring until we have * LSI fw support to read SATA disk temperature */ if (disk_temp_monitor) { if (envd_setup_disks() < 0) { if (env_debug) envd_log(LOG_ERR, "Failed to setup disks\n"); disk_temp_monitor = 0; } } /* * Create a thread to monitor system temperatures */ if ((system_temp_monitor) && (system_temp_thr_created == B_FALSE)) { if (pthread_create(&system_temp_thr_id, &thr_attr, system_temp_thr, NULL) != 0) { envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED); } else { system_temp_thr_created = B_TRUE; if (env_debug) envd_log(LOG_ERR, "Created thread to monitor system temperatures\n"); } } /* * Create a thread to monitor fans */ if ((fan_monitor) && (fan_thr_created == B_FALSE)) { if (pthread_create(&fan_thr_id, &thr_attr, fan_thr, NULL) != 0) envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED); else { fan_thr_created = B_TRUE; if (env_debug) { envd_log(LOG_ERR, "Created thread to monitor system fans\n"); } } } /* * Create a thread to monitor PM state */ if ((pm_monitor) && (pmthr_created == B_FALSE)) { if (pthread_create(&pmthr_tid, &thr_attr, pmthr, NULL) != 0) envd_log(LOG_CRIT, PM_THREAD_CREATE_FAILED); else { pmthr_created = B_TRUE; if (env_debug) envd_log(LOG_ERR, "Created thread to monitor system power state\n"); } } /* * Create a thread to monitor disk temperature */ if ((disk_temp_monitor) && (disk_temp_thr_created == B_FALSE)) { if (pthread_create(&disk_temp_thr_id, &thr_attr, disk_temp_thr, NULL) != 0) { envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED); } else { disk_temp_thr_created = B_TRUE; if (env_debug) envd_log(LOG_ERR, "Created thread for disk temperatures\n"); } } return (0); } 
static void piclenvd_register(void) { picld_plugin_register(&my_reg_info); } static void piclenvd_init(void) { (void) env_picl_setup_tuneables(); /* * Do not allow disk temperature monitoring to be enabled * via tuneables. Disk temperature monitoring is disabled * until we have LSI fw support to read the temperature of * SATA disks */ disk_temp_monitor = 0; /* * Setup the environmental data structures */ if (envd_setup() != 0) { envd_log(LOG_CRIT, ENVD_PLUGIN_INIT_FAILED); return; } /* * Now setup/populate PICL tree */ env_picl_setup(); } static void piclenvd_fini(void) { /* * Invoke env_picl_destroy() to remove any PICL nodes/properties * (including volatile properties) we created. Once this call * returns, there can't be any more calls from the PICL framework * to get current temperature or fan speed. */ env_picl_destroy(); envd_close_sensors(); envd_close_fans(); } /*VARARGS2*/ void envd_log(int pri, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vsyslog(pri, fmt, ap); va_end(ap); } /* * Tunables support functions */ static env_tuneable_t * tuneable_lookup(picl_prophdl_t proph) { int i; env_tuneable_t *tuneablep = NULL; for (i = 0; i < ntuneables; i++) { tuneablep = &tuneables[i]; if (tuneablep->proph == proph) return (tuneablep); } return (NULL); } static int get_string_val(ptree_rarg_t *parg, void *buf) { picl_prophdl_t proph; env_tuneable_t *tuneablep; proph = parg->proph; tuneablep = tuneable_lookup(proph); if (tuneablep == NULL) return (PICL_FAILURE); (void) memcpy(buf, tuneablep->value, tuneablep->nbytes); return (PICL_SUCCESS); } static int set_string_val(ptree_warg_t *parg, const void *buf) { picl_prophdl_t proph; env_tuneable_t *tuneablep; if (parg->cred.dc_euid != 0) return (PICL_PERMDENIED); proph = parg->proph; tuneablep = tuneable_lookup(proph); if (tuneablep == NULL) return (PICL_FAILURE); (void) memcpy(tuneables->value, buf, tuneables->nbytes); return (PICL_SUCCESS); } static int get_int_val(ptree_rarg_t *parg, void *buf) { picl_prophdl_t 
proph; env_tuneable_t *tuneablep; proph = parg->proph; tuneablep = tuneable_lookup(proph); if (tuneablep == NULL) return (PICL_FAILURE); (void) memcpy(buf, tuneablep->value, tuneablep->nbytes); return (PICL_SUCCESS); } static int set_int_val(ptree_warg_t *parg, const void *buf) { picl_prophdl_t proph; env_tuneable_t *tuneablep; if (parg->cred.dc_euid != 0) return (PICL_PERMDENIED); proph = parg->proph; tuneablep = tuneable_lookup(proph); if (tuneablep == NULL) return (PICL_FAILURE); (void) memcpy(tuneablep->value, buf, tuneablep->nbytes); return (PICL_SUCCESS); } boolean_t has_fan_failed(env_fan_t *fanp) { fanspeed_t fan_speed; uchar_t status; uint8_t tach; int real_tach; int ret, ntries; if (fanp->fd == -1) return (B_TRUE); /* * Read RF_FAN_STATUS bit of the fan fault register, retry if * the PIC is busy, with a 1 second delay to allow it to update. */ for (ntries = 0; ntries < MAX_RETRIES_FOR_FAN_FAULT; ntries++) { ret = ioctl(fanp->fd, PIC_GET_FAN_STATUS, &status); if ((ret == 0) && ((status & 0x1) == 0)) break; (void) sleep(1); } if (ntries > 0) { if (env_debug) { envd_log(LOG_ERR, "%d retries attempted in reading fan status.\n", ntries); } } if (ntries == MAX_RETRIES_FOR_FAN_FAULT) { (void) strncpy(fan_status_string, NOT_AVAILABLE, sizeof (fan_status_string)); (void) strncpy(fan_rpm_string, NOT_AVAILABLE, sizeof (fan_rpm_string)); return (B_TRUE); } if (env_debug) envd_log(LOG_ERR, "fan status = 0x%x\n", status); /* * ST_FFAULT bit isn't implemented yet and we're reading only * individual fan status */ if (status & 0x1) { (void) snprintf(fan_status_string, sizeof (fan_status_string), "0x%x", status); if (ioctl(fanp->fd, PIC_GET_FAN_SPEED, &tach) != 0) { (void) strncpy(fan_rpm_string, NOT_AVAILABLE, sizeof (fan_rpm_string)); } else { real_tach = tach << 8; fan_speed = TACH_TO_RPM(real_tach); (void) snprintf(fan_rpm_string, sizeof (fan_rpm_string), "%d", fan_speed); } return (B_TRUE); } return (B_FALSE); } boolean_t has_psufan_failed(void) { uchar_t status; int 
ret, ntries; if (envd_sensor_psu.fd == -1) return (B_FALSE); /* * For psu, only fan fault is visible, no fan speed */ (void) strncpy(fan_rpm_string, NOT_AVAILABLE, sizeof (fan_rpm_string)); /* * Read RF_FAN_STATUS bit of the fan fault register, retry if * the PIC is busy, with a 1 second delay to allow it to update. */ for (ntries = 0; ntries < MAX_RETRIES_FOR_FAN_FAULT; ntries++) { ret = ioctl(envd_sensor_psu.fd, PIC_GET_FAN_STATUS, &status); if ((ret == 0) && ((status & 0x1) == 0)) break; (void) sleep(1); } if (ntries > 0) { if (env_debug) { envd_log(LOG_ERR, "%d retries attempted in reading fan status.\n", ntries); } } if (ntries == MAX_RETRIES_FOR_FAN_FAULT) { (void) strncpy(fan_status_string, NOT_AVAILABLE, sizeof (fan_status_string)); return (B_TRUE); } if (env_debug) envd_log(LOG_ERR, "fan status = 0x%x\n", status); if (status & 0x1) { (void) snprintf(fan_status_string, sizeof (fan_status_string), "0x%x", status); return (B_TRUE); } return (B_FALSE); } static int scsi_mode_select(env_disk_t *diskp, uchar_t page_code, uchar_t *pagebuf, uint16_t pagelen) { struct uscsi_cmd ucmd_buf; uchar_t cdb_buf[CDB_GROUP1]; struct scsi_extended_sense sense_buf; int ret_val; bzero(&cdb_buf, sizeof (cdb_buf)); bzero(&ucmd_buf, sizeof (ucmd_buf)); bzero(&sense_buf, sizeof (sense_buf)); cdb_buf[0] = SCMD_MODE_SELECT_G1; cdb_buf[1] = 1<> 8); cdb_buf[8] = (uchar_t)(pagelen & 0x00FF); ucmd_buf.uscsi_cdb = (char *)cdb_buf; ucmd_buf.uscsi_cdblen = sizeof (cdb_buf); ucmd_buf.uscsi_bufaddr = (caddr_t)pagebuf; ucmd_buf.uscsi_buflen = pagelen; ucmd_buf.uscsi_rqbuf = (caddr_t)&sense_buf; ucmd_buf.uscsi_rqlen = sizeof (struct scsi_extended_sense); ucmd_buf.uscsi_flags = USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT; ucmd_buf.uscsi_timeout = DEFAULT_SCSI_TIMEOUT; ret_val = ioctl(diskp->fd, USCSICMD, ucmd_buf); if (ret_val == 0 && ucmd_buf.uscsi_status == 0) { return (ret_val); } if (env_debug) envd_log(LOG_ERR, "mode select command for %s failed. 
" "page_code 0x%x ret_val = 0x%x " "status = 0x%x errno = 0x%x\n", diskp->name, page_code, ret_val, ucmd_buf.uscsi_status, errno); return (1); }