1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains the environmental PICL plug-in module.
29  */
30 
31 
32 /*
33  * Excalibur system contains up to two CPU and two PCI MAX1617 temperature
34  * devices, each consisting of two sensors: die and ambient. Each sensor is
35  * represented as a different minor device and the current temperature is read
36  * via an I2C_GET_TEMPERATURE ioctl call to the max1617 driver. Additionally,
37  * the MAX1617 device supports both a low and high temperature limit, which
38  * can trigger an alert condition, causing power supply to turn off.
39  *
40  * The environmental monitor defines the following thresholds per sensor:
41  *
42  *	high_power_off		high hard shutdown
43  *	high_shutdown		high soft shutdown limit
44  *	high_warning		high warning limit
45  *	low_warning		low warning limit
46  *	low_shutdown		low soft shutdown limit
47  *	low_power_off		low hard shutdown limit
48  *
49  * Above mentioned threshold values can be changed via "piclenvd.conf"
50  * configuration file.
51  *
52  * Environmental monitoring is done by the "envthr" thread. It periodically
53  * monitors both CPU die and CPU ambient temperatures and takes appropriate
54  * action depending upon the current temperature and threshold values for
55  * that sensor. If the temperature reaches the high_shutdown limit or the
56  * low_shutdown limit, and remains there for over shutdown_interval seconds,
57  * it forces a graceful system shutdown via tuneable shutdown_cmd string
58  * variable. Otherwise, if the temperature reaches the high_warning limit
59  * or the low_warning limit, it logs and prints a message on the console.
60  * This message will be printed at most at "warning_interval" seconds
61  * interval, which is also a tuneable variable.
62  *
63  * Excalibur system contains three fans: cpu, system and power supply. The
64  * cpu and system fans are under software control and their speed can be
65  * set to a value in the range 0 through 63. However, the software has no
66  * control over the power supply fan's speed (it's automatically controlled
67  * by the hardware), but it can turn it ON or OFF. When in EStar mode (i.e.
68  * the lowest power state), the environmental monitor turns off the power
69  * supply fan.
70  *
71  * Each fan is represented as a different minor device and the fan speed
72  * can be controlled by writing to the TDA8444 device driver. Note that
73  * these devices are read only and the driver caches the last speed set
74  * for each fan, thus allowing an interface to read the current fan speed
75  * also.
76  *
77  * The policy to control fan speed depends upon the sensor. For CPU die
78  * sensor, different policy is used depending upon whether the temperature
79  * is rising, falling or steady state. In case of CPU ambient sensor, only
80  * one policy (speed proportional to the current temperature) is used.
81  *
82  * The power state monitoring is done by the "pmthr" thread. It uses the
83  * PM_GET_STATE_CHANGE and PM_GET_STATE_CHANGE_WAIT ioctl commands to pick
84  * up any power state change events. It processes all queued power state
 * change events and determines the current lowest power state and saves it
86  * in cur_lpstate variable.
87  *
88  * Once the "envthr" and "pmthr" threads have been started, they are never
89  * killed. This is desirable so that we can do environmental monitoring
90  * during reinit process.  The "envd_rwlock" reader/writer lock is used
91  * to protect initialization of global state during reinit process against
92  * the "envthr" and "pmthr" trying to reference that state.
93  */
94 
95 #include <stdio.h>
96 #include <stdlib.h>
97 #include <sys/sysmacros.h>
98 #include <limits.h>
99 #include <string.h>
100 #include <stdarg.h>
101 #include <alloca.h>
102 #include <unistd.h>
103 #include <sys/processor.h>
104 #include <syslog.h>
105 #include <errno.h>
106 #include <fcntl.h>
107 #include <picl.h>
108 #include <picltree.h>
109 #include <picldefs.h>
110 #include <pthread.h>
111 #include <signal.h>
112 #include <libdevinfo.h>
113 #include <sys/pm.h>
114 #include <sys/open.h>
115 #include <sys/time.h>
116 #include <sys/utsname.h>
117 #include <sys/systeminfo.h>
118 #include <sys/i2c/clients/max1617.h>
119 #include <sys/i2c/clients/i2c_client.h>
120 #include <sys/xcalwd.h>
121 #include "envd.h"
122 
/*
 * Reader/writer lock protecting (re)initialization of global state during
 * the reinit process against the "envthr" and "pmthr" threads referencing
 * that state.
 */
static pthread_rwlock_t	envd_rwlock = PTHREAD_RWLOCK_INITIALIZER;
124 
/*
 * PICL plugin
 *
 * piclenvd_init and piclenvd_fini are the entry points placed in the
 * plug-in registration record (my_reg_info); env_picl_setup and
 * env_picl_destroy are defined outside this file.
 */
static void piclenvd_register(void);
static void piclenvd_init(void);
static void piclenvd_fini(void);
extern void env_picl_setup(void);
extern void env_picl_destroy(void);

/* mark piclenvd_register as an init routine of this object */
#pragma	init(piclenvd_register)
135 
/*
 * Plug-in registration record: plug-in interface version, criticality,
 * plug-in name, and the init/fini entry points.
 */
static picld_plugin_reg_t my_reg_info = {
	PICLD_PLUGIN_VERSION_1,
	PICLD_PLUGIN_CRITICAL,
	"SUNW_piclenvd",
	piclenvd_init,
	piclenvd_fini,
};
143 
144 
/*
 * Default threshold values for CPU junction/die and ambient sensors
 *
 * Initializer rows, in order: power-off limits (low, high), shutdown
 * limits (low, high), warning limits (low, high), MAX1617 min/max
 * temperature, then the fan policy (POLICY_TARGET_TEMP for the die sensor)
 * followed by its data.
 * NOTE(review): the "2" presumably is the number of policy data values
 * that follow (normal/other target) -- confirm against sensor_thresh_t
 * in envd.h.
 */
static sensor_thresh_t cpu_die_thresh_default = {
	CPU_DIE_LOW_POWER_OFF, CPU_DIE_HIGH_POWER_OFF,
	CPU_DIE_LOW_SHUTDOWN, CPU_DIE_HIGH_SHUTDOWN,
	CPU_DIE_LOW_WARNING, CPU_DIE_HIGH_WARNING,
	MAX1617_MIN_TEMP, MAX1617_MAX_TEMP,
	POLICY_TARGET_TEMP, 2,
	CPU_DIE_NORMAL_TARGET, CPU_DIE_OTHER_TARGET,
	0, 0, 0, 0
};
157 
/*
 * Default thresholds for the CPU ambient sensors.  Same row layout as
 * cpu_die_thresh_default, but using the linear fan policy (POLICY_LINEAR)
 * with the low/high nominal temperatures as its data.
 */
static sensor_thresh_t cpu_amb_thresh_default = {
	CPU_AMB_LOW_POWER_OFF, CPU_AMB_HIGH_POWER_OFF,
	CPU_AMB_LOW_SHUTDOWN, CPU_AMB_HIGH_SHUTDOWN,
	CPU_AMB_LOW_WARNING, CPU_AMB_HIGH_WARNING,
	MAX1617_MIN_TEMP, MAX1617_MAX_TEMP,
	POLICY_LINEAR, 2,
	CPU_AMB_LOW_NOMINAL, CPU_AMB_HIGH_NOMINAL,
	0, 0, 0, 0
};
167 
168 
/*
 * Dummy sensor threshold data structure for processing threshold tuneables
 *
 * The threshold entries of env_tuneables[] carry the address of a field
 * inside this structure as their arg1.
 * NOTE(review): presumably only used to derive the field offset that is
 * then applied to the real per-sensor threshold structures -- confirm in
 * process_threshold_tuneable().
 */
static sensor_thresh_t	dummy_thresh;
173 
/*
 * Temperature related constants for fan speed adjustment
 *
 * NOTE(review): units are presumably degrees Celsius (the sensor reading
 * resolution is 1C) -- confirm where these are used.
 */
#define	AVG_TEMP_HYSTERESIS	0.25
#define	RISING_TEMP_MARGIN	6
#define	FALLING_TEMP_MARGIN	3
180 
/*
 * tuneable variables
 *
 * The macros below are presumably the built-in defaults for the
 * like-named variables (assigned outside this part of the file -- TODO
 * confirm).  Each variable can be overridden through the keyword of the
 * same name in env_tuneables[].
 */
#define	FAN_SLOW_ADJUSTMENT	20		/* in percentage */
#define	FAN_INCREMENT_LIMIT	6		/* absolute value */
#define	FAN_DECREMENT_LIMIT	1		/* absolute value */
#define	DEVFSADM_CMD 		"/usr/sbin/devfsadm -i max1617"
#define	FRU_DEVFSADM_CMD 	"/usr/sbin/devfsadm -i seeprom"

int		env_debug;		/* non-zero enables debug logging */
static int	sensor_poll_interval;
static int	warning_interval;	/* min. seconds between warnings */
static int	warning_duration;
static int	shutdown_interval;	/* seconds at limit before shutdown */
static int	fan_slow_adjustment;
static int	fan_incr_limit;
static int	fan_decr_limit;
static int	disable_piclenvd;
static int	disable_warning;
static int	disable_power_off;
static int	disable_shutdown;

static char	shutdown_cmd[128];	/* graceful shutdown command */
static char	devfsadm_cmd[128];
static char	fru_devfsadm_cmd[128];	/* run to create FRU SEEPROM nodes */
/* per-sensor working copies of the thresholds, referenced by envd_sensors[] */
static sensor_thresh_t cpu0_die_thresh, cpu0_amb_thresh;
static sensor_thresh_t cpu1_die_thresh, cpu1_amb_thresh;
208 
/*
 * Temperature sensors
 *
 * One entry per CPU die/ambient sensor: sensor name, sensor device path,
 * threshold structure, FRU SEEPROM device path, sensor id within the FRU
 * environmental segment, and sensor flags.  The table is terminated by an
 * entry with a NULL name.
 * NOTE(review): the trailing -1 is presumably the not-yet-opened file
 * descriptor -- confirm against env_sensor_t in envd.h.
 */

static env_sensor_t envd_sensors[] = {
	{ SENSOR_CPU0_DIE, CPU0_DIE_SENSOR_DEVFS, &cpu0_die_thresh,
	    CPU0_FRU_DEVFS, CPU_FRU_DIE_SENSOR,
	    SFLAG_TARGET_TEMP | SFLAG_CPU_DIE_SENSOR, -1},
	{ SENSOR_CPU0_AMB, CPU0_AMB_SENSOR_DEVFS, &cpu0_amb_thresh,
	    CPU0_FRU_DEVFS, CPU_FRU_AMB_SENSOR, SFLAG_CPU_AMB_SENSOR, -1},
	{ SENSOR_CPU1_DIE, CPU1_DIE_SENSOR_DEVFS, &cpu1_die_thresh,
	    CPU1_FRU_DEVFS, CPU_FRU_DIE_SENSOR,
	    SFLAG_TARGET_TEMP | SFLAG_CPU_DIE_SENSOR, -1},
	{ SENSOR_CPU1_AMB, CPU1_AMB_SENSOR_DEVFS, &cpu1_amb_thresh,
	    CPU1_FRU_DEVFS, CPU_FRU_AMB_SENSOR, SFLAG_CPU_AMB_SENSOR, -1},
	{ NULL, NULL, NULL, NULL, 0, 0, -1}
};
226 
227 
/*
 * Fan devices
 *
 * Each descriptor gives the fan name, its device path and its minimum and
 * maximum speed.
 * NOTE(review): the two trailing -1 values are presumably "unset" markers
 * (e.g. file descriptor / forced_speed) -- confirm against env_fan_t in
 * envd.h.
 */
static env_fan_t envd_system_fan = {
	ENV_SYSTEM_FAN, ENV_SYSTEM_FAN_DEVFS,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};

static env_fan_t envd_cpu_fan = {
	ENV_CPU_FAN, ENV_CPU_FAN_DEVFS,
	CPU_FAN_SPEED_MIN, CPU_FAN_SPEED_MAX, -1, -1,
};

static env_fan_t envd_psupply_fan = {
	ENV_PSUPPLY_FAN, ENV_PSUPPLY_FAN_DEVFS,
	PSUPPLY_FAN_SPEED_MIN, PSUPPLY_FAN_SPEED_MAX, -1, -1,
};

/* NULL-terminated list of all fan descriptors */
static env_fan_t *envd_fans[] = {
	&envd_system_fan,
	&envd_cpu_fan,
	&envd_psupply_fan,
	NULL
};
252 
/*
 * Linked list of devices advertising lpm-ranges
 *
 * Released by delete_lpm_devices(), which takes envd_rwlock as a writer
 * while tearing the list down.
 */
static lpm_dev_t	*lpm_devices = NULL;
257 
/*
 * Excalibur lpm to system-fan speed
 * lpm values must be monotonically increasing (avoid divide-by-zero)
 *
 * The table is interpolated by y_of_x(), which divides by the difference
 * between consecutive lpm (x) values.
 */
static point_t	excal_lpm_system_fan_tbl[] = {
	/* {lpm, fspeed} */
	{18, 12},
	{25, 20},
	{33, 26},
	{44, 32},
	{51, 39},
	{63, 52},
	{64, 63}
};

/* table header: number of points and the points themselves */
static table_t	lpm_fspeed = {
	sizeof (excal_lpm_system_fan_tbl)/ sizeof (point_t),
	excal_lpm_system_fan_tbl
};
277 
/*
 * Sensor to fan map
 *
 * Associates each temperature sensor (by name) with the fan (by name)
 * listed alongside it: CPU die sensors map to the CPU fan, CPU ambient
 * sensors map to the system fan.
 */
typedef struct {
	char	*sensor_name;
	char	*fan_name;
} sensor_fan_map_t;

/* terminated by an entry with NULL names */
static sensor_fan_map_t sensor_fan_map[] = {
	{SENSOR_CPU0_DIE, ENV_CPU_FAN},
	{SENSOR_CPU1_DIE, ENV_CPU_FAN},
	{SENSOR_CPU0_AMB, ENV_SYSTEM_FAN},
	{SENSOR_CPU1_AMB, ENV_SYSTEM_FAN},
	{NULL, NULL}
};
293 
/*
 * Sensor to PM device map
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names only -- confirm against the code that fills them in.
 */
struct sensor_pmdev {
	int		sensor_id;
	char		*sensor_name;
	char		*pmdev_name;	/* NULL in the static table below */
	char		*speed_comp_name;
	int		speed_comp;
	int		full_power;
	int		cur_power;
	env_sensor_t	*sensorp;
	sensor_pmdev_t	*next;
};

#define	SPEED_COMPONENT_NAME	"CPU Speed"

/*
 * One entry per CPU die sensor; members not listed in an initializer are
 * zero-initialized.  Terminated by an entry with sensor_id -1.
 */
static sensor_pmdev_t sensor_pmdevs[] = {
	{SENSOR_CPU0_ID, SENSOR_CPU0_DIE, NULL, SPEED_COMPONENT_NAME},
	{SENSOR_CPU1_ID, SENSOR_CPU1_DIE, NULL, SPEED_COMPONENT_NAME},
	{-1, NULL, NULL, NULL}
};
316 
/*
 * Environmental thread variables
 */
static boolean_t	system_shutdown_started = B_FALSE;
static boolean_t	envthr_created = B_FALSE;	/* envthr created */
static pthread_t	envthr_tid;		/* envthr thread ID */
static pthread_attr_t	thr_attr;

/*
 * Power management thread (pmthr) variables
 */
static boolean_t	pmdev_names_init = B_FALSE;
static pthread_t	pmthr_tid;		/* pmthr thread ID */
static int		pmthr_exists = B_FALSE;	/* pmthr exists */
static int		pm_fd = -1;		/* PM device file descriptor */
static int		cur_lpstate;		/* cur low power state */

/*
 * Miscellaneous variables and declarations
 */
/* set once the respective devfsadm command has been run, to run it once */
static int	fru_devfsadm_invoked = 0;
static int	devfsadm_invoked = 0;
static char	tokdel[] = " \t\n\r";		/* whitespace delimiters */
static uint_t	envd_sleep(uint_t);
341 
/*
 * Tuneable data structure/array and processing functions
 *
 * Each tuneable pairs a keyword with the function that processes it.
 * The function is handed the keyword, a buffer, arg1, arg2, a file name
 * and a line number, matching the prototypes below.
 */

typedef struct {
	char		*name;		/* keyword */
	int		(*func)(char *, char *, void *, int, char *, int);
					/* tuneable processing function */
	void		*arg1;		/* tuneable arg1 (memory address) */
	int		arg2;		/* tuneable arg2 (size or flags) */
} env_tuneable_t;

static int process_int_tuneable(char *keyword, char *buf, void *addr,
    int size, char *fname, int line);
static int process_string_tuneable(char *keyword, char *buf, void *addr,
    int size, char *fname, int line);
static int process_threshold_tuneable(char *keyword, char *buf, void *addr,
    int flags, char *fname, int line);
static void process_env_conf_file(void);
361 
/*
 * Table of recognized tuneable keywords, terminated by a NULL name.
 *
 * Threshold keywords pass the address of a dummy_thresh field as arg1 and
 * sensor flags as arg2; integer and string keywords pass the address and
 * size of the variable to set.
 */
static env_tuneable_t env_tuneables[] = {
	/* thresholds without a sensor-type prefix (flags == 0) */
	{"low_power_off", process_threshold_tuneable,
	    &dummy_thresh.low_power_off, 0},
	{"low_shutdown", process_threshold_tuneable,
	    &dummy_thresh.low_shutdown, 0},
	{"low_warning", process_threshold_tuneable,
	    &dummy_thresh.low_warning, 0},
	{"high_power_off", process_threshold_tuneable,
	    &dummy_thresh.high_power_off, 0},
	{"high_shutdown", process_threshold_tuneable,
	    &dummy_thresh.high_shutdown, 0},
	{"high_warning", process_threshold_tuneable,
	    &dummy_thresh.high_warning, 0},
	/* forced fan speeds */
	{"force_cpu_fan", process_int_tuneable, &envd_cpu_fan.forced_speed,
	    sizeof (envd_cpu_fan.forced_speed)},
	{"force_system_fan", process_int_tuneable,
	    &envd_system_fan.forced_speed,
	    sizeof (envd_system_fan.forced_speed)},

	/* CPU ambient sensor thresholds */
	{"cpu_amb_low_power_off", process_threshold_tuneable,
	    &dummy_thresh.low_power_off, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_low_shutdown", process_threshold_tuneable,
	    &dummy_thresh.low_shutdown, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_low_warning", process_threshold_tuneable,
	    &dummy_thresh.low_warning, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_low_nominal", process_threshold_tuneable,
	    &dummy_thresh.policy_data[LOW_NOMINAL_LOC], SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_power_off", process_threshold_tuneable,
	    &dummy_thresh.high_power_off, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_shutdown", process_threshold_tuneable,
	    &dummy_thresh.high_shutdown, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_warning", process_threshold_tuneable,
	    &dummy_thresh.high_warning, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_nominal", process_threshold_tuneable,
	    &dummy_thresh.policy_data[HIGH_NOMINAL_LOC], SFLAG_CPU_AMB_SENSOR},

	/* CPU die sensor thresholds */
	{"cpu_die_low_power_off", process_threshold_tuneable,
	    &dummy_thresh.low_power_off, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_low_shutdown", process_threshold_tuneable,
	    &dummy_thresh.low_shutdown, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_low_warning", process_threshold_tuneable,
	    &dummy_thresh.low_warning, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_normal_target", process_threshold_tuneable,
	    &dummy_thresh.policy_data[0], SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_high_power_off", process_threshold_tuneable,
	    &dummy_thresh.high_power_off, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_high_shutdown", process_threshold_tuneable,
	    &dummy_thresh.high_shutdown, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_high_warning", process_threshold_tuneable,
	    &dummy_thresh.high_warning, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_other_target", process_threshold_tuneable,
	    &dummy_thresh.policy_data[1], SFLAG_CPU_DIE_SENSOR},

	/* general integer and string tuneables */
	{"sensor_poll_interval", process_int_tuneable, &sensor_poll_interval,
	    sizeof (sensor_poll_interval)},
	{"warning_interval", process_int_tuneable, &warning_interval,
	    sizeof (warning_interval)},
	{"warning_duration", process_int_tuneable, &warning_duration,
	    sizeof (warning_duration)},
	{"disable_piclenvd", process_int_tuneable, &disable_piclenvd,
	    sizeof (disable_piclenvd)},
	{"disable_power_off", process_int_tuneable, &disable_power_off,
	    sizeof (disable_power_off)},
	{"disable_warning", process_int_tuneable, &disable_warning,
	    sizeof (disable_warning)},
	{"disable_shutdown", process_int_tuneable, &disable_shutdown,
	    sizeof (disable_shutdown)},
	{"shutdown_interval", process_int_tuneable, &shutdown_interval,
	    sizeof (shutdown_interval)},
	{"shutdown_cmd", process_string_tuneable, &shutdown_cmd[0],
	    sizeof (shutdown_cmd)},
	{"devfsadm_cmd", process_string_tuneable, &devfsadm_cmd[0],
	    sizeof (devfsadm_cmd)},
	{"fru_devfsadm_cmd", process_string_tuneable, &fru_devfsadm_cmd[0],
	    sizeof (fru_devfsadm_cmd)},
	{"fan_slow_adjustment", process_int_tuneable, &fan_slow_adjustment,
	    sizeof (fan_slow_adjustment)},
	{"fan_incr_limit", process_int_tuneable, &fan_incr_limit,
	    sizeof (fan_incr_limit)},
	{"fan_decr_limit", process_int_tuneable, &fan_decr_limit,
	    sizeof (fan_decr_limit)},
	{"env_debug", process_int_tuneable, &env_debug, sizeof (env_debug)},
	{ NULL, NULL, NULL, 0}
};
446 
447 static void
fini_table(table_t * tblp)448 fini_table(table_t *tblp)
449 {
450 	if (tblp == NULL)
451 		return;
452 	free(tblp->xymap);
453 	free(tblp);
454 }
455 
456 static table_t *
init_table(int npoints)457 init_table(int npoints)
458 {
459 	table_t		*tblp;
460 	point_t		*xy;
461 
462 	if (npoints == 0)
463 		return (NULL);
464 
465 	if ((tblp = malloc(sizeof (*tblp))) == NULL)
466 		return (NULL);
467 
468 	if ((xy = malloc(sizeof (*xy) * npoints)) == NULL) {
469 		free(tblp);
470 		return (NULL);
471 	}
472 
473 	tblp->nentries = npoints;
474 	tblp->xymap = xy;
475 
476 	return (tblp);
477 }
478 
479 /*
480  * Temp-LPM Table format:
481  * temp, lpm, temp, lpm, ...
482  */
483 static table_t *
parse_lpm_ranges(uint32_t * bufp,size_t nbytes)484 parse_lpm_ranges(uint32_t *bufp, size_t nbytes)
485 {
486 	int	nentries;
487 	table_t	*tblp = NULL;
488 	int	i;
489 
490 	if (bufp == NULL)
491 		return (NULL);
492 
493 	/*
494 	 * Table should have at least 2 points
495 	 * and all points should have x and y values
496 	 */
497 	if ((nbytes < (2 * sizeof (point_t))) ||
498 	    (nbytes & (sizeof (point_t) - 1))) {
499 		if (env_debug)
500 			envd_log(LOG_ERR, ENV_INVALID_PROPERTY_FORMAT,
501 			    LPM_RANGES_PROPERTY);
502 		return (NULL);
503 	}
504 
505 	/* number of entries in the temp-lpm table */
506 	nentries = nbytes/sizeof (point_t);
507 
508 	tblp = init_table(nentries);
509 	if (tblp == NULL)
510 		return (tblp);
511 
512 	/* copy the tuples */
513 	tblp->xymap[0].x = (int)*bufp++;
514 	tblp->xymap[0].y = (int)*bufp++;
515 	for (i = 1; i < nentries; ++i) {
516 		tblp->xymap[i].x = (int)*bufp++;
517 		tblp->xymap[i].y = (int)*bufp++;
518 		if (tblp->xymap[i].x <= tblp->xymap[i - 1].x) {
519 			fini_table(tblp);
520 			if (env_debug)
521 				envd_log(LOG_ERR, ENV_INVALID_PROPERTY_FORMAT,
522 				    LPM_RANGES_PROPERTY);
523 			return (NULL);
524 		}
525 	}
526 
527 	return (tblp);
528 }
529 
530 /*
531  * function: calculates y for a given x based on a table of points
532  * for monotonically increasing x values.
533  * 'tbl' specifies the table to use, 'val' specifies the 'x', returns 'y'
534  */
535 static int
y_of_x(table_t * tbl,int xval)536 y_of_x(table_t *tbl, int xval)
537 {
538 	int		i;
539 	int		entries;
540 	point_t		*xymap;
541 	float		newval;
542 	float		dy, dx, slope;
543 
544 	entries = tbl->nentries;
545 	xymap = tbl->xymap;
546 	if (xval <= xymap[0].x)
547 		return (xymap[0].y);
548 	else if (xval >= xymap[entries - 1].x)
549 		return (xymap[entries - 1].y);
550 
551 	for (i = 1; i < entries - 1; i++) {
552 		if (xval == xymap[i].x)
553 			return (xymap[i].y);
554 		if (xval < xymap[i].x)
555 			break;
556 	}
557 
558 	/*
559 	 * Use linear interpolation
560 	 */
561 	dy = (float)(xymap[i].y - xymap[i-1].y);
562 	dx = (float)(xymap[i].x - xymap[i-1].x);
563 	slope = dy/dx;
564 	newval = xymap[i - 1].y + slope * (xval - xymap[i - 1].x);
565 	return ((int)(newval + (newval >= 0 ? 0.5 : -0.5)));
566 }
567 
568 static int
get_lpm_speed(lpm_dev_t * lpmdevs,int temp)569 get_lpm_speed(lpm_dev_t *lpmdevs, int temp)
570 {
571 	lpm_dev_t	*devp;
572 	int		lpm;
573 	int		speed;
574 	int		maxspeed;
575 
576 	if (lpmdevs == NULL)
577 		return (0);
578 	maxspeed = 0;
579 	for (devp = lpmdevs; devp != NULL; devp = devp->next) {
580 		if (devp->temp_lpm_tbl == NULL)
581 			continue;
582 		lpm = y_of_x(devp->temp_lpm_tbl, temp);
583 		if (env_debug)
584 			envd_log(LOG_INFO, "ambient %d lpm %d\n", temp, lpm);
585 		speed = y_of_x(&lpm_fspeed, lpm);
586 		maxspeed = maxspeed > speed ? maxspeed : speed;
587 		if (env_debug)
588 			envd_log(LOG_INFO, "lpm %d fanspeed %d\n", lpm, speed);
589 	}
590 	return (maxspeed);
591 }
592 
593 /*
594  * Callback function used by ptree_walk_tree_by_class
595  */
596 static int
cb_lpm(picl_nodehdl_t nodeh,void * args)597 cb_lpm(picl_nodehdl_t nodeh, void *args)
598 {
599 	lpm_dev_t	**retp = (lpm_dev_t **)args;
600 	int		err;
601 	ptree_propinfo_t	pinfo;
602 	picl_prophdl_t		proph;
603 	size_t			psize;
604 	void			*bufp;
605 	table_t			*temp_lpm_tbl;
606 	lpm_dev_t		*newdev;
607 
608 	err = ptree_get_prop_by_name(nodeh, LPM_RANGES_PROPERTY, &proph);
609 	if (err != PICL_SUCCESS)
610 		return (PICL_WALK_CONTINUE);
611 
612 	err = ptree_get_propinfo(proph, &pinfo);
613 	if ((err != PICL_SUCCESS) ||
614 	    (pinfo.piclinfo.type != PICL_PTYPE_BYTEARRAY))
615 		return (PICL_WALK_CONTINUE);
616 	psize = pinfo.piclinfo.size;
617 	bufp = alloca(psize);
618 
619 	err = ptree_get_propval(proph, bufp, psize);
620 	if (err != PICL_SUCCESS)
621 		return (PICL_WALK_CONTINUE);
622 
623 	temp_lpm_tbl = parse_lpm_ranges(bufp, psize);
624 	if (temp_lpm_tbl == NULL) {
625 		return (PICL_WALK_CONTINUE);
626 	}
627 
628 	newdev = malloc(sizeof (*newdev));
629 	if (newdev == NULL) {
630 		fini_table(temp_lpm_tbl);
631 		return (PICL_WALK_TERMINATE);
632 	}
633 
634 	memset(newdev, 0, sizeof (*newdev));
635 
636 	newdev->nodeh = nodeh;
637 	newdev->temp_lpm_tbl = temp_lpm_tbl;
638 
639 	/* add newdev to the list */
640 	newdev->next = *retp;
641 	*retp = newdev;
642 
643 	return (PICL_WALK_CONTINUE);
644 }
645 
646 /*
647  * Find all devices advertising "lpm-ranges" property, parse and store
648  * the lpm tables for each device
649  */
650 static int
setup_lpm_devices(lpm_dev_t ** devpp)651 setup_lpm_devices(lpm_dev_t **devpp)
652 {
653 	picl_nodehdl_t	plath;
654 	int		err;
655 	lpm_dev_t	*lpmp;
656 
657 	err = ptree_get_node_by_path("/platform", &plath);
658 	if (err != PICL_SUCCESS)
659 		return (err);
660 
661 	lpmp = NULL;
662 	err = ptree_walk_tree_by_class(plath, NULL, (void *)&lpmp, cb_lpm);
663 	if (err == PICL_SUCCESS)
664 		*devpp = lpmp;
665 	return (err);
666 }
667 
668 /*
669  * Remove all lpm_devices and their tables.
670  */
671 static void
delete_lpm_devices(void)672 delete_lpm_devices(void)
673 {
674 	lpm_dev_t	*devp, *next;
675 
676 	(void) pthread_rwlock_wrlock(&envd_rwlock);
677 
678 	if (lpm_devices == NULL) {
679 		(void) pthread_rwlock_unlock(&envd_rwlock);
680 		return;
681 	}
682 
683 	devp = lpm_devices;
684 
685 	while (devp != NULL) {
686 		fini_table(devp->temp_lpm_tbl);
687 		next = devp->next;
688 		free(devp);
689 		devp = next;
690 	}
691 
692 	lpm_devices = NULL;
693 
694 	(void) pthread_rwlock_unlock(&envd_rwlock);
695 }
696 
697 /*
698  * Translate observed (measured) temperature into expected (correct)
699  * temperature
700  */
701 static int
xlate_obs2exp(env_sensor_t * sensorp,tempr_t temp)702 xlate_obs2exp(env_sensor_t *sensorp, tempr_t temp)
703 {
704 	int		i, entries, new_temp, denominator;
705 	tempr_map_t	*map;
706 	float		ftemp;
707 
708 	entries = sensorp->obs2exp_cnt;
709 	map = sensorp->obs2exp_map;
710 	if (entries < 2 || map == NULL)  {
711 		/* no map or can't map it */
712 		new_temp = temp;
713 	} else {
714 		/*
715 		 * Any point beyond the range specified by the map is
716 		 * extrapolated using either the first two or the last
717 		 * two entries in the map.
718 		 */
719 		for (i = 1; i < entries-1; i++)
720 			if (temp < map[i].observed)
721 				break;
722 		/*
723 		 * Interpolate/extrapolate the temperature using linear
724 		 * equation with map[i-1] and map[i] being the two ends
725 		 * of the line segment.
726 		 */
727 		denominator = map[i].observed - map[i-1].observed;
728 		if (denominator == 0) {
729 			/*
730 			 * Infinite slope. Since the temperature reading
731 			 * resolution is 1C, force denominator to 1 to
732 			 * avoid divide by zero.
733 			 */
734 			denominator = 1;
735 		}
736 		ftemp = map[i-1].expected +  (temp - map[i-1].observed) *
737 		    (float)(map[i].expected - map[i-1].expected)/denominator;
738 		new_temp = (int)(ftemp + (ftemp >= 0 ? 0.5 : -0.5));
739 	}
740 
741 	return (new_temp);
742 }
743 
744 
745 /*
746  * Translate expected (correct) temperature into observed (measured)
747  * temperature
748  */
749 static int
xlate_exp2obs(env_sensor_t * sensorp,tempr_t temp)750 xlate_exp2obs(env_sensor_t *sensorp, tempr_t temp)
751 {
752 	int		i, entries, new_temp, denominator;
753 	tempr_map_t	*map;
754 	float		ftemp;
755 	sensor_thresh_t	*threshp = sensorp->temp_thresh;
756 
757 	entries = sensorp->obs2exp_cnt;
758 	map = sensorp->obs2exp_map;
759 	if (entries < 2 || map == NULL)
760 		/* no map or can't map it */
761 		new_temp = temp;
762 	else {
763 		/*
764 		 * Any point beyond the range specified by the map is
765 		 * extrapolated using either the first two or the last
766 		 * two entries in the map.
767 		 */
768 		for (i = 1; i < entries-1; i++)
769 			if (temp < map[i].expected)
770 				break;
771 
772 		/*
773 		 * Interpolate/extrapolate the temperature using linear
774 		 * equation with map[i-1] and map[i] being the two ends
775 		 * of the line segment.
776 		 */
777 		denominator = map[i].expected - map[i-1].expected;
778 		if (denominator == 0) {
779 			/*
780 			 * Infinite slope. Since the temperature reading
781 			 * resolution is 1C, force denominator to 1 to
782 			 * avoid divide by zero.
783 			 */
784 			denominator = 1;
785 		}
786 		ftemp = map[i-1].observed + (temp - map[i-1].expected) *
787 		    (float)(map[i].observed - map[i-1].observed)/denominator;
788 		new_temp = (int)(ftemp + (ftemp >= 0 ? 0.5 : -0.5));
789 	}
790 
791 	if (threshp) {
792 		if (new_temp > threshp->max_limit)
793 			new_temp = threshp->max_limit;
794 		else if (new_temp < threshp->min_limit)
795 			new_temp = threshp->min_limit;
796 	}
797 
798 	return (new_temp);
799 }
800 
801 
802 /*
803  * Check if the specified FRU is present.
804  * Returns 1 if present; 0 otherwise.
805  */
806 static int
fru_present(char * path)807 fru_present(char *path)
808 {
809 	char		*p, physpath[PATH_MAX];
810 	di_node_t	root_node;
811 	int		fru_present = 0;
812 
813 	/*
814 	 * Construct FRU device path by stripping minor
815 	 * node name from the path and use di_init() to
816 	 * see if the node exists.
817 	 */
818 	(void) strlcpy(physpath, path, sizeof (physpath));
819 	p = strrchr(physpath, ':');
820 	if (p != NULL)
821 		*p = '\0';
822 	if ((root_node = di_init(physpath, DINFOMINOR)) != DI_NODE_NIL) {
823 		di_fini(root_node);
824 		fru_present = 1;
825 	}
826 	return (fru_present);
827 }
828 
829 
830 /*
831  * Get environmental segment from the specified FRU SEEPROM
832  */
833 static int
get_envseg(int fd,void ** envsegp,int * envseglenp)834 get_envseg(int fd, void **envsegp, int *envseglenp)
835 {
836 	int			i, segcnt, envseglen;
837 	section_layout_t	section;
838 	segment_layout_t	segment;
839 	uint8_t			*envseg;
840 
841 	if (lseek(fd, (long)SECTION_HDR_OFFSET, 0) == -1L ||
842 	    read(fd, &section, sizeof (section)) != sizeof (section)) {
843 		return (EINVAL);
844 	}
845 
846 	/*
847 	 * Verify we have the correct section and contents are valid
848 	 * For now, we don't verify the CRC.
849 	 */
850 	if (section.header_tag != SECTION_HDR_TAG ||
851 	    GET_UNALIGN16(&section.header_version[0]) != SECTION_HDR_VER) {
852 		if (env_debug)
853 			envd_log(LOG_INFO,
854 			    "Invalid section header tag:%x  version:%x\n",
855 			    section.header_tag,
856 			    GET_UNALIGN16(&section.header_version));
857 		return (EINVAL);
858 	}
859 
860 	/*
861 	 * Locate our environmental segment
862 	 */
863 	segcnt = section.segment_count;
864 	for (i = 0; i < segcnt; i++) {
865 		if (read(fd, &segment, sizeof (segment)) != sizeof (segment)) {
866 			return (errno);
867 		}
868 		if (env_debug > 1)
869 			envd_log(LOG_INFO,
870 			    "Seg name: %x  desc:%x off:%x  len:%x\n",
871 			    GET_UNALIGN16(&segment.name),
872 			    GET_UNALIGN32(&segment.descriptor[0]),
873 			    GET_UNALIGN16(&segment.offset),
874 			    GET_UNALIGN16(&segment.length));
875 
876 		if (GET_UNALIGN16(&segment.name) == ENVSEG_NAME)
877 			break;
878 	}
879 
880 	if (i >= segcnt) {
881 		return (ENOENT);
882 	}
883 
884 	/*
885 	 * Allocate memory to hold the environmental segment data.
886 	 */
887 	envseglen = GET_UNALIGN16(&segment.length);
888 	if ((envseg = malloc(envseglen)) == NULL) {
889 		return (ENOMEM);
890 	}
891 
892 	if (lseek(fd, (long)GET_UNALIGN16(&segment.offset), 0) == -1L ||
893 	    read(fd, envseg, envseglen) != envseglen) {
894 		(void) free(envseg);
895 		return (EIO);
896 	}
897 
898 	*envsegp = envseg;
899 	*envseglenp = envseglen;
900 
901 	if (env_debug > 1) {
902 		char	msgbuf[256];
903 		for (i = 0; i < envseglen; i++) {
904 			(void) sprintf(&msgbuf[3*(i&0xf)], "%2x ", envseg[i]);
905 			if ((i & 0xf) == 0xf || i == (envseglen-1))
906 				envd_log(LOG_INFO, "envseg[%2x]: %s\n",
907 				    (i & ~0xf), msgbuf);
908 		}
909 	}
910 
911 	return (0);
912 }
913 
914 
915 /*
916  * Get all environmental segments
917  */
918 static fruenvseg_t *
get_fru_envsegs(void)919 get_fru_envsegs(void)
920 {
921 	env_sensor_t		*sensorp;
922 	fruenvseg_t		*frup, *fruenvsegs;
923 	envseg_layout_t		*envsegp;
924 	void			*envsegbufp;
925 	int			fd, envseglen, hdrlen;
926 	char			path[PATH_MAX];
927 
928 	fruenvsegs = NULL;
929 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
930 		if (sensorp->fru == NULL)
931 			continue;
932 
933 		for (frup = fruenvsegs; frup != NULL; frup = frup->next)
934 			if (strcmp(frup->fru, sensorp->fru) == 0)
935 				break;
936 
937 		if (frup != NULL)
938 			continue;
939 
940 		frup = (fruenvseg_t *)malloc(sizeof (fruenvseg_t));
941 		if (frup == NULL)
942 			continue;
943 
944 		/* add this FRU to our list */
945 		frup->fru = sensorp->fru;
946 		frup->envsegbufp = NULL;
947 		frup->envseglen = 0;
948 		frup->next = fruenvsegs;
949 		fruenvsegs = frup;
950 
951 		/*
952 		 * Now get the environmental segment from this FRU
953 		 */
954 		(void) strcpy(path, "/devices");
955 		(void) strlcat(path, sensorp->fru, sizeof (path));
956 	retry:
957 		errno = 0;
958 		fd = open(path, O_RDONLY);
959 		if (env_debug > 1)
960 			envd_log(LOG_INFO,
961 			    "fru SEEPROM: %s fd: %d  errno:%d\n",
962 			    path, fd, errno);
963 		if (fd == -1 && errno == ENOENT && fru_present(frup->fru)) {
964 			if (fru_devfsadm_invoked ||
965 			    fru_devfsadm_cmd[0] == '\0') {
966 				envd_log(LOG_CRIT, ENV_FRU_OPEN_FAIL,
967 				    sensorp->fru, errno, strerror(errno));
968 				continue;
969 
970 			}
971 			/*
972 			 * FRU is present but no path exists as
973 			 * someone rebooted the system without
974 			 * "-r" option. Let's invoke "devfsadm"
975 			 * once to create seeprom nodes and try
976 			 * again so that we can monitor all
977 			 * accessible sensors properly and prevent
978 			 * any CPU overheating.
979 			 */
980 			if (env_debug)
981 				envd_log(LOG_INFO,
982 				    "Invoking '%s' to create FRU nodes\n",
983 				    fru_devfsadm_cmd);
984 			fru_devfsadm_invoked = 1;
985 			(void) system(fru_devfsadm_cmd);
986 			goto retry;
987 		}
988 
989 		/*
990 		 * Read environmental segment from this FRU SEEPROM
991 		 */
992 		if (get_envseg(fd, &envsegbufp, &envseglen) == 0) {
993 			/*
994 			 * Validate envseg version number and header length
995 			 */
996 			envsegp = (envseg_layout_t *)envsegbufp;
997 			hdrlen = sizeof (envseg_layout_t) -
998 			    sizeof (envseg_sensor_t) +
999 			    (envsegp->sensor_count) * sizeof (envseg_sensor_t);
1000 
1001 			if (envsegp->version != ENVSEG_VERSION ||
1002 			    envseglen < hdrlen) {
1003 				/*
1004 				 * version mismatch or header not big enough
1005 				 */
1006 				envd_log(LOG_CRIT, ENV_FRU_BAD_ENVSEG,
1007 				    sensorp->fru, errno, strerror(errno));
1008 				if (envsegbufp != NULL)
1009 					(void) free(envsegbufp);
1010 			} else {
1011 				frup->envseglen = envseglen;
1012 				frup->envsegbufp = envsegbufp;
1013 			}
1014 		}
1015 		(void) close(fd);
1016 	}
1017 	return (fruenvsegs);
1018 }
1019 
1020 /*
1021  * Process environmental segment for all FRUs.
1022  */
1023 static void
process_fru_envseg()1024 process_fru_envseg()
1025 {
1026 	env_sensor_t		*sensorp;
1027 	sensor_thresh_t		*threshp;
1028 	envseg_layout_t		*envsegp;
1029 	envseg_sensor_data_t	*datap;
1030 	fruenvseg_t		*frup, *fruenvsegs;
1031 	int			i, envseglen, sensorcnt;
1032 	uint_t			offset, length, mapentries;
1033 
1034 	/*
1035 	 * Lookup/read environmental segments from FRU SEEPROMs and
1036 	 * process it. Note that we read each SEEPROM once as it's
1037 	 * a slow device.
1038 	 */
1039 	fruenvsegs = get_fru_envsegs();
1040 
1041 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1042 		if (sensorp->fru == NULL)
1043 			continue;
1044 
1045 		/*
1046 		 * Locate our FRU environmental segment
1047 		 */
1048 		for (frup = fruenvsegs; frup != NULL; frup = frup->next)
1049 			if (strcmp(frup->fru, sensorp->fru) == 0)
1050 				break;
1051 		if (frup == NULL || frup->envsegbufp == NULL)
1052 			continue;
1053 
1054 		envsegp = (envseg_layout_t *)frup->envsegbufp;
1055 		envseglen = frup->envseglen;
1056 		sensorcnt = envsegp->sensor_count;
1057 
1058 		/*
1059 		 * Locate our sensor data record entry
1060 		 */
1061 		for (i = 0; i < sensorcnt; i++) {
1062 			uint32_t	id;
1063 
1064 			id = GET_UNALIGN32(&envsegp->sensors[i].sensor_id[0]);
1065 			if (env_debug > 1)
1066 				envd_log(LOG_INFO, " sensor[%d]: id:%x\n",
1067 				    i, id);
1068 			if (id == sensorp->fru_sensor)
1069 				break;
1070 		}
1071 
1072 		if (i >= sensorcnt)
1073 			continue;
1074 
1075 		/*
1076 		 * Validate offset/length of our sensor data record
1077 		 */
1078 		offset = (uint_t)GET_UNALIGN16(&envsegp->sensors[i].offset);
1079 		datap =  (envseg_sensor_data_t *)((intptr_t)frup->envsegbufp +
1080 		    offset);
1081 		mapentries =  GET_UNALIGN16(&datap->obs2exp_cnt);
1082 		length = sizeof (envseg_sensor_data_t) - sizeof (envseg_map_t) +
1083 		    mapentries * sizeof (envseg_map_t);
1084 
1085 		if (env_debug > 1)
1086 			envd_log(LOG_INFO, "Found sensor_id:%x idx:%x "
1087 			"off:%x #maps:%x expected length:%x\n",
1088 				sensorp->fru_sensor, i, offset,
1089 				mapentries, length);
1090 
1091 		if (offset >= envseglen || (offset+length) > envseglen) {
1092 			/* corrupted sensor record */
1093 			envd_log(LOG_CRIT, ENV_FRU_BAD_SENSOR_ENTRY,
1094 			    sensorp->fru_sensor, sensorp->name, sensorp->fru);
1095 			continue;
1096 		}
1097 
1098 		if (env_debug > 1) {
1099 			/* print threshold values */
1100 			envd_log(LOG_INFO,
1101 			    "Thresholds: HPwrOff %d  HShutDn %d  HWarn %d\n",
1102 			    datap->high_power_off, datap->high_shutdown,
1103 			    datap->high_warning);
1104 			envd_log(LOG_INFO,
1105 			    "Thresholds: LWarn %d  LShutDn %d  LPwrOff %d\n",
1106 			    datap->low_warning, datap->low_shutdown,
1107 			    datap->low_power_off);
1108 
1109 			/* print policy data */
1110 			envd_log(LOG_INFO,
1111 			    " Policy type: %d #%d data: %x %x %x %x %x %x\n",
1112 			    datap->policy_type, datap->policy_entries,
1113 			    datap->policy_data[0], datap->policy_data[1],
1114 			    datap->policy_data[2], datap->policy_data[3],
1115 			    datap->policy_data[4], datap->policy_data[5]);
1116 
1117 			/* print map table */
1118 			for (i = 0; i < mapentries; i++) {
1119 				envd_log(LOG_INFO, " Map pair# %d: %d %d\n",
1120 				    i, datap->obs2exp_map[i].observed,
1121 				    datap->obs2exp_map[i].expected);
1122 			}
1123 		}
1124 
1125 
1126 		/*
1127 		 * Copy threshold values
1128 		 */
1129 		threshp = sensorp->temp_thresh;
1130 		threshp->high_power_off = datap->high_power_off;
1131 		threshp->high_shutdown = datap->high_shutdown;
1132 		threshp->high_warning = datap->high_warning;
1133 		threshp->low_warning = datap->low_warning;
1134 		threshp->low_shutdown = datap->low_shutdown;
1135 		threshp->low_power_off = datap->low_power_off;
1136 
1137 		/*
1138 		 * Copy policy data
1139 		 */
1140 		threshp->policy_type = datap->policy_type;
1141 		threshp->policy_entries = datap->policy_entries;
1142 		for (i = 0; i < MAX_POLICY_ENTRIES; i++)
1143 			threshp->policy_data[i] =
1144 			    (tempr_t)datap->policy_data[i];
1145 
1146 		/*
1147 		 * Copy temperature mapping info (discard duplicate entries)
1148 		 */
1149 		if (sensorp->obs2exp_map) {
1150 			(void) free(sensorp->obs2exp_map);
1151 			sensorp->obs2exp_map = NULL;
1152 			sensorp->obs2exp_cnt = 0;
1153 		}
1154 		if (mapentries > 0) {
1155 			tempr_map_t	*map;
1156 			int		cnt;
1157 			tempr_t		observed, expected;
1158 
1159 			map = (tempr_map_t *)malloc(mapentries *
1160 			    sizeof (tempr_map_t));
1161 
1162 			if (map == NULL) {
1163 				envd_log(LOG_CRIT, ENV_FRU_SENSOR_MAP_NOMEM,
1164 				    sensorp->fru_sensor, sensorp->name,
1165 				    sensorp->fru);
1166 				continue;
1167 			}
1168 
1169 			for (i = 0, cnt = 0; i < mapentries; i++) {
1170 
1171 				observed = (tempr_t)
1172 				    datap->obs2exp_map[i].observed;
1173 				expected = (tempr_t)
1174 				    datap->obs2exp_map[i].expected;
1175 
1176 				/* ignore if duplicate entry */
1177 				if (cnt > 0 &&
1178 				    observed == map[cnt-1].observed &&
1179 				    expected == map[cnt-1].expected) {
1180 					continue;
1181 				}
1182 				map[cnt].observed = observed;
1183 				map[cnt].expected = expected;
1184 				cnt++;
1185 			}
1186 			sensorp->obs2exp_cnt = cnt;
1187 			sensorp->obs2exp_map = map;
1188 		}
1189 
1190 		if (env_debug > 2 && sensorp->obs2exp_cnt > 1) {
1191 			char	msgbuf[256];
1192 
1193 			envd_log(LOG_INFO,
1194 			    "Measured --> Correct temperature table "
1195 			    "for sensor: %s\n", sensorp->name);
1196 			for (i = -128; i < 128; i++) {
1197 				(void) sprintf(&msgbuf[6*(i&0x7)], "%6d",
1198 				    xlate_obs2exp(sensorp, i));
1199 				if ((i &0x7) == 0x7)
1200 					envd_log(LOG_INFO,
1201 					    "%8d: %s\n", (i & ~0x7), msgbuf);
1202 			}
1203 			if ((i & 0x7) != 0)
1204 				(void) printf("%8d: %s\n", (i & ~0x7), msgbuf);
1205 
1206 			envd_log(LOG_INFO,
1207 			    "Correct --> Measured temperature table "
1208 			    "for sensor: %s\n", sensorp->name);
1209 			for (i = -128; i < 128; i++) {
1210 				(void) sprintf(&msgbuf[6*(i&0x7)], "%6d",
1211 				    xlate_exp2obs(sensorp, i));
1212 				if ((i &0x7) == 0x7)
1213 					envd_log(LOG_INFO,
1214 					    "%8d: %s\n", (i & ~0x7), msgbuf);
1215 			}
1216 			if ((i & 0x7) != 0)
1217 				envd_log(LOG_INFO,
1218 				    "%8d: %s\n", (i & ~0x7), msgbuf);
1219 		}
1220 	}
1221 
1222 	/*
1223 	 * Deallocate environmental segment list
1224 	 */
1225 	while (fruenvsegs) {
1226 		frup = fruenvsegs;
1227 		fruenvsegs = frup->next;
1228 		if (frup->envsegbufp != NULL)
1229 			(void) free(frup->envsegbufp);
1230 		(void) free(frup);
1231 	}
1232 }
1233 
1234 /*
1235  * Lookup fan and return a pointer to env_fan_t data structure.
1236  */
1237 env_fan_t *
fan_lookup(char * name)1238 fan_lookup(char *name)
1239 {
1240 	int		i;
1241 	env_fan_t	*fanp;
1242 
1243 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1244 		if (strcmp(fanp->name, name) == 0)
1245 			return (fanp);
1246 	}
1247 	return (NULL);
1248 }
1249 
1250 /*
1251  * Lookup sensor and return a pointer to env_sensor_t data structure.
1252  */
1253 env_sensor_t *
sensor_lookup(char * name)1254 sensor_lookup(char *name)
1255 {
1256 	env_sensor_t	*sensorp;
1257 
1258 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1259 		if (strcmp(sensorp->name, name) == 0)
1260 			return (sensorp);
1261 	}
1262 	return (NULL);
1263 }
1264 
1265 /*
1266  * Get current temperature
1267  * Returns -1 on error, 0 if successful
1268  */
1269 int
get_temperature(env_sensor_t * sensorp,tempr_t * temp)1270 get_temperature(env_sensor_t *sensorp, tempr_t *temp)
1271 {
1272 	int	fd = sensorp->fd;
1273 	int	retval = 0;
1274 	int	expected_temp;
1275 
1276 	if (fd == -1)
1277 		retval = -1;
1278 	else if (ioctl(fd, I2C_GET_TEMPERATURE, temp) == -1) {
1279 		retval = -1;
1280 		if (sensorp->error == 0) {
1281 			sensorp->error = 1;
1282 			envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_FAIL,
1283 			    sensorp->name, errno, strerror(errno));
1284 		}
1285 	} else if (sensorp->error != 0) {
1286 		sensorp->error = 0;
1287 		envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_OK, sensorp->name);
1288 	} else if (sensorp->obs2exp_map != NULL) {
1289 		expected_temp = xlate_obs2exp(sensorp, (tempr_t)*temp);
1290 		if (env_debug > 1)
1291 			envd_log(LOG_INFO,
1292 			    "sensor: %-13s temp:%d  CORRECED to %d\n",
1293 			    sensorp->name, *temp, (tempr_t)expected_temp);
1294 		*temp = (tempr_t)expected_temp;
1295 	}
1296 
1297 	return (retval);
1298 }
1299 
1300 /*
1301  * Get current fan speed
1302  * Returns -1 on error, 0 if successful
1303  */
1304 int
get_fan_speed(env_fan_t * fanp,fanspeed_t * fanspeedp)1305 get_fan_speed(env_fan_t *fanp, fanspeed_t *fanspeedp)
1306 {
1307 	int	fan_fd;
1308 	int	retval = 0;
1309 
1310 	fan_fd = fanp->fd;
1311 	if (fan_fd == -1 || read(fan_fd, fanspeedp, sizeof (fanspeed_t)) !=
1312 	    sizeof (fanspeed_t))
1313 		retval = -1;
1314 	return (retval);
1315 }
1316 
1317 /*
1318  * Set fan speed
1319  * Returns -1 on error, 0 if successful
1320  */
1321 static int
set_fan_speed(env_fan_t * fanp,fanspeed_t fanspeed)1322 set_fan_speed(env_fan_t *fanp, fanspeed_t fanspeed)
1323 {
1324 	int	fan_fd;
1325 	int	retval = 0;
1326 
1327 	fan_fd = fanp->fd;
1328 	if (fan_fd == -1 || write(fan_fd, &fanspeed, sizeof (fanspeed)) !=
1329 	    sizeof (fanspeed_t))
1330 		retval = -1;
1331 	return (retval);
1332 }
1333 
1334 
1335 /*
1336  * close all fan devices
1337  */
1338 static void
envd_close_fans(void)1339 envd_close_fans(void)
1340 {
1341 	int		i;
1342 	env_fan_t	*fanp;
1343 
1344 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1345 		if (fanp->fd != -1) {
1346 			(void) close(fanp->fd);
1347 			fanp->fd = -1;
1348 		}
1349 	}
1350 }
1351 
1352 /*
1353  * Close sensor devices
1354  */
1355 static void
envd_close_sensors(void)1356 envd_close_sensors(void)
1357 {
1358 	env_sensor_t	*sensorp;
1359 
1360 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1361 		if (sensorp->fd != -1) {
1362 			(void) close(sensorp->fd);
1363 			sensorp->fd = -1;
1364 		}
1365 	}
1366 }
1367 
1368 /*
1369  * Open PM device
1370  */
1371 static void
envd_open_pm(void)1372 envd_open_pm(void)
1373 {
1374 	pm_fd = open(PM_DEVICE, O_RDONLY);
1375 	if (pm_fd != -1)
1376 		(void) fcntl(pm_fd, F_SETFD, FD_CLOEXEC);
1377 }
1378 
1379 /*
1380  * Close PM device
1381  */
1382 static void
envd_close_pm(void)1383 envd_close_pm(void)
1384 {
1385 	if (pm_fd != -1) {
1386 		(void) close(pm_fd);
1387 		pm_fd = -1;
1388 	}
1389 }
1390 
1391 /*
1392  * Open fan devices and initialize per fan data structure.
1393  * Returns #fans found.
1394  */
1395 static int
envd_setup_fans(void)1396 envd_setup_fans(void)
1397 {
1398 	int		i, fd;
1399 	fanspeed_t	speed;
1400 	env_fan_t	*fanp;
1401 	char		path[PATH_MAX];
1402 	int		fancnt = 0;
1403 	char		*fan_name;
1404 	sensor_fan_map_t *sfmap;
1405 	env_sensor_t	*sensorp;
1406 	int		sensor_cnt;
1407 
1408 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1409 		if (fanp->fd == -1) {
1410 			fanp->sensor_cnt = 0;
1411 			fanp->cur_speed = 0;
1412 			fanp->prev_speed = 0;
1413 
1414 			(void) strcpy(path, "/devices");
1415 			(void) strlcat(path, fanp->devfs_path, sizeof (path));
1416 			fd = open(path, O_RDWR);
1417 			if (fd == -1) {
1418 				envd_log(LOG_CRIT,
1419 				    ENV_FAN_OPEN_FAIL, fanp->name,
1420 				    fanp->devfs_path, errno, strerror(errno));
1421 				fanp->present = B_FALSE;
1422 				continue;
1423 			}
1424 			(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
1425 			fanp->fd = fd;
1426 			fanp->present = B_TRUE;
1427 		}
1428 		fancnt++;
1429 
1430 		/*
1431 		 * Set initial speed and update cur_speed/prev_speed
1432 		 */
1433 		if (fanp->forced_speed >= 0) {
1434 			speed = (fanspeed_t)fanp->forced_speed;
1435 			if (speed > fanp->speed_max)
1436 				speed = fanp->speed_max;
1437 			if (!disable_piclenvd)
1438 				(void) set_fan_speed(fanp, speed);
1439 		} else if (get_fan_speed(fanp, &speed) == -1) {
1440 			/*
1441 			 * The Fan driver does not know the current fan speed.
1442 			 * Initialize all ON/OFF fans to ON state and all
1443 			 * variable speed fans under software control to 50%
1444 			 * of the max speed and reread the fan to get the
1445 			 * current speed.
1446 			 */
1447 			speed = (fanp == &envd_psupply_fan) ?
1448 				fanp->speed_max : fanp->speed_max/2;
1449 			if (!disable_piclenvd) {
1450 				(void) set_fan_speed(fanp, speed);
1451 				if (get_fan_speed(fanp, &speed) == -1)
1452 					continue;
1453 			}
1454 		}
1455 		fanp->cur_speed = speed;
1456 		fanp->prev_speed = speed;
1457 
1458 		/*
1459 		 * Process sensor_fan_map[] table and initialize sensors[]
1460 		 * array for this fan.
1461 		 */
1462 		fan_name = fanp->name;
1463 		for (sensor_cnt = 0, sfmap = &sensor_fan_map[0];
1464 		    sfmap->sensor_name != NULL; sfmap++) {
1465 			if (strcmp(sfmap->fan_name, fan_name) != 0)
1466 				continue;
1467 			sensorp = sensor_lookup(sfmap->sensor_name);
1468 			if (sensorp != NULL && sensor_cnt < SENSORS_PER_FAN) {
1469 				fanp->sensors[sensor_cnt] = sensorp;
1470 				sensor_cnt++;
1471 			}
1472 		}
1473 		fanp->sensor_cnt = sensor_cnt;
1474 	}
1475 
1476 	return (fancnt);
1477 }
1478 
1479 
1480 /*
1481  * Adjust specified sensor target temperature and fan adjustment rate
1482  */
1483 
1484 static void
adjust_sensor_target(env_sensor_t * sensorp)1485 adjust_sensor_target(env_sensor_t *sensorp)
1486 {
1487 	int		target, index;
1488 	sensor_pmdev_t	*pmdevp;
1489 	sensor_thresh_t	*threshp;
1490 	float		rate;
1491 
1492 	/*
1493 	 * Look at current power state of all power managed devices
1494 	 * associated with this sensor and look up the desired target
1495 	 * temperature and pick the lowest one of those values. Also,
1496 	 * calculate the rate of change based upon whether one or more
1497 	 * of the associated power managed devices are not running at
1498 	 * full power mode.
1499 	 */
1500 
1501 	if (sensorp == NULL || (threshp = sensorp->temp_thresh) == NULL ||
1502 	    threshp->policy_type != POLICY_TARGET_TEMP)
1503 		return;
1504 
1505 	target = threshp->policy_data[0];
1506 	rate = 1.0;
1507 	for (pmdevp = sensorp->pmdevp; pmdevp != NULL; pmdevp = pmdevp->next) {
1508 		index = pmdevp->full_power - pmdevp->cur_power;
1509 		if (index <= 0)
1510 			continue;
1511 
1512 		/* not running at full power */
1513 		if (index >= threshp->policy_entries)
1514 			index = threshp->policy_entries - 1;
1515 		if (target > threshp->policy_data[index])
1516 			target = threshp->policy_data[index];
1517 		if (rate > (float)fan_slow_adjustment/100)
1518 			rate = (float)fan_slow_adjustment/100;
1519 		if (env_debug > 1)
1520 			envd_log(LOG_INFO,
1521 			    "pmdev: %-13s new_target:%d  cur:%d power:%d/%d\n",
1522 			    pmdevp->pmdev_name, target, sensorp->target_temp,
1523 			    pmdevp->cur_power, pmdevp->full_power);
1524 	}
1525 
1526 	if (env_debug)
1527 		envd_log(LOG_INFO,
1528 		    "sensor: %-13s new_target:%d  cur:%d power:%d/%d\n",
1529 		    sensorp->name, target, sensorp->target_temp,
1530 		    ((sensorp->pmdevp) ? sensorp->pmdevp->cur_power : -1),
1531 		    ((sensorp->pmdevp) ? sensorp->pmdevp->full_power : -1));
1532 
1533 	sensorp->fan_adjustment_rate = rate;
1534 	sensorp->target_temp = target;
1535 }
1536 
1537 /*
1538  * Update current power level of all PM devices we are tracking and adjust
1539  * the target temperature associated with the corresponding sensor.
1540  *
1541  * Returns 1 if one or more pmdev power level was adjusted; 0 otherwise.
1542  */
1543 static int
update_pmdev_power()1544 update_pmdev_power()
1545 {
1546 	sensor_pmdev_t	*pmdevp;
1547 	pm_req_t	pmreq;
1548 	int		cur_power;
1549 	int		updated = 0;
1550 
1551 	for (pmdevp = sensor_pmdevs; pmdevp->pmdev_name != NULL; pmdevp++) {
1552 		pmreq.physpath = pmdevp->pmdev_name;
1553 		pmreq.data = NULL;
1554 		pmreq.datasize = 0;
1555 		pmreq.component = pmdevp->speed_comp;
1556 		cur_power = ioctl(pm_fd, PM_GET_CURRENT_POWER, &pmreq);
1557 		if (pmdevp->cur_power != cur_power) {
1558 			pmdevp->cur_power = cur_power;
1559 			if (pmdevp->sensorp) {
1560 				adjust_sensor_target(pmdevp->sensorp);
1561 				updated = 1;
1562 			}
1563 		}
1564 	}
1565 	return (updated);
1566 }
1567 
1568 /*
1569  * Check if the specified sensor is present.
1570  * Returns 1 if present; 0 otherwise.
1571  *
1572  * Note that we don't use ptree_get_node_by_path() here to detect
1573  * if a temperature device is present as we don't want to make
1574  * "devtree" a critical plugin.
1575  */
1576 static int
envd_sensor_present(env_sensor_t * sensorp)1577 envd_sensor_present(env_sensor_t *sensorp)
1578 {
1579 	char		*p, physpath[PATH_MAX];
1580 	di_node_t	root_node;
1581 	int		sensor_present = 0;
1582 
1583 	/*
1584 	 * Construct temperature device path by stripping minor
1585 	 * node name from the devfs_path and use di_init() to
1586 	 * see if the node exists.
1587 	 */
1588 	(void) strcpy(physpath, sensorp->devfs_path);
1589 	p = strrchr(physpath, ':');
1590 	if (p != NULL)
1591 		*p = '\0';
1592 	if ((root_node = di_init(physpath, DINFOMINOR)) != DI_NODE_NIL) {
1593 		di_fini(root_node);
1594 		sensor_present = 1;
1595 	}
1596 	return (sensor_present);
1597 }
1598 
1599 /*
1600  * Open temperature sensor devices and initialize per sensor data structure.
1601  * Returns #sensors found.
1602  */
1603 static int
envd_setup_sensors(void)1604 envd_setup_sensors(void)
1605 {
1606 	tempr_t		temp;
1607 	env_sensor_t	*sensorp;
1608 	char		path[PATH_MAX];
1609 	int		sensorcnt = 0;
1610 	int		sensor_present;
1611 	sensor_thresh_t	*threshp;
1612 	sensor_pmdev_t	*pmdevp;
1613 
1614 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1615 		if (sensorp->fd != -1) {
1616 			/* Don't reinitialize opened sensor */
1617 			threshp = sensorp->temp_thresh;
1618 			sensorp->pmdevp = NULL;
1619 		} else {
1620 			/* Initialize sensor's initial state */
1621 			sensorp->shutdown_initiated = B_FALSE;
1622 			sensorp->warning_tstamp = 0;
1623 			sensorp->warning_start = 0;
1624 			sensorp->shutdown_tstamp = 0;
1625 			sensorp->pmdevp = NULL;
1626 			sensorp->fan_adjustment_rate = 1.0;
1627 
1628 			threshp = sensorp->temp_thresh;
1629 			temp = (threshp && threshp->policy_entries > 0) ?
1630 			    threshp->policy_data[0] : 0;
1631 			sensorp->target_temp = temp;
1632 			sensorp->cur_temp = temp;
1633 			sensorp->avg_temp = temp;
1634 			sensorp->prev_avg_temp = temp;
1635 			sensorp->error = 0;
1636 
1637 			(void) strcpy(path, "/devices");
1638 			(void) strlcat(path, sensorp->devfs_path,
1639 			    sizeof (path));
1640 		retry:
1641 			sensorp->fd = open(path, O_RDWR);
1642 			if (sensorp->fd == -1) {
1643 				sensor_present = envd_sensor_present(sensorp);
1644 				if (sensor_present && !devfsadm_invoked &&
1645 				    devfsadm_cmd[0] != '\0') {
1646 					/*
1647 					 * Sensor is present but no path
1648 					 * exists as someone rebooted the
1649 					 * system without "-r" option. Let's
1650 					 * invoke "devfsadm" once to create
1651 					 * max1617 sensors paths in /devices
1652 					 * subtree and try again so that we
1653 					 * can monitor all accessible sensors
1654 					 * and prevent any CPU overheating.
1655 					 *
1656 					 * Note that this routine is always
1657 					 * called in main thread context and
1658 					 * serialized with respect to other
1659 					 * plugins' initialization. Hence, it's
1660 					 * safe to use system(3C) call here.
1661 					 */
1662 					devfsadm_invoked = 1;
1663 					(void) system(devfsadm_cmd);
1664 					goto retry;
1665 				}
1666 				if (sensor_present)
1667 					envd_log(LOG_CRIT,
1668 					    ENV_SENSOR_OPEN_FAIL,
1669 					    sensorp->name,
1670 					    sensorp->devfs_path, errno,
1671 					    strerror(errno));
1672 				sensorp->present = B_FALSE;
1673 				continue;
1674 			}
1675 			(void) fcntl(sensorp->fd, F_SETFD, FD_CLOEXEC);
1676 			sensorp->present = B_TRUE;
1677 
1678 			/*
1679 			 * Set cur_temp field to the current temperature value
1680 			 */
1681 			if (get_temperature(sensorp, &temp) == 0) {
1682 				sensorp->cur_temp = temp;
1683 				sensorp->avg_temp = temp;
1684 			}
1685 		}
1686 		sensorcnt++;
1687 
1688 		/*
1689 		 * Set low_power_off and high_power_off limits
1690 		 */
1691 		if (threshp && !disable_power_off) {
1692 			temp = xlate_exp2obs(sensorp, threshp->low_power_off);
1693 			if (env_debug > 1)
1694 				envd_log(LOG_INFO, "sensor: %-13s low_power_"
1695 				"off set to %d (real %d)\n", sensorp->name,
1696 				    (int)temp, threshp->low_power_off);
1697 			(void) ioctl(sensorp->fd, MAX1617_SET_LOW_LIMIT, &temp);
1698 
1699 			temp = xlate_exp2obs(sensorp, threshp->high_power_off);
1700 			if (env_debug > 1)
1701 				envd_log(LOG_INFO, "sensor: %-13s high_power_"
1702 				"off set to %d (real %d)\n", sensorp->name,
1703 				    (int)temp, threshp->high_power_off);
1704 			(void) ioctl(sensorp->fd, MAX1617_SET_HIGH_LIMIT,
1705 			    &temp);
1706 		}
1707 	}
1708 
1709 	/*
1710 	 * Locate "CPU Speed" component for any PM devices associated with
1711 	 * the sensors.
1712 	 */
1713 	for (pmdevp = sensor_pmdevs; pmdevp->sensor_name; pmdevp++) {
1714 		int		i, ncomp;
1715 		char		physpath[PATH_MAX];
1716 		pm_req_t	pmreq;
1717 
1718 		pmdevp->speed_comp = -1;
1719 		pmdevp->full_power = -1;
1720 		pmdevp->cur_power = -1;
1721 		pmdevp->next = NULL;
1722 		pmdevp->sensorp = sensorp = sensor_lookup(pmdevp->sensor_name);
1723 
1724 		/*
1725 		 * Lookup speed component and get full and current power
1726 		 * level for that component.
1727 		 */
1728 		pmreq.physpath = pmdevp->pmdev_name;
1729 		pmreq.data = physpath;
1730 		pmreq.datasize = sizeof (physpath);
1731 
1732 		ncomp = ioctl(pm_fd, PM_GET_NUM_COMPONENTS, &pmreq);
1733 		for (i = 0; i < ncomp; i++) {
1734 			pmreq.component = i;
1735 			physpath[0] = '\0';
1736 			if (ioctl(pm_fd, PM_GET_COMPONENT_NAME, &pmreq) <= 0)
1737 				continue;
1738 			if (strcasecmp(pmreq.data, pmdevp->speed_comp_name))
1739 				continue;
1740 			pmdevp->speed_comp = i;
1741 
1742 
1743 			/*
1744 			 * Get full power and current power level
1745 			 */
1746 			pmdevp->full_power = ioctl(pm_fd, PM_GET_FULL_POWER,
1747 			    &pmreq);
1748 
1749 			pmdevp->cur_power = ioctl(pm_fd, PM_GET_CURRENT_POWER,
1750 			    &pmreq);
1751 
1752 			if (sensorp) {
1753 				pmdevp->next = sensorp->pmdevp;
1754 				sensorp->pmdevp = pmdevp;
1755 				adjust_sensor_target(sensorp);
1756 			}
1757 			break;
1758 		}
1759 		if (env_debug > 1)
1760 			envd_log(LOG_INFO,
1761 			    "sensor:%s %p pmdev:%s comp:%s %d power:%d/%d\n",
1762 			    pmdevp->sensor_name, pmdevp->sensorp,
1763 			    pmdevp->pmdev_name, pmdevp->speed_comp_name,
1764 			    pmdevp->speed_comp, pmdevp->cur_power,
1765 			    pmdevp->full_power);
1766 	}
1767 	return (sensorcnt);
1768 }
1769 
1770 /*
1771  * Read all temperature sensors and take appropriate action based
1772  * upon temperature threshols associated with each sensor. Possible
1773  * actions are:
1774  *
1775  *	temperature > high_shutdown
1776  *	temperature < low_shutdown
1777  *		Gracefully shutdown the system and log/print a message
1778  *		on the system console provided the temperature has been
1779  *		in shutdown range for "shutdown_interval" seconds.
1780  *
1781  *	high_warning < temperature <= high_shutdown
1782  *	low_warning  > temperature >= low_shutdown
1783  *		Log/print a warning message on the system console at most
1784  *		once every "warning_interval" seconds.
1785  *
1786  * Note that the current temperature is recorded in the "cur_temp" field
1787  * within each env_sensor_t structure.
1788  */
1789 static void
monitor_sensors(void)1790 monitor_sensors(void)
1791 {
1792 	tempr_t 	temp;
1793 	env_sensor_t	*sensorp;
1794 	sensor_thresh_t	*threshp;
1795 	time_t		ct;
1796 	char		msgbuf[BUFSIZ];
1797 	char		syscmd[BUFSIZ];
1798 
1799 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1800 		if (get_temperature(sensorp, &temp) < 0)
1801 			continue;
1802 
1803 		sensorp->prev_avg_temp = sensorp->avg_temp;
1804 		sensorp->cur_temp = temp;
1805 		sensorp->avg_temp = (sensorp->avg_temp + temp)/2;
1806 		threshp = sensorp->temp_thresh;
1807 
1808 		if (env_debug)
1809 			envd_log(LOG_INFO,
1810 			"sensor: %-13s temp  prev_avg:%6.2f  "
1811 			"cur:%d avg_temp:%6.2f power:%d/%d target:%d\n",
1812 			    sensorp->name, sensorp->prev_avg_temp,
1813 			    temp, sensorp->avg_temp, ((sensorp->pmdevp) ?
1814 			    sensorp->pmdevp->cur_power : -1),
1815 			    ((sensorp->pmdevp) ? sensorp->pmdevp->full_power :
1816 			    -1), sensorp->target_temp);
1817 
1818 
1819 		/*
1820 		 * If this sensor already triggered system shutdown, don't
1821 		 * log any more shutdown/warning messages for it.
1822 		 */
1823 		if (sensorp->shutdown_initiated || threshp == NULL)
1824 			continue;
1825 
1826 		/*
1827 		 * Check for the temperature in warning and shutdown range
1828 		 * and take appropriate action.
1829 		 */
1830 		if (TEMP_IN_WARNING_RANGE(temp, threshp) && !disable_warning) {
1831 			/*
1832 			 * Check if the temperature has been in warning
1833 			 * range during last warning_duration interval.
1834 			 * If so, the temperature is truly in warning
1835 			 * range and we need to log a warning message,
1836 			 * but no more than once every warning_interval
1837 			 * seconds.
1838 			 */
1839 			time_t	wtstamp = sensorp->warning_tstamp;
1840 
1841 			ct = (time_t)(gethrtime() / NANOSEC);
1842 			if (sensorp->warning_start == 0)
1843 				sensorp->warning_start = ct;
1844 			if (((ct - sensorp->warning_start) >=
1845 			    warning_duration) && (wtstamp == 0 ||
1846 			    (ct - wtstamp) >= warning_interval)) {
1847 				envd_log(LOG_CRIT, ENV_WARNING_MSG,
1848 				    sensorp->name, temp,
1849 				    threshp->low_warning,
1850 				    threshp->high_warning);
1851 				sensorp->warning_tstamp = ct;
1852 			}
1853 		} else if (sensorp->warning_start != 0)
1854 			sensorp->warning_start = 0;
1855 
1856 		if (TEMP_IN_SHUTDOWN_RANGE(temp, threshp) &&
1857 		    !disable_shutdown) {
1858 			ct = (time_t)(gethrtime() / NANOSEC);
1859 			if (sensorp->shutdown_tstamp == 0)
1860 				sensorp->shutdown_tstamp = ct;
1861 
1862 			/*
1863 			 * Shutdown the system if the temperature remains
1864 			 * in the shutdown range for over shutdown_interval
1865 			 * seconds.
1866 			 */
1867 			if ((ct - sensorp->shutdown_tstamp) >=
1868 			    shutdown_interval) {
1869 				/* log error */
1870 				sensorp->shutdown_initiated = B_TRUE;
1871 				(void) snprintf(msgbuf, sizeof (msgbuf),
1872 				    ENV_SHUTDOWN_MSG, sensorp->name,
1873 				    temp, threshp->low_shutdown,
1874 				    threshp->high_shutdown);
1875 				envd_log(LOG_ALERT, msgbuf);
1876 
1877 				/* shutdown the system (only once) */
1878 				if (system_shutdown_started == B_FALSE) {
1879 					(void) snprintf(syscmd, sizeof (syscmd),
1880 					    "%s \"%s\"", shutdown_cmd, msgbuf);
1881 					envd_log(LOG_ALERT, syscmd);
1882 					system_shutdown_started = B_TRUE;
1883 					(void) system(syscmd);
1884 				}
1885 			}
1886 		} else if (sensorp->shutdown_tstamp != 0)
1887 			sensorp->shutdown_tstamp = 0;
1888 	}
1889 }
1890 
1891 
1892 /*
1893  * Adjust fan speed based upon the current temperature value of various
1894  * sensors affected by the specified fan.
1895  */
1896 static int
adjust_fan_speed(env_fan_t * fanp,lpm_dev_t * devp)1897 adjust_fan_speed(env_fan_t *fanp, lpm_dev_t *devp)
1898 {
1899 	int		i;
1900 	fanspeed_t	fanspeed;
1901 	float		speed, cur_speed, new_speed, max_speed, min_speed;
1902 	env_sensor_t	*sensorp;
1903 	sensor_thresh_t	*threshp;
1904 	tempr_t		temp;
1905 	float		avg_temp, tempdiff, targetdiff;
1906 	int		av_ambient;
1907 	int		amb_cnt;
1908 
1909 
1910 	/*
1911 	 * Get current fan speed
1912 	 */
1913 	if (get_fan_speed(fanp, &fanspeed) < 0)
1914 		return (-1);
1915 	cur_speed = fanp->cur_speed;
1916 	if (fanspeed != (int)cur_speed)
1917 		cur_speed = (float)fanspeed;
1918 
1919 	/*
1920 	 * Calculate new fan speed for each sensor and pick the largest one.
1921 	 */
1922 	min_speed = fanp->speed_min;
1923 	max_speed = fanp->speed_max;
1924 	speed = 0;
1925 	av_ambient = 0;
1926 	amb_cnt = 0;
1927 
1928 	for (i = 0; i < fanp->sensor_cnt; i++) {
1929 		sensorp = fanp->sensors[i];
1930 		if (sensorp == NULL || sensorp->fd == -1 ||
1931 		    sensorp->temp_thresh == NULL)
1932 			continue;
1933 
1934 		temp = sensorp->cur_temp;
1935 		avg_temp = sensorp->avg_temp;
1936 		threshp = sensorp->temp_thresh;
1937 
1938 		/*
1939 		 * Note ambient temperatures to determine lpm for system fan
1940 		 */
1941 		if ((devp != NULL) &&
1942 		    (sensorp->flags & SFLAG_CPU_AMB_SENSOR)) {
1943 			av_ambient += temp;
1944 			amb_cnt++;
1945 		}
1946 
1947 		/*
1948 		 * If the current temperature is above the warning
1949 		 * threshold, use max fan speed.
1950 		 */
1951 		if (temp >= threshp->high_warning) {
1952 			speed = max_speed;
1953 			break;
1954 		} else if (temp <= threshp->low_warning) {
1955 			speed = min_speed;
1956 			break;
1957 		}
1958 
1959 		if (threshp->policy_type == POLICY_TARGET_TEMP) {
1960 			/*
1961 			 * Try to achieve the desired target temperature.
1962 			 * Calculate new fan speed based upon whether the
1963 			 * temperature is rising, falling or steady state.
1964 			 * Also take into consideration the current fan
1965 			 * speed as well as the desired target temperature.
1966 			 */
1967 			float	delta, speed_change;
1968 			float	multiplier;
1969 
1970 			targetdiff = avg_temp - sensorp->target_temp;
1971 			tempdiff = avg_temp - sensorp->prev_avg_temp;
1972 
1973 			if (tempdiff > AVG_TEMP_HYSTERESIS) {
1974 				/*
1975 				 * Temperature is rising. Increase fan
1976 				 * speed 0.5% for every 1C above the
1977 				 * (target - RISING_TEMP_MARGIN) limit.
1978 				 * Also take into consideration temperature
1979 				 * rising rate and the current fan speed.
1980 				 */
1981 				delta = max_speed * .005 *
1982 				    (RISING_TEMP_MARGIN + targetdiff);
1983 				if (delta <= 0)
1984 					multiplier = 0;
1985 				else
1986 					multiplier = tempdiff/4 +
1987 					    ((cur_speed < max_speed/2) ?
1988 					    2 : 1);
1989 			} else if (tempdiff < -AVG_TEMP_HYSTERESIS) {
1990 				/*
1991 				 * Temperature is falling. Decrease fan
1992 				 * speed 0.5% for every 1C below the
1993 				 * (target + FALLING_TEMP_MARGIN) limit.
1994 				 * Also take into consideration temperature
1995 				 * falling rate and the current fan speed.
1996 				 */
1997 				delta = -max_speed * .005 *
1998 				    (FALLING_TEMP_MARGIN - targetdiff);
1999 				if (delta >= 0)
2000 					multiplier = 0;
2001 				else
2002 					multiplier = -tempdiff/4 +
2003 					    ((cur_speed > max_speed/2) ?
2004 					    2 : 1);
2005 			} else {
2006 				/*
2007 				 * Temperature is changing very slowly.
2008 				 * Adjust fan speed by 0.4% for every 1C
2009 				 * below/above the target temperature.
2010 				 */
2011 				delta = max_speed * .004 * targetdiff;
2012 				multiplier = 1.0;
2013 			}
2014 
2015 
2016 			/*
2017 			 * Enforece some bounds on multiplier and the
2018 			 * speed change.
2019 			 */
2020 			multiplier = MIN(multiplier, 3.0);
2021 			speed_change = delta * multiplier *
2022 			    sensorp->fan_adjustment_rate;
2023 			speed_change = MIN(speed_change, fan_incr_limit);
2024 			speed_change = MAX(speed_change, -fan_decr_limit);
2025 			new_speed = cur_speed + speed_change;
2026 
2027 			if (env_debug > 1)
2028 				envd_log(LOG_INFO,
2029 				"sensor: %-8s temp/diff:%d/%3.1f  "
2030 				"target/diff:%d/%3.1f  change:%4.2f x "
2031 				"%4.2f x %4.2f speed %5.2f -> %5.2f\n",
2032 				    sensorp->name, temp, tempdiff,
2033 				    sensorp->target_temp, targetdiff, delta,
2034 				    multiplier, sensorp->fan_adjustment_rate,
2035 				    cur_speed, new_speed);
2036 		} else if (threshp->policy_type == POLICY_LINEAR) {
2037 			/*
2038 			 * Set fan speed linearly within the operating
2039 			 * range specified by the policy_data[LOW_NOMINAL_LOC]
2040 			 * and policy_data[HIGH_NOMINAL_LOC] threshold values.
2041 			 * Fan speed is set to minimum value at LOW_NOMINAL
2042 			 * and to maximum value at HIGH_NOMINAL value.
2043 			 */
2044 			new_speed = min_speed + (max_speed - min_speed) *
2045 			    (avg_temp - threshp->policy_data[LOW_NOMINAL_LOC])/
2046 			    (threshp->policy_data[HIGH_NOMINAL_LOC] -
2047 			    threshp->policy_data[LOW_NOMINAL_LOC]);
2048 			if (env_debug > 1)
2049 				envd_log(LOG_INFO,
2050 				"sensor: %-8s policy: linear, cur_speed %5.2f"\
2051 				" new_speed: %5.2f\n", sensorp->name, cur_speed,
2052 				    new_speed);
2053 		} else {
2054 			new_speed = cur_speed;
2055 		}
2056 		speed = MAX(speed, new_speed);
2057 	}
2058 
2059 	/*
2060 	 * Adjust speed using lpm tables
2061 	 */
2062 	if (amb_cnt > 0) {
2063 		av_ambient = (av_ambient >= 0 ?
2064 			(int)(0.5 + (float)av_ambient/(float)amb_cnt):
2065 			(int)(-0.5 + (float)av_ambient/(float)amb_cnt));
2066 		speed = MAX(speed, (fanspeed_t)get_lpm_speed(devp, av_ambient));
2067 	}
2068 
2069 	speed = MIN(speed, max_speed);
2070 	speed = MAX(speed, min_speed);
2071 
2072 	/*
2073 	 * Record and update fan speed, if different.
2074 	 */
2075 	fanp->prev_speed = fanp->cur_speed;
2076 	fanp->cur_speed = speed;
2077 	if ((fanspeed_t)speed != fanspeed) {
2078 		fanspeed = (fanspeed_t)speed;
2079 		(void) set_fan_speed(fanp, fanspeed);
2080 	}
2081 	if (env_debug)
2082 		envd_log(LOG_INFO,
2083 		    "fan: %-16s speed cur:%6.2f  new:%6.2f\n",
2084 		    fanp->name, fanp->prev_speed, fanp->cur_speed);
2085 
2086 	return (0);
2087 }
2088 /*
2089  * This is the environment thread, which monitors the current temperature
2090  * and power managed state and controls system fan speed.  Temperature is
2091  * polled every sensor-poll_interval seconds duration.
2092  */
2093 /*ARGSUSED*/
2094 static void *
envthr(void * args)2095 envthr(void *args)
2096 {
2097 	env_sensor_t	*sensorp;
2098 	fanspeed_t 	fan_speed;
2099 	env_fan_t	*pmfanp = &envd_psupply_fan;
2100 	int		to;
2101 	int		xwd = -1;
2102 
2103 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL;
2104 	    sensorp++) {
2105 		if (sensorp->obs2exp_map)
2106 			(void) free(sensorp->obs2exp_map);
2107 		sensorp->obs2exp_map = NULL;
2108 		sensorp->obs2exp_cnt = 0;
2109 	}
2110 
2111 	/*
2112 	 * Process environmental segment data, if present,
2113 	 * in the FRU SEEPROM.
2114 	 */
2115 	process_fru_envseg();
2116 
2117 	/*
2118 	 * Process tuneable parameters
2119 	 */
2120 	process_env_conf_file();
2121 
2122 	/*
2123 	 * Setup temperature sensors and fail if we can't open
2124 	 * at least one sensor.
2125 	 */
2126 	if (envd_setup_sensors() <= 0) {
2127 		envd_close_pm();
2128 		return (NULL);
2129 	}
2130 
2131 	to = 3 * sensor_poll_interval + 1;
2132 	xwd = open(XCALWD_DEVFS, O_RDONLY);
2133 	if (xwd < 0) {
2134 		envd_log(LOG_CRIT, ENV_WATCHDOG_INIT_FAIL, errno,
2135 		    strerror(errno));
2136 	} else if (ioctl(xwd, XCALWD_STOPWATCHDOG) < 0 ||
2137 	    ioctl(xwd, XCALWD_STARTWATCHDOG, &to) < 0) {
2138 		envd_log(LOG_CRIT, ENV_WATCHDOG_INIT_FAIL, errno,
2139 		    strerror(errno));
2140 		(void) close(xwd);
2141 		xwd = -1;
2142 	}
2143 
2144 	/*
2145 	 * Setup fan device (don't fail even if we can't access
2146 	 * the fan as we can still monitor temeperature.
2147 	 */
2148 	(void) envd_setup_fans();
2149 
2150 	for (;;) {
2151 		(void) pthread_rwlock_rdlock(&envd_rwlock);
2152 
2153 		/*
2154 		 * If no "pmthr" thread, then we need to update the
2155 		 * current power level for all power managed deviecs
2156 		 * so that we can determine correct target temperature.
2157 		 */
2158 		if (pmthr_exists == B_FALSE)
2159 			(void) update_pmdev_power();
2160 
2161 		if (xwd >= 0)
2162 			(void) ioctl(xwd, XCALWD_KEEPALIVE);
2163 
2164 		if (!disable_piclenvd) {
2165 			/*
2166 			 * Monitor current temperature for all sensors
2167 			 * (current temperature is recorded in the "cur_temp"
2168 			 * field within each sensor data structure)
2169 			 */
2170 			monitor_sensors();
2171 
2172 			/*
2173 			 * Adjust CPU and system fan speed
2174 			 */
2175 			if (envd_cpu_fan.forced_speed < 0)
2176 				(void) adjust_fan_speed(&envd_cpu_fan, NULL);
2177 			if (envd_system_fan.forced_speed < 0)
2178 				(void) adjust_fan_speed(&envd_system_fan,
2179 					lpm_devices);
2180 
2181 			/*
2182 			 * Turn off power supply fan if in lowest power state.
2183 			 */
2184 			fan_speed = (cur_lpstate) ? pmfanp->speed_min :
2185 			    pmfanp->speed_max;
2186 
2187 			if (env_debug)
2188 				envd_log(LOG_INFO,
2189 				"fan: %-16s speed cur:%6.2f  new:%6.2f "
2190 				"low-power:%d\n", pmfanp->name,
2191 				    (float)pmfanp->cur_speed,
2192 				    (float)fan_speed, cur_lpstate);
2193 
2194 			if (fan_speed != (fanspeed_t)pmfanp->cur_speed &&
2195 			    set_fan_speed(pmfanp, fan_speed) == 0)
2196 				pmfanp->cur_speed = fan_speed;
2197 		}
2198 		(void) pthread_rwlock_unlock(&envd_rwlock);
2199 
2200 		/*
2201 		 * Wait for sensor_poll_interval seconds before polling
2202 		 * again. Note that we use our own envd_sleep() routine
2203 		 * as sleep() in POSIX thread library gets affected by
2204 		 * the wall clock time being set back.
2205 		 */
2206 		(void) envd_sleep(sensor_poll_interval);
2207 	}
2208 	/*NOTREACHED*/
2209 	return (NULL);
2210 }
2211 
2212 /*
2213  * This is the power management thread, which monitors all power state
2214  * change events and wakes up the "envthr" thread when the system enters
2215  * or exits the lowest power state.
2216  */
2217 /*ARGSUSED*/
2218 static void *
pmthr(void * args)2219 pmthr(void *args)
2220 {
2221 	pm_state_change_t	pmstate;
2222 	char			physpath[PATH_MAX];
2223 
2224 	pmstate.physpath = physpath;
2225 	pmstate.size = sizeof (physpath);
2226 	cur_lpstate = 0;
2227 
2228 	for (;;) {
2229 		/*
2230 		 * Get PM state change events to check if the system
2231 		 * is in lowest power state and wake up the "envthr"
2232 		 * thread when the power state changes.
2233 		 *
2234 		 * To minimize polling, we use the blocking interface
2235 		 * to get the power state change event here.
2236 		 */
2237 		if (ioctl(pm_fd, PM_GET_STATE_CHANGE_WAIT, &pmstate) != 0) {
2238 			if (errno != EINTR)
2239 				break;
2240 			continue;
2241 		}
2242 
2243 		/*
2244 		 * Extract the lowest power state from the last queued
2245 		 * state change events. We pick up queued state change
2246 		 * events using the non-blocking interface and wake up
2247 		 * the "envthr" thread only after consuming all the
2248 		 * state change events queued at that time.
2249 		 */
2250 		do {
2251 			if (env_debug > 1)  {
2252 				envd_log(LOG_INFO,
2253 				"pmstate event:0x%x flags:%x comp:%d "
2254 				"oldval:%d newval:%d path:%s\n",
2255 				    pmstate.event, pmstate.flags,
2256 				    pmstate.component, pmstate.old_level,
2257 				    pmstate.new_level, pmstate.physpath);
2258 			}
2259 			cur_lpstate =
2260 			    (pmstate.flags & PSC_ALL_LOWEST) ? 1 : 0;
2261 		} while (ioctl(pm_fd, PM_GET_STATE_CHANGE, &pmstate) == 0);
2262 
2263 		/*
2264 		 * Update current PM state for the components we are
2265 		 * tracking. In case of CPU devices, PM state change
2266 		 * event can be generated even before the state change
2267 		 * takes effect, hence we need to get the current state
2268 		 * for all CPU devices every time and recalculate the
2269 		 * target temperature. We do this once after consuming
2270 		 * all the queued events.
2271 		 */
2272 
2273 		(void) pthread_rwlock_rdlock(&envd_rwlock);
2274 		(void) update_pmdev_power();
2275 		(void) pthread_rwlock_unlock(&envd_rwlock);
2276 	}
2277 
2278 	/*
2279 	 * We won't be able to monitor lowest power state any longer,
2280 	 * hence reset it.
2281 	 */
2282 	cur_lpstate = 0;
2283 	envd_log(LOG_ERR, PM_THREAD_EXITING, errno, strerror(errno));
2284 	pmthr_exists = B_FALSE;
2285 	return (NULL);
2286 }
2287 
2288 
2289 /*
2290  * Process sensor threshold related tuneables
2291  */
2292 static int
process_threshold_tuneable(char * keyword,char * buf,void * dummy_thresh_addr,int flags,char * fname,int line)2293 process_threshold_tuneable(char *keyword, char *buf, void *dummy_thresh_addr,
2294     int flags, char *fname, int line)
2295 {
2296 	int		retval = 0;
2297 	long		val;
2298 	void		*addr;
2299 	char		*endp, *sname;
2300 	env_sensor_t	*sensorp;
2301 
2302 	/*
2303 	 * Tuneable entry can be in one of the following formats:
2304 	 *
2305 	 *	threshold-keyword <int-value>
2306 	 *	threshold-keyword <int-value> <sensor-name> ...
2307 	 *
2308 	 * Convert threshold value into integer value and check for
2309 	 * optional sensor name. If no sensor name is specified, then
2310 	 * the tuneable applies to all sensors specified by the "flags".
2311 	 * Otherwise, it is applicable to the specified sensors.
2312 	 *
2313 	 * Note that the dummy_thresh_addr is the address of the threshold
2314 	 * to be changed and is converted into offset by subtracting the
2315 	 * base dummy_thresh address. This offset is added to the base
2316 	 * address of the threshold structure to be update to determine
2317 	 * the final memory address to be modified.
2318 	 */
2319 
2320 	errno = 0;
2321 	val = strtol(buf, &endp, 0);
2322 	sname = strtok(endp, tokdel);
2323 
2324 	if (errno != 0 || val != (tempr_t)val) {
2325 		retval = -1;
2326 		envd_log(LOG_INFO, ENV_CONF_INT_EXPECTED, fname, line, keyword);
2327 	} else if (flags == 0 && sname == NULL) {
2328 		envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d SKIPPED"
2329 		    " as no sensor specified.\n", fname, line, keyword);
2330 		retval = -1;
2331 	} else if (sname == NULL) {
2332 		int	cnt = 0;
2333 
2334 		for (sensorp = &envd_sensors[0]; sensorp->name; sensorp++) {
2335 			if (sensorp->temp_thresh == NULL ||
2336 			    (sensorp->flags & flags) == 0)
2337 				continue;
2338 
2339 			/*
2340 			 * Convert dummy_thresh_addr into memory address
2341 			 * for this sensor threshold values.
2342 			 */
2343 			addr = (char *)sensorp->temp_thresh +
2344 			    (int)((char *)dummy_thresh_addr -
2345 			    (char *)&dummy_thresh);
2346 
2347 			*(tempr_t *)addr = (tempr_t)val;
2348 			cnt++;
2349 			if (env_debug)
2350 				envd_log(LOG_INFO, "SUNW_piclenvd: file:%s "
2351 				"line:%d %s = %d for sensor: '%s'\n",
2352 				    fname, line, keyword, val, sensorp->name);
2353 		}
2354 		if (cnt == 0)
2355 			envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d "
2356 			"%s SKIPPED as no matching sensor found.\n",
2357 			    fname, line, keyword);
2358 	} else {
2359 		/* apply threshold value to the specified sensors */
2360 		do {
2361 			sensorp = sensor_lookup(sname);
2362 			if (sensorp == NULL || sensorp->temp_thresh == NULL ||
2363 			    (flags && (sensorp->flags & flags) == 0)) {
2364 				envd_log(LOG_INFO,
2365 				"SUNW_piclenvd: file:%s line:%d %s SKIPPED"
2366 				" for '%s' as not a valid sensor.\n",
2367 				    fname, line, keyword, sname);
2368 				continue;
2369 			}
2370 			/*
2371 			 * Convert dummy_thresh_addr into memory address
2372 			 * for this sensor threshold values.
2373 			 */
2374 			addr = (char *)sensorp->temp_thresh +
2375 			    (int)((char *)dummy_thresh_addr -
2376 			    (char *)&dummy_thresh);
2377 
2378 			*(tempr_t *)addr = (tempr_t)val;
2379 			if (env_debug)
2380 				envd_log(LOG_INFO, "SUNW_piclenvd: file:%s "
2381 				"line:%d %s = %d for sensor: '%s'\n",
2382 				    fname, line, keyword, val, sensorp->name);
2383 		} while ((sname = strtok(NULL, tokdel)) != NULL);
2384 	}
2385 	return (retval);
2386 }
2387 
2388 
2389 /*
2390  * Process integer tuneables
2391  */
2392 static int
process_int_tuneable(char * keyword,char * buf,void * addr,int size,char * fname,int line)2393 process_int_tuneable(char *keyword, char *buf, void *addr, int size,
2394     char *fname, int line)
2395 {
2396 	int	retval = 0;
2397 	char	*endp;
2398 	long	val;
2399 
2400 	/*
2401 	 * Convert input into integer value and ensure that there is
2402 	 * no other token in the buffer.
2403 	 */
2404 	errno = 0;
2405 	val = strtol(buf, &endp, 0);
2406 	if (errno != 0 || strtok(endp, tokdel) != NULL)
2407 		retval = -1;
2408 	else {
2409 		switch (size) {
2410 		case 1:
2411 			if (val != (int8_t)val)
2412 				retval = -1;
2413 			else
2414 				*(int8_t *)addr = (int8_t)val;
2415 			break;
2416 		case 2:
2417 			if (val != (short)val)
2418 				retval = -1;
2419 			else
2420 				*(short *)addr = (short)val;
2421 			break;
2422 		case 4:
2423 			*(int *)addr = (int)val;
2424 			break;
2425 		default:
2426 			retval = -1;
2427 		}
2428 	}
2429 
2430 	if (retval == -1)
2431 		envd_log(LOG_INFO, ENV_CONF_INT_EXPECTED,
2432 		    fname, line, keyword);
2433 	else if (env_debug)
2434 		envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d %s = %d\n",
2435 		    fname, line, keyword, val);
2436 
2437 	return (retval);
2438 }
2439 
2440 
2441 /*
2442  * Process string tuneables
2443  *
2444  * String value must be within double quotes.  Skip over initial white
2445  * spaces before looking for string value.
2446  */
2447 static int
process_string_tuneable(char * keyword,char * buf,void * addr,int size,char * fname,int line)2448 process_string_tuneable(char *keyword, char *buf, void *addr, int size,
2449     char *fname, int line)
2450 {
2451 	int	retval = 0;
2452 	char	c, *p, *strend;
2453 
2454 	/* Skip over white spaces */
2455 	buf += strspn(buf, tokdel);
2456 
2457 	/*
2458 	 * Parse srting and locate string end (handling escaped double quotes
2459 	 * and other characters)
2460 	 */
2461 	if (buf[0] != '"')
2462 		strend = NULL;
2463 	else {
2464 		for (p = buf+1; (c = *p) != '\0'; p++)
2465 			if (c == '"' || (c == '\\' && *++p == '\0'))
2466 				break;
2467 		strend = (*p == '"') ? p : NULL;
2468 	}
2469 
2470 	if (strend == NULL || (strend-buf) > size ||
2471 	    strtok(strend+1, tokdel) != NULL) {
2472 		envd_log(LOG_WARNING, ENV_CONF_STRING_EXPECTED,
2473 		    fname, line, keyword, size);
2474 		retval = -1;
2475 	} else {
2476 		*strend = '\0';
2477 		(void) strcpy(addr, (caddr_t)buf+1);
2478 		if (env_debug)
2479 			envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d "
2480 			    "%s = \"%s\"\n", fname, line, keyword, buf+1);
2481 	}
2482 
2483 	return (retval);
2484 }
2485 
2486 
2487 /*
2488  * Process configuration file
2489  */
2490 static void
process_env_conf_file(void)2491 process_env_conf_file(void)
2492 {
2493 	int		line, len, toklen;
2494 	char		buf[BUFSIZ];
2495 	FILE		*fp;
2496 	env_tuneable_t	*tunep;
2497 	char		nmbuf[SYS_NMLN];
2498 	char		fname[PATH_MAX];
2499 	char		*tok, *valuep;
2500 	int		skip_line = 0;
2501 
2502 	if (sysinfo(SI_PLATFORM, nmbuf, sizeof (nmbuf)) == -1)
2503 		return;
2504 
2505 	(void) snprintf(fname, sizeof (fname), PICLD_PLAT_PLUGIN_DIRF, nmbuf);
2506 	(void) strlcat(fname, ENV_CONF_FILE, sizeof (fname));
2507 	fp = fopen(fname, "r");
2508 	if (fp == NULL)
2509 		return;
2510 
2511 	/*
2512 	 * Blank lines or lines starting with "#" or "*" in the first
2513 	 * column are ignored. All other lines are assumed to contain
2514 	 * input in the following format:
2515 	 *
2516 	 *	keyword value
2517 	 *
2518 	 * where the "value" can be a signed integer or string (in
2519 	 * double quotes) depending upon the keyword.
2520 	 */
2521 
2522 	for (line = 1; fgets(buf, sizeof (buf), fp) != NULL; line++) {
2523 		len = strlen(buf);
2524 		if (len <= 0)
2525 			continue;
2526 
2527 		/* skip long lines */
2528 		if (buf[len-1] != '\n') {
2529 			skip_line = 1;
2530 			continue;
2531 		} else if (skip_line) {
2532 			skip_line = 0;
2533 			continue;
2534 		} else
2535 			buf[len-1] = '\0';
2536 
2537 		/* skip comments */
2538 		if (buf[0] == '*' || buf[0] == '#')
2539 			continue;
2540 
2541 		/*
2542 		 * Skip over white space to get the keyword
2543 		 */
2544 		tok = buf + strspn(buf, tokdel);
2545 		if (*tok == '\0')
2546 			continue;			/* blank line */
2547 
2548 		toklen = strcspn(tok, tokdel);
2549 		tok[toklen] = '\0';
2550 
2551 		/* Get possible location for value (within current line) */
2552 		valuep = tok + toklen + 1;
2553 		if (valuep > buf+len)
2554 			valuep = buf + len;
2555 
2556 		/*
2557 		 * Lookup the keyword and process value accordingly
2558 		 */
2559 		for (tunep = &env_tuneables[0]; tunep->name != NULL; tunep++) {
2560 			if (strcasecmp(tunep->name, tok) == 0) {
2561 				(void) (*tunep->func)(tok, valuep,
2562 				    tunep->arg1, tunep->arg2, fname, line);
2563 				break;
2564 			}
2565 		}
2566 
2567 		if (tunep->name == NULL)
2568 			envd_log(LOG_INFO, ENV_CONF_UNSUPPORTED_KEYWORD,
2569 			    fname, line, tok);
2570 	}
2571 	(void) fclose(fp);
2572 }
2573 
2574 /*
2575  * Setup envrionmental monitor state and start threads to monitor
2576  * temperature and power management state.
2577  * Returns -1 on error, 0 if successful.
2578  */
2579 
2580 static int
envd_setup(void)2581 envd_setup(void)
2582 {
2583 	char		*valp, *endp;
2584 	int		val;
2585 	int		err;
2586 
2587 	if (pthread_attr_init(&thr_attr) != 0 ||
2588 	    pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM) != 0)
2589 		return (-1);
2590 
2591 	if (pm_fd == -1)
2592 		envd_open_pm();
2593 
2594 	/*
2595 	 * Setup lpm devices
2596 	 */
2597 	lpm_devices = NULL;
2598 	if ((err = setup_lpm_devices(&lpm_devices)) != PICL_SUCCESS) {
2599 		if (env_debug)
2600 			envd_log(LOG_ERR, "setup_lpm_devices failed err = %d\n",
2601 				err);
2602 	}
2603 
2604 	/*
2605 	 * Initialize global state to initial startup values
2606 	 */
2607 	sensor_poll_interval = SENSOR_POLL_INTERVAL;
2608 	fan_slow_adjustment = FAN_SLOW_ADJUSTMENT;
2609 	fan_incr_limit = FAN_INCREMENT_LIMIT;
2610 	fan_decr_limit = FAN_DECREMENT_LIMIT;
2611 	warning_interval = WARNING_INTERVAL;
2612 	warning_duration = WARNING_DURATION;
2613 	shutdown_interval = SHUTDOWN_INTERVAL;
2614 	disable_piclenvd = 0;
2615 	disable_power_off = 0;
2616 	disable_shutdown = 0;
2617 	disable_warning = 0;
2618 
2619 	(void) strlcpy(shutdown_cmd, SHUTDOWN_CMD, sizeof (shutdown_cmd));
2620 	(void) strlcpy(devfsadm_cmd, DEVFSADM_CMD, sizeof (devfsadm_cmd));
2621 	(void) strlcpy(fru_devfsadm_cmd, FRU_DEVFSADM_CMD,
2622 	    sizeof (fru_devfsadm_cmd));
2623 	envd_cpu_fan.forced_speed = -1;
2624 	envd_system_fan.forced_speed = -1;
2625 
2626 	(void) memcpy(&cpu0_die_thresh, &cpu_die_thresh_default,
2627 	    sizeof (cpu_die_thresh_default));
2628 	(void) memcpy(&cpu0_amb_thresh, &cpu_amb_thresh_default,
2629 	    sizeof (cpu_amb_thresh_default));
2630 	(void) memcpy(&cpu1_die_thresh, &cpu_die_thresh_default,
2631 	    sizeof (cpu_die_thresh_default));
2632 	(void) memcpy(&cpu1_amb_thresh, &cpu_amb_thresh_default,
2633 	    sizeof (cpu_amb_thresh_default));
2634 
2635 	if ((valp = getenv("SUNW_piclenvd_debug")) != NULL) {
2636 		val = strtol(valp, &endp, 0);
2637 		if (strtok(endp, tokdel) == NULL)
2638 			env_debug = val;
2639 	}
2640 
2641 	/*
2642 	 * Create a thread to monitor temperature and control fan
2643 	 * speed.
2644 	 */
2645 	if (envthr_created == B_FALSE && pthread_create(&envthr_tid,
2646 	    &thr_attr, envthr, (void *)NULL) != 0) {
2647 		envd_close_fans();
2648 		envd_close_sensors();
2649 		envd_close_pm();
2650 		envd_log(LOG_CRIT, ENV_THREAD_CREATE_FAILED);
2651 		return (-1);
2652 	}
2653 	envthr_created = B_TRUE;
2654 
2655 	/*
2656 	 * Create a thread to monitor PM state
2657 	 */
2658 	if (pmthr_exists == B_FALSE) {
2659 		if (pm_fd == -1 || pthread_create(&pmthr_tid, &thr_attr,
2660 		    pmthr, (void *)NULL) != 0) {
2661 			envd_log(LOG_CRIT, PM_THREAD_CREATE_FAILED);
2662 		} else
2663 			pmthr_exists = B_TRUE;
2664 	}
2665 	return (0);
2666 }
2667 
2668 /*
2669  * Callback function used by ptree_walk_tree_by_class for the cpu class
2670  */
2671 static int
cb_cpu(picl_nodehdl_t nodeh,void * args)2672 cb_cpu(picl_nodehdl_t nodeh, void *args)
2673 {
2674 	sensor_pmdev_t		*pmdevp;
2675 	int			err;
2676 	ptree_propinfo_t	pinfo;
2677 	picl_prophdl_t		proph;
2678 	size_t			psize;
2679 	int			id;
2680 
2681 	/* Get CPU's ID, it is an int */
2682 	err = ptree_get_propval_by_name(nodeh, PICL_PROP_ID, &id, sizeof (int));
2683 	if (err != PICL_SUCCESS)
2684 		return (PICL_WALK_CONTINUE);
2685 
2686 	/* Get the pmdevp for the CPU */
2687 	pmdevp = sensor_pmdevs;
2688 	while (pmdevp->sensor_id != -1) {
2689 		if (id == pmdevp->sensor_id)
2690 			break;
2691 		pmdevp++;
2692 	}
2693 
2694 	/* Return if didn't find the pmdevp for the cpu id */
2695 	if (pmdevp->sensor_id == -1)
2696 		return (PICL_WALK_CONTINUE);
2697 
2698 	/* Get the devfs-path property */
2699 	err = ptree_get_prop_by_name(nodeh, PICL_PROP_DEVFS_PATH, &proph);
2700 	if (err != PICL_SUCCESS)
2701 		return (PICL_WALK_CONTINUE);
2702 
2703 	err = ptree_get_propinfo(proph, &pinfo);
2704 	if ((err != PICL_SUCCESS) ||
2705 	    (pinfo.piclinfo.type != PICL_PTYPE_CHARSTRING))
2706 		return (PICL_WALK_CONTINUE);
2707 
2708 	psize = pinfo.piclinfo.size;
2709 	pmdevp->pmdev_name = malloc(psize);
2710 	if (pmdevp->pmdev_name == NULL)
2711 		return (PICL_WALK_CONTINUE);
2712 
2713 	err = ptree_get_propval(proph, pmdevp->pmdev_name, psize);
2714 	if (err != PICL_SUCCESS)
2715 		return (PICL_WALK_CONTINUE);
2716 
2717 	return (PICL_WALK_CONTINUE);
2718 }
2719 
2720 /*
2721  * Find the CPU's in the picl tree, set the devfs-path for pmdev_name
2722  */
2723 static void
setup_pmdev_names()2724 setup_pmdev_names()
2725 {
2726 	picl_nodehdl_t	plath;
2727 	int		err;
2728 
2729 	err = ptree_get_node_by_path(PLATFORM_PATH, &plath);
2730 	if (err != PICL_SUCCESS)
2731 		return;
2732 
2733 	err = ptree_walk_tree_by_class(plath, PICL_CLASS_CPU, NULL, cb_cpu);
2734 }
2735 
2736 
/*
 * Register this plug-in by handing my_reg_info to
 * picld_plugin_register().
 */
static void
piclenvd_register(void)
{
	picld_plugin_register(&my_reg_info);
}
2742 
2743 static void
piclenvd_init(void)2744 piclenvd_init(void)
2745 {
2746 	/*
2747 	 * Setup the names for the pm sensors, we do it just the first time
2748 	 */
2749 	if (pmdev_names_init == B_FALSE) {
2750 		(void) setup_pmdev_names();
2751 		pmdev_names_init = B_TRUE;
2752 	}
2753 
2754 	/*
2755 	 * Start environmental monitor/threads
2756 	 */
2757 	(void) pthread_rwlock_wrlock(&envd_rwlock);
2758 	if (envd_setup() != 0) {
2759 		(void) pthread_rwlock_unlock(&envd_rwlock);
2760 		envd_log(LOG_CRIT, ENVD_PLUGIN_INIT_FAILED);
2761 		return;
2762 	}
2763 	(void) pthread_rwlock_unlock(&envd_rwlock);
2764 
2765 	/*
2766 	 * Now setup/populate PICL tree
2767 	 */
2768 	env_picl_setup();
2769 }
2770 
/*
 * Plug-in teardown: delete the lpm device list and remove the PICL
 * nodes/properties created by this plug-in.  The monitoring threads are
 * deliberately left running (see the comment at the end of the body).
 */
static void
piclenvd_fini(void)
{
	/*
	 * Delete the lpm device list. After this the lpm information
	 * will not be used in determining the fan speed, till the lpm
	 * device information is initialized by setup_lpm_devices called
	 * by envd_setup.
	 */
	delete_lpm_devices();

	/*
	 * Invoke env_picl_destroy() to remove any PICL nodes/properties
	 * (including volatile properties) we created. Once this call
	 * returns, there can't be any more calls from the PICL framework
	 * to get current temperature or fan speed.
	 */
	env_picl_destroy();

	/*
	 * Since this is a critical plug-in, we know that it won't be
	 * unloaded and will be reinited again unless picld process is
	 * going away. Therefore, it's okay to let "envthr" and "pmthr"
	 * continue so that we can monitor the environment during SIGHUP
	 * handling also.
	 */
}
2798 
/*
 * Log a printf-style message: "pri" is passed to vsyslog() as the
 * priority, and "fmt" together with the variable arguments that follow
 * it supplies the message text.
 */
/*VARARGS2*/
void
envd_log(int pri, const char *fmt, ...)
{
	va_list	ap;

	va_start(ap, fmt);
	vsyslog(pri, fmt, ap);
	va_end(ap);
}
2809 
2810 #ifdef __lint
2811 /*
2812  * Redefine sigwait to posix style external declaration so that LINT
2813  * does not check against libc version of sigwait() and complain as
2814  * it uses different number of arguments.
2815  */
2816 #define	sigwait	my_posix_sigwait
2817 extern int my_posix_sigwait(const sigset_t *set, int *sig);
2818 #endif
2819 
/*
 * Private replacement for sleep().
 *
 * sleep() in libpthread is affected by the time being set back, which
 * can keep the "envthr" from waking up for an extended duration.  This
 * routine instead arms alarm() and waits for SIGALRM with sigwait().
 * That only works if SIGALRM is masked off in all other threads; it is
 * masked off in the main thread and hence in every thread, including
 * the envthr, which is the one calling this routine.
 *
 * No other thread may use SIGALRM or alarm() in this manner.
 *
 * A sleep_tm of zero returns 0 immediately.  Otherwise the return value
 * is whatever alarm(0) reports once sigwait() has returned, i.e. the
 * number of seconds that were still left on the alarm.
 */

static unsigned int
envd_sleep(unsigned int sleep_tm)
{
	sigset_t	waitset;
	int		caught;
	unsigned int	remaining = 0;

	if (sleep_tm != 0) {
		(void) sigemptyset(&waitset);
		(void) sigaddset(&waitset, SIGALRM);

		(void) alarm(sleep_tm);
		(void) sigwait(&waitset, &caught);

		/* Cancel the alarm and pick up any time left on it. */
		remaining = alarm(0);
	}
	return (remaining);
}
2851