/*
 * Copyright 2009, Intel Corporation
 * Copyright 2009, Sun Microsystems, Inc
 *
 * This file is part of PowerTOP
 *
 * This program file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program in a file named COPYING; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301 USA
 *
 * Authors:
 *	Arjan van de Ven <arjan@linux.intel.com>
 *	Eric C Saxe <eric.saxe@sun.com>
 *	Aubrey Li <aubrey.li@intel.com>
 */

/*
 * GPL Disclaimer
 *
 * For the avoidance of doubt, except that if any license choice other
 * than GPL or LGPL is available it will apply instead, Sun elects to
 * use only the General Public License version 2 (GPLv2) at this time
 * for any software where a choice of GPL license versions is made
 * available with the language indicating that GPLv2 or any later
 * version may be used, or where a choice of which version of the GPL
 * is applied is otherwise unspecified.
 */
39bcde4861SRafael Vanoni Polanczyk 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dtrace.h>
#include <kstat.h>
#include <errno.h>
#include "powertop.h"
46bcde4861SRafael Vanoni Polanczyk 
47636423dbSRafael Vanoni #define	HZ2MHZ(speed)	((speed) / MICROSEC)
48b47b5b34SRafael Vanoni #define	DTP_ARG_COUNT	2
49b47b5b34SRafael Vanoni #define	DTP_ARG_LENGTH	5
50bcde4861SRafael Vanoni Polanczyk 
51bcde4861SRafael Vanoni Polanczyk static uint64_t		max_cpufreq = 0;
52b47b5b34SRafael Vanoni static dtrace_hdl_t	*dtp;
53b47b5b34SRafael Vanoni static char		**dtp_argv;
54bcde4861SRafael Vanoni Polanczyk 
/*
 * Configuration file, helper binary and shell commands used to turn on
 * CPU power management through /etc/power.conf.
 * See pt_cpufreq_suggest() and pt_cpufreq_enable().
 */
static char default_conf[] = "/etc/power.conf";
static char default_pmconf[] = "/usr/sbin/pmconfig";
static char cpupm_enable[] = "echo cpupm enable >> /etc/power.conf";
static char cpupm_treshold[] = "echo cpu-threshold 1s >> /etc/power.conf";
63bcde4861SRafael Vanoni Polanczyk 
/*
 * DTrace program that tracks CPU frequency transitions on every CPU.
 *
 * $0 sizes the per-CPU 'last' timestamp array. Each firing of the
 * cpu-change-speed probe adds the time spent at the old speed (arg1) to
 * the @times aggregation, keyed by CPU (arg0) and old speed. The first
 * transition seen on a CPU is measured from the BEGIN timestamp, later
 * ones from that CPU's previous transition.
 *
 * The adjacent literals are concatenated without any separator, so the
 * program reaches the compiler as a single line.
 */
static const char *dtp_cpufreq =
	/* global state and start-of-trace timestamp */
	"hrtime_t last[$0];"
	"BEGIN"
	"{"
	"\tbegin = timestamp;"
	"}"
	/* CPU already seen: account time since its previous transition */
	":::cpu-change-speed"
	"/last[(processorid_t)arg0] != 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint64_t)arg1;"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);"
	"\tlast[this->cpu] = timestamp;"
	"}"
	/* first transition on this CPU: account time since BEGIN */
	":::cpu-change-speed"
	"/last[(processorid_t)arg0] == 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint64_t)arg1;"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
	"\tlast[this->cpu] = timestamp;"
	"}";
91bcde4861SRafael Vanoni Polanczyk 
/*
 * Single-CPU variant of the frequency-transition DTrace program.
 *
 * Only probe firings whose CPU (arg0) equals $1 are recorded, so one
 * scalar 'last' timestamp takes the place of the per-CPU array.
 */
static const char *dtp_cpufreq_c =
	/* global state and start-of-trace timestamp */
	"hrtime_t last;"
	"BEGIN"
	"{"
	"\tbegin = timestamp;"
	"}"
	/* later transitions: account time since the previous one */
	":::cpu-change-speed"
	"/(processorid_t)arg0 == $1 &&"
	" last != 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint64_t)arg1;"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - last);"
	"\tlast = timestamp;"
	"}"
	/* first transition: account time since BEGIN */
	":::cpu-change-speed"
	"/(processorid_t)arg0 == $1 &&"
	" last == 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint64_t)arg1;"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
	"\tlast = timestamp;"
	"}";
121b47b5b34SRafael Vanoni 
122b47b5b34SRafael Vanoni static int	pt_cpufreq_setup(void);
123bcde4861SRafael Vanoni Polanczyk static int	pt_cpufreq_snapshot(void);
124bcde4861SRafael Vanoni Polanczyk static int	pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
125b47b5b34SRafael Vanoni static void	pt_cpufreq_stat_account(double, uint_t);
1269bbf5ba1SRafael Vanoni static int	pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t);
1279bbf5ba1SRafael Vanoni static int	pt_cpufreq_check_pm(void);
1289bbf5ba1SRafael Vanoni static void	pt_cpufreq_enable(void);
129b47b5b34SRafael Vanoni 
130b47b5b34SRafael Vanoni static int
pt_cpufreq_setup(void)131b47b5b34SRafael Vanoni pt_cpufreq_setup(void)
132b47b5b34SRafael Vanoni {
133b47b5b34SRafael Vanoni 	if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL)
134*2d83778aSRafael Vanoni 		return (1);
135b47b5b34SRafael Vanoni 
136b47b5b34SRafael Vanoni 	if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) {
137b47b5b34SRafael Vanoni 		free(dtp_argv);
138*2d83778aSRafael Vanoni 		return (1);
139b47b5b34SRafael Vanoni 	}
140b47b5b34SRafael Vanoni 
141b47b5b34SRafael Vanoni 	(void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed);
142b47b5b34SRafael Vanoni 
143636423dbSRafael Vanoni 	if (PT_ON_CPU) {
144b47b5b34SRafael Vanoni 		if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH))
145b47b5b34SRafael Vanoni 		    == NULL) {
146b47b5b34SRafael Vanoni 			free(dtp_argv[0]);
147b47b5b34SRafael Vanoni 			free(dtp_argv);
148*2d83778aSRafael Vanoni 			return (1);
149b47b5b34SRafael Vanoni 		}
150b47b5b34SRafael Vanoni 		(void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu);
151b47b5b34SRafael Vanoni 	}
152b47b5b34SRafael Vanoni 
153b47b5b34SRafael Vanoni 	return (0);
154b47b5b34SRafael Vanoni }
155bcde4861SRafael Vanoni Polanczyk 
156bcde4861SRafael Vanoni Polanczyk /*
157bcde4861SRafael Vanoni Polanczyk  * Perform setup necessary to enumerate and track CPU speed changes
158bcde4861SRafael Vanoni Polanczyk  */
159bcde4861SRafael Vanoni Polanczyk int
pt_cpufreq_stat_prepare(void)160bcde4861SRafael Vanoni Polanczyk pt_cpufreq_stat_prepare(void)
161bcde4861SRafael Vanoni Polanczyk {
162bcde4861SRafael Vanoni Polanczyk 	dtrace_prog_t 		*prog;
163bcde4861SRafael Vanoni Polanczyk 	dtrace_proginfo_t 	info;
164bcde4861SRafael Vanoni Polanczyk 	dtrace_optval_t 	statustime;
165bcde4861SRafael Vanoni Polanczyk 	kstat_ctl_t 		*kc;
166bcde4861SRafael Vanoni Polanczyk 	kstat_t 		*ksp;
167bcde4861SRafael Vanoni Polanczyk 	kstat_named_t 		*knp;
168b47b5b34SRafael Vanoni 	freq_state_info_t 	*state;
169b47b5b34SRafael Vanoni 	char 			*s, *token, *prog_ptr;
170bcde4861SRafael Vanoni Polanczyk 	int 			err;
171bcde4861SRafael Vanoni Polanczyk 
172b47b5b34SRafael Vanoni 	if ((err = pt_cpufreq_setup()) != 0) {
173*2d83778aSRafael Vanoni 		pt_error("failed to setup %s report (couldn't allocate "
174*2d83778aSRafael Vanoni 		    "memory)\n", g_msg_freq_state);
175b47b5b34SRafael Vanoni 		return (errno);
176b47b5b34SRafael Vanoni 	}
177b47b5b34SRafael Vanoni 
178b47b5b34SRafael Vanoni 	state = g_pstate_info;
179b47b5b34SRafael Vanoni 	if ((g_cpu_power_states = calloc((size_t)g_ncpus,
180b47b5b34SRafael Vanoni 	    sizeof (cpu_power_info_t))) == NULL)
181b47b5b34SRafael Vanoni 		return (-1);
182bcde4861SRafael Vanoni Polanczyk 
183bcde4861SRafael Vanoni Polanczyk 	/*
184bcde4861SRafael Vanoni Polanczyk 	 * Enumerate the CPU frequencies
185bcde4861SRafael Vanoni Polanczyk 	 */
186bcde4861SRafael Vanoni Polanczyk 	if ((kc = kstat_open()) == NULL)
187bcde4861SRafael Vanoni Polanczyk 		return (errno);
188bcde4861SRafael Vanoni Polanczyk 
189b47b5b34SRafael Vanoni 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL);
190bcde4861SRafael Vanoni Polanczyk 
191b47b5b34SRafael Vanoni 	if (ksp == NULL) {
192b47b5b34SRafael Vanoni 		err = errno;
193b47b5b34SRafael Vanoni 		(void) kstat_close(kc);
194b47b5b34SRafael Vanoni 		return (err);
195b47b5b34SRafael Vanoni 	}
196bcde4861SRafael Vanoni Polanczyk 
197bcde4861SRafael Vanoni Polanczyk 	(void) kstat_read(kc, ksp, NULL);
198bcde4861SRafael Vanoni Polanczyk 
199bcde4861SRafael Vanoni Polanczyk 	knp = kstat_data_lookup(ksp, "supported_frequencies_Hz");
200bcde4861SRafael Vanoni Polanczyk 	s = knp->value.str.addr.ptr;
201bcde4861SRafael Vanoni Polanczyk 
202b47b5b34SRafael Vanoni 	g_npstates = 0;
203bcde4861SRafael Vanoni Polanczyk 
204bcde4861SRafael Vanoni Polanczyk 	for (token = strtok(s, ":"), s = NULL;
205*2d83778aSRafael Vanoni 	    token != NULL && g_npstates < NSTATES;
206bcde4861SRafael Vanoni Polanczyk 	    token = strtok(NULL, ":")) {
207bcde4861SRafael Vanoni Polanczyk 
208bcde4861SRafael Vanoni Polanczyk 		state->speed = HZ2MHZ(atoll(token));
209bcde4861SRafael Vanoni Polanczyk 
210bcde4861SRafael Vanoni Polanczyk 		if (state->speed > max_cpufreq)
211bcde4861SRafael Vanoni Polanczyk 			max_cpufreq = state->speed;
212bcde4861SRafael Vanoni Polanczyk 
213bcde4861SRafael Vanoni Polanczyk 		state->total_time = (uint64_t)0;
214bcde4861SRafael Vanoni Polanczyk 
215b47b5b34SRafael Vanoni 		g_npstates++;
216bcde4861SRafael Vanoni Polanczyk 		state++;
217bcde4861SRafael Vanoni Polanczyk 	}
218bcde4861SRafael Vanoni Polanczyk 
219bcde4861SRafael Vanoni Polanczyk 	if (token != NULL)
220*2d83778aSRafael Vanoni 		pt_error("CPU exceeds the supported number of %s\n",
221*2d83778aSRafael Vanoni 		    g_msg_freq_state);
222bcde4861SRafael Vanoni Polanczyk 
223bcde4861SRafael Vanoni Polanczyk 	(void) kstat_close(kc);
224bcde4861SRafael Vanoni Polanczyk 
225bcde4861SRafael Vanoni Polanczyk 	/*
226bcde4861SRafael Vanoni Polanczyk 	 * Return if speed transition is not supported
227bcde4861SRafael Vanoni Polanczyk 	 */
228b47b5b34SRafael Vanoni 	if (g_npstates < 2)
229bcde4861SRafael Vanoni Polanczyk 		return (-1);
230bcde4861SRafael Vanoni Polanczyk 
231bcde4861SRafael Vanoni Polanczyk 	/*
232bcde4861SRafael Vanoni Polanczyk 	 * Setup DTrace to look for CPU frequency changes
233bcde4861SRafael Vanoni Polanczyk 	 */
234b47b5b34SRafael Vanoni 	if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
235*2d83778aSRafael Vanoni 		pt_error("cannot open dtrace library for the %s report: %s\n",
236*2d83778aSRafael Vanoni 		    g_msg_freq_state, dtrace_errmsg(NULL, err));
237bcde4861SRafael Vanoni Polanczyk 		return (-2);
238bcde4861SRafael Vanoni Polanczyk 	}
239b47b5b34SRafael Vanoni 
240b47b5b34SRafael Vanoni 	/*
241b47b5b34SRafael Vanoni 	 * Execute different scripts (defined above) depending on
242b47b5b34SRafael Vanoni 	 * user specified options. Default mode uses dtp_cpufreq.
243b47b5b34SRafael Vanoni 	 */
244636423dbSRafael Vanoni 	if (PT_ON_CPU)
245b47b5b34SRafael Vanoni 		prog_ptr = (char *)dtp_cpufreq_c;
246b47b5b34SRafael Vanoni 	else
247b47b5b34SRafael Vanoni 		prog_ptr = (char *)dtp_cpufreq;
248b47b5b34SRafael Vanoni 
249b47b5b34SRafael Vanoni 	if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
250b47b5b34SRafael Vanoni 	    DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) {
251*2d83778aSRafael Vanoni 		pt_error("failed to compile %s program\n", g_msg_freq_state);
252b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
253bcde4861SRafael Vanoni Polanczyk 	}
254b47b5b34SRafael Vanoni 
255b47b5b34SRafael Vanoni 	if (dtrace_program_exec(dtp, prog, &info) == -1) {
256*2d83778aSRafael Vanoni 		pt_error("failed to enable %s probes\n", g_msg_freq_state);
257b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
258bcde4861SRafael Vanoni Polanczyk 	}
259b47b5b34SRafael Vanoni 
260*2d83778aSRafael Vanoni 	if (dtrace_setopt(dtp, "aggsize", "128k") == -1)
261*2d83778aSRafael Vanoni 		pt_error("failed to set %s 'aggsize'\n", g_msg_freq_state);
262b47b5b34SRafael Vanoni 
263*2d83778aSRafael Vanoni 	if (dtrace_setopt(dtp, "aggrate", "0") == -1)
264*2d83778aSRafael Vanoni 		pt_error("failed to set %s 'aggrate'\n", g_msg_freq_state);
265b47b5b34SRafael Vanoni 
266*2d83778aSRafael Vanoni 	if (dtrace_setopt(dtp, "aggpercpu", 0) == -1)
267*2d83778aSRafael Vanoni 		pt_error("failed to set %s 'aggpercpu'\n", g_msg_freq_state);
268b47b5b34SRafael Vanoni 
269b47b5b34SRafael Vanoni 	if (dtrace_go(dtp) != 0) {
270*2d83778aSRafael Vanoni 		pt_error("failed to start %s observation\n", g_msg_freq_state);
271b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
272bcde4861SRafael Vanoni Polanczyk 	}
273b47b5b34SRafael Vanoni 
274b47b5b34SRafael Vanoni 	if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
275*2d83778aSRafael Vanoni 		pt_error("failed to get %s 'statusrate'\n", g_msg_freq_state);
276b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
277bcde4861SRafael Vanoni Polanczyk 	}
278bcde4861SRafael Vanoni Polanczyk 
279bcde4861SRafael Vanoni Polanczyk 	return (0);
280bcde4861SRafael Vanoni Polanczyk }
281bcde4861SRafael Vanoni Polanczyk 
282bcde4861SRafael Vanoni Polanczyk /*
283bcde4861SRafael Vanoni Polanczyk  * The DTrace probes have already been enabled, and are tracking
284bcde4861SRafael Vanoni Polanczyk  * CPU speed transitions. Take a snapshot of the aggregations, and
285bcde4861SRafael Vanoni Polanczyk  * look for any CPUs that have made a speed transition over the last
286bcde4861SRafael Vanoni Polanczyk  * sampling interval. Note that the aggregations may be empty if no
287bcde4861SRafael Vanoni Polanczyk  * speed transitions took place over the last interval. In that case,
288bcde4861SRafael Vanoni Polanczyk  * notate that we have already accounted for the time, so that when
289bcde4861SRafael Vanoni Polanczyk  * we do encounter a speed transition in a future sampling interval
290bcde4861SRafael Vanoni Polanczyk  * we can subtract that time back out.
291bcde4861SRafael Vanoni Polanczyk  */
292bcde4861SRafael Vanoni Polanczyk int
pt_cpufreq_stat_collect(double interval)293bcde4861SRafael Vanoni Polanczyk pt_cpufreq_stat_collect(double interval)
294bcde4861SRafael Vanoni Polanczyk {
295636423dbSRafael Vanoni 	int i, ret;
296bcde4861SRafael Vanoni Polanczyk 
297bcde4861SRafael Vanoni Polanczyk 	/*
298bcde4861SRafael Vanoni Polanczyk 	 * Zero out the interval time reported by DTrace for
299bcde4861SRafael Vanoni Polanczyk 	 * this interval
300bcde4861SRafael Vanoni Polanczyk 	 */
301b47b5b34SRafael Vanoni 	for (i = 0; i < g_npstates; i++)
302b47b5b34SRafael Vanoni 		g_pstate_info[i].total_time = 0;
303bcde4861SRafael Vanoni Polanczyk 
304bcde4861SRafael Vanoni Polanczyk 	for (i = 0; i < g_ncpus; i++)
305b47b5b34SRafael Vanoni 		g_cpu_power_states[i].dtrace_time = 0;
306bcde4861SRafael Vanoni Polanczyk 
307b47b5b34SRafael Vanoni 	if (dtrace_status(dtp) == -1)
308bcde4861SRafael Vanoni Polanczyk 		return (-1);
309bcde4861SRafael Vanoni Polanczyk 
310b47b5b34SRafael Vanoni 	if (dtrace_aggregate_snap(dtp) != 0)
311*2d83778aSRafael Vanoni 		pt_error("failed to collect data for %s\n", g_msg_freq_state);
312bcde4861SRafael Vanoni Polanczyk 
313b47b5b34SRafael Vanoni 	if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk,
314bcde4861SRafael Vanoni Polanczyk 	    NULL) != 0)
315*2d83778aSRafael Vanoni 		pt_error("failed to sort data for %s\n", g_msg_freq_state);
316bcde4861SRafael Vanoni Polanczyk 
317b47b5b34SRafael Vanoni 	dtrace_aggregate_clear(dtp);
318bcde4861SRafael Vanoni Polanczyk 
319bcde4861SRafael Vanoni Polanczyk 	if ((ret = pt_cpufreq_snapshot()) != 0) {
320*2d83778aSRafael Vanoni 		pt_error("failed to snapshot %s state\n", g_msg_freq_state);
321bcde4861SRafael Vanoni Polanczyk 		return (ret);
322bcde4861SRafael Vanoni Polanczyk 	}
323bcde4861SRafael Vanoni Polanczyk 
324b47b5b34SRafael Vanoni 	switch (g_op_mode) {
325636423dbSRafael Vanoni 	case PT_MODE_CPU:
326b47b5b34SRafael Vanoni 		pt_cpufreq_stat_account(interval, g_observed_cpu);
327b47b5b34SRafael Vanoni 		break;
328636423dbSRafael Vanoni 	case PT_MODE_DEFAULT:
329b47b5b34SRafael Vanoni 	default:
330b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus_observed; i++)
331b47b5b34SRafael Vanoni 			pt_cpufreq_stat_account(interval, i);
332b47b5b34SRafael Vanoni 		break;
333b47b5b34SRafael Vanoni 	}
334bcde4861SRafael Vanoni Polanczyk 
335b47b5b34SRafael Vanoni 	return (0);
336b47b5b34SRafael Vanoni }
337bcde4861SRafael Vanoni Polanczyk 
338b47b5b34SRafael Vanoni static void
pt_cpufreq_stat_account(double interval,uint_t cpu)339b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu)
340b47b5b34SRafael Vanoni {
341636423dbSRafael Vanoni 	cpu_power_info_t 	*cpu_pow;
342b47b5b34SRafael Vanoni 	uint64_t 		speed;
343b47b5b34SRafael Vanoni 	hrtime_t 		duration;
344b47b5b34SRafael Vanoni 	int			i;
345bcde4861SRafael Vanoni Polanczyk 
346b47b5b34SRafael Vanoni 	cpu_pow = &g_cpu_power_states[cpu];
347b47b5b34SRafael Vanoni 	speed = cpu_pow->current_pstate;
348b47b5b34SRafael Vanoni 
349636423dbSRafael Vanoni 	duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time;
350636423dbSRafael Vanoni 
351636423dbSRafael Vanoni 	/*
352636423dbSRafael Vanoni 	 * 'duration' may be a negative value when we're using or forcing a
353636423dbSRafael Vanoni 	 * small interval, and the amount of time already accounted ends up
354636423dbSRafael Vanoni 	 * being larger than the the former.
355636423dbSRafael Vanoni 	 */
356636423dbSRafael Vanoni 	if (duration < 0)
357636423dbSRafael Vanoni 		return;
358b47b5b34SRafael Vanoni 
359b47b5b34SRafael Vanoni 	for (i = 0; i < g_npstates; i++) {
360b47b5b34SRafael Vanoni 		if (g_pstate_info[i].speed == speed) {
361b47b5b34SRafael Vanoni 			g_pstate_info[i].total_time += duration;
362b47b5b34SRafael Vanoni 			cpu_pow->time_accounted += duration;
363636423dbSRafael Vanoni 			cpu_pow->speed_accounted = speed;
364bcde4861SRafael Vanoni Polanczyk 		}
365bcde4861SRafael Vanoni Polanczyk 	}
366bcde4861SRafael Vanoni Polanczyk }
367bcde4861SRafael Vanoni Polanczyk 
368bcde4861SRafael Vanoni Polanczyk /*
369bcde4861SRafael Vanoni Polanczyk  * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
370bcde4861SRafael Vanoni Polanczyk  */
371bcde4861SRafael Vanoni Polanczyk static int
pt_cpufreq_snapshot(void)372bcde4861SRafael Vanoni Polanczyk pt_cpufreq_snapshot(void)
373bcde4861SRafael Vanoni Polanczyk {
374636423dbSRafael Vanoni 	kstat_ctl_t 	*kc;
375636423dbSRafael Vanoni 	int 		ret;
376636423dbSRafael Vanoni 	uint_t		i;
377bcde4861SRafael Vanoni Polanczyk 
378bcde4861SRafael Vanoni Polanczyk 	if ((kc = kstat_open()) == NULL)
379bcde4861SRafael Vanoni Polanczyk 		return (errno);
380bcde4861SRafael Vanoni Polanczyk 
381b47b5b34SRafael Vanoni 	switch (g_op_mode) {
382636423dbSRafael Vanoni 	case PT_MODE_CPU:
383b47b5b34SRafael Vanoni 		ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu);
384b47b5b34SRafael Vanoni 		break;
385636423dbSRafael Vanoni 	case PT_MODE_DEFAULT:
386b47b5b34SRafael Vanoni 	default:
387b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus_observed; i++)
388b47b5b34SRafael Vanoni 			if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0)
389b47b5b34SRafael Vanoni 				break;
390b47b5b34SRafael Vanoni 		break;
391b47b5b34SRafael Vanoni 	}
392bcde4861SRafael Vanoni Polanczyk 
393b47b5b34SRafael Vanoni 	if (kstat_close(kc) != 0)
394*2d83778aSRafael Vanoni 		pt_error("couldn't close %s kstat\n", g_msg_freq_state);
395bcde4861SRafael Vanoni Polanczyk 
396b47b5b34SRafael Vanoni 	return (ret);
397b47b5b34SRafael Vanoni }
398bcde4861SRafael Vanoni Polanczyk 
399b47b5b34SRafael Vanoni static int
pt_cpufreq_snapshot_cpu(kstat_ctl_t * kc,uint_t cpu)400b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu)
401b47b5b34SRafael Vanoni {
402b47b5b34SRafael Vanoni 	kstat_t 		*ksp;
403b47b5b34SRafael Vanoni 	kstat_named_t 		*knp;
404b47b5b34SRafael Vanoni 
405b47b5b34SRafael Vanoni 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL);
406b47b5b34SRafael Vanoni 	if (ksp == NULL) {
407*2d83778aSRafael Vanoni 		pt_error("couldn't find 'cpu_info' kstat for CPU %d\n while "
408*2d83778aSRafael Vanoni 		    "taking a snapshot of %s\n", cpu, g_msg_freq_state);
409b47b5b34SRafael Vanoni 		return (1);
410bcde4861SRafael Vanoni Polanczyk 	}
411bcde4861SRafael Vanoni Polanczyk 
412b47b5b34SRafael Vanoni 	if (kstat_read(kc, ksp, NULL) == -1) {
413*2d83778aSRafael Vanoni 		pt_error("couldn't read 'cpu_info' kstat for CPU %d\n while "
414*2d83778aSRafael Vanoni 		    "taking a snapshot of %s\n", cpu, g_msg_freq_state);
415b47b5b34SRafael Vanoni 		return (2);
416b47b5b34SRafael Vanoni 	}
417b47b5b34SRafael Vanoni 
418b47b5b34SRafael Vanoni 	knp = kstat_data_lookup(ksp, "current_clock_Hz");
419b47b5b34SRafael Vanoni 	if (knp == NULL) {
420*2d83778aSRafael Vanoni 		pt_error("couldn't find 'current_clock_Hz' kstat for CPU %d "
421*2d83778aSRafael Vanoni 		    "while taking a snapshot of %s\n", cpu, g_msg_freq_state);
422b47b5b34SRafael Vanoni 		return (3);
423b47b5b34SRafael Vanoni 	}
424b47b5b34SRafael Vanoni 
425b47b5b34SRafael Vanoni 	g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64);
426bcde4861SRafael Vanoni Polanczyk 
427bcde4861SRafael Vanoni Polanczyk 	return (0);
428bcde4861SRafael Vanoni Polanczyk }
429bcde4861SRafael Vanoni Polanczyk 
430bcde4861SRafael Vanoni Polanczyk /*
431bcde4861SRafael Vanoni Polanczyk  * DTrace aggregation walker that sorts through a snapshot of the
432bcde4861SRafael Vanoni Polanczyk  * aggregation data collected during firings of the cpu-change-speed
433bcde4861SRafael Vanoni Polanczyk  * probe.
434bcde4861SRafael Vanoni Polanczyk  */
435bcde4861SRafael Vanoni Polanczyk /*ARGSUSED*/
436bcde4861SRafael Vanoni Polanczyk static int
pt_cpufreq_dtrace_walk(const dtrace_aggdata_t * data,void * arg)437bcde4861SRafael Vanoni Polanczyk pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
438bcde4861SRafael Vanoni Polanczyk {
439bcde4861SRafael Vanoni Polanczyk 	dtrace_aggdesc_t 	*aggdesc = data->dtada_desc;
440bcde4861SRafael Vanoni Polanczyk 	dtrace_recdesc_t 	*cpu_rec, *speed_rec;
441636423dbSRafael Vanoni 	cpu_power_info_t 	*cp;
442bcde4861SRafael Vanoni Polanczyk 	int32_t 		cpu;
443bcde4861SRafael Vanoni Polanczyk 	uint64_t 		speed;
444636423dbSRafael Vanoni 	hrtime_t 		res;
445bcde4861SRafael Vanoni Polanczyk 	int 			i;
446bcde4861SRafael Vanoni Polanczyk 
447bcde4861SRafael Vanoni Polanczyk 	if (strcmp(aggdesc->dtagd_name, "times") == 0) {
448bcde4861SRafael Vanoni Polanczyk 		cpu_rec = &aggdesc->dtagd_rec[1];
449bcde4861SRafael Vanoni Polanczyk 		speed_rec = &aggdesc->dtagd_rec[2];
450bcde4861SRafael Vanoni Polanczyk 
451bcde4861SRafael Vanoni Polanczyk 		/* LINTED - alignment */
452bcde4861SRafael Vanoni Polanczyk 		cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset);
453636423dbSRafael Vanoni 
454636423dbSRafael Vanoni 		/* LINTED - alignment */
455636423dbSRafael Vanoni 		res = *((hrtime_t *)(data->dtada_percpu[cpu]));
456636423dbSRafael Vanoni 
457bcde4861SRafael Vanoni Polanczyk 		/* LINTED - alignment */
458bcde4861SRafael Vanoni Polanczyk 		speed = *(uint64_t *)(data->dtada_data +
459bcde4861SRafael Vanoni Polanczyk 		    speed_rec->dtrd_offset);
460bcde4861SRafael Vanoni Polanczyk 
461636423dbSRafael Vanoni 		if (speed == 0)
462bcde4861SRafael Vanoni Polanczyk 			speed = max_cpufreq;
463636423dbSRafael Vanoni 		else
464636423dbSRafael Vanoni 			speed = HZ2MHZ(speed);
465bcde4861SRafael Vanoni Polanczyk 
466bcde4861SRafael Vanoni Polanczyk 		/*
467bcde4861SRafael Vanoni Polanczyk 		 * We have an aggregation record for "cpu" being at "speed"
468bcde4861SRafael Vanoni Polanczyk 		 * for an interval of "n" nanoseconds. The reported interval
469bcde4861SRafael Vanoni Polanczyk 		 * may exceed the powertop sampling interval, since we only
470bcde4861SRafael Vanoni Polanczyk 		 * notice during potentially infrequent firings of the
471bcde4861SRafael Vanoni Polanczyk 		 * "speed change" DTrace probe. In this case powertop would
472bcde4861SRafael Vanoni Polanczyk 		 * have already accounted for the portions of the interval
473b47b5b34SRafael Vanoni 		 * that happened during prior powertop samplings, so subtract
474bcde4861SRafael Vanoni Polanczyk 		 * out time already accounted.
475bcde4861SRafael Vanoni Polanczyk 		 */
476636423dbSRafael Vanoni 		cp = &g_cpu_power_states[cpu];
477bcde4861SRafael Vanoni Polanczyk 
478b47b5b34SRafael Vanoni 		for (i = 0; i < g_npstates; i++) {
479b47b5b34SRafael Vanoni 			if (g_pstate_info[i].speed == speed) {
480636423dbSRafael Vanoni 
481636423dbSRafael Vanoni 				if (cp->time_accounted > 0 &&
482636423dbSRafael Vanoni 				    cp->speed_accounted == speed) {
483636423dbSRafael Vanoni 					if (res > cp->time_accounted) {
484636423dbSRafael Vanoni 						res -= cp->time_accounted;
485636423dbSRafael Vanoni 						cp->time_accounted = 0;
486636423dbSRafael Vanoni 						cp->speed_accounted = 0;
487636423dbSRafael Vanoni 					} else {
488636423dbSRafael Vanoni 						return (DTRACE_AGGWALK_NEXT);
489bcde4861SRafael Vanoni Polanczyk 					}
490bcde4861SRafael Vanoni Polanczyk 				}
491636423dbSRafael Vanoni 
492636423dbSRafael Vanoni 				g_pstate_info[i].total_time += res;
493636423dbSRafael Vanoni 				cp->dtrace_time += res;
494bcde4861SRafael Vanoni Polanczyk 			}
495bcde4861SRafael Vanoni Polanczyk 		}
496bcde4861SRafael Vanoni Polanczyk 	}
497636423dbSRafael Vanoni 
498bcde4861SRafael Vanoni Polanczyk 	return (DTRACE_AGGWALK_NEXT);
499bcde4861SRafael Vanoni Polanczyk }
500bcde4861SRafael Vanoni Polanczyk 
501bcde4861SRafael Vanoni Polanczyk /*
5029bbf5ba1SRafael Vanoni  * Checks if PM is enabled in /etc/power.conf, enabling if not
503bcde4861SRafael Vanoni Polanczyk  */
504bcde4861SRafael Vanoni Polanczyk void
pt_cpufreq_suggest(void)5059bbf5ba1SRafael Vanoni pt_cpufreq_suggest(void)
506bcde4861SRafael Vanoni Polanczyk {
5079bbf5ba1SRafael Vanoni 	int ret = pt_cpufreq_check_pm();
5089bbf5ba1SRafael Vanoni 
5099bbf5ba1SRafael Vanoni 	switch (ret) {
5109bbf5ba1SRafael Vanoni 	case 0:
5119bbf5ba1SRafael Vanoni 		pt_sugg_add("Suggestion: enable CPU power management by "
5129bbf5ba1SRafael Vanoni 		    "pressing the P key", 40, 'P', (char *)g_msg_freq_enable,
5139bbf5ba1SRafael Vanoni 		    pt_cpufreq_enable);
5149bbf5ba1SRafael Vanoni 		break;
5159bbf5ba1SRafael Vanoni 	}
516bcde4861SRafael Vanoni Polanczyk }
517bcde4861SRafael Vanoni Polanczyk 
518bcde4861SRafael Vanoni Polanczyk /*
5199bbf5ba1SRafael Vanoni  * Checks /etc/power.conf and returns:
5209bbf5ba1SRafael Vanoni  *
5219bbf5ba1SRafael Vanoni  *     0 if CPUPM is not enabled
5229bbf5ba1SRafael Vanoni  *     1 if there's nothing for us to do because:
5239bbf5ba1SRafael Vanoni  *         (a) the system does not support frequency scaling
5249bbf5ba1SRafael Vanoni  *         (b) there's no power.conf.
5259bbf5ba1SRafael Vanoni  *     2 if CPUPM is enabled
5269bbf5ba1SRafael Vanoni  *     3 if the system is running in poll-mode, as opposed to event-mode
5279bbf5ba1SRafael Vanoni  *
5289bbf5ba1SRafael Vanoni  * Notice the ordering of the return values, they will be picked up and
5299bbf5ba1SRafael Vanoni  * switched upon ascendingly.
530bcde4861SRafael Vanoni Polanczyk  */
5319bbf5ba1SRafael Vanoni static int
pt_cpufreq_check_pm(void)5329bbf5ba1SRafael Vanoni pt_cpufreq_check_pm(void)
533bcde4861SRafael Vanoni Polanczyk {
5349bbf5ba1SRafael Vanoni 	char line[1024];
5359bbf5ba1SRafael Vanoni 	FILE *file;
5369bbf5ba1SRafael Vanoni 	int ret = 0;
537bcde4861SRafael Vanoni Polanczyk 
5389bbf5ba1SRafael Vanoni 	if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL)
5399bbf5ba1SRafael Vanoni 		return (1);
540bcde4861SRafael Vanoni Polanczyk 
541bcde4861SRafael Vanoni Polanczyk 	(void) memset(line, 0, 1024);
542bcde4861SRafael Vanoni Polanczyk 
5439bbf5ba1SRafael Vanoni 	while (fgets(line, 1024, file)) {
544bcde4861SRafael Vanoni Polanczyk 		if (strstr(line, "cpupm")) {
545bcde4861SRafael Vanoni Polanczyk 			if (strstr(line, "enable")) {
546bcde4861SRafael Vanoni Polanczyk 				(void) fclose(file);
5479bbf5ba1SRafael Vanoni 				return (2);
548bcde4861SRafael Vanoni Polanczyk 			}
549bcde4861SRafael Vanoni Polanczyk 		}
5509bbf5ba1SRafael Vanoni 		if (strstr(line, "poll"))
5519bbf5ba1SRafael Vanoni 			ret = 3;
552bcde4861SRafael Vanoni Polanczyk 	}
553bcde4861SRafael Vanoni Polanczyk 
554bcde4861SRafael Vanoni Polanczyk 	(void) fclose(file);
5559bbf5ba1SRafael Vanoni 
5569bbf5ba1SRafael Vanoni 	return (ret);
5579bbf5ba1SRafael Vanoni }
5589bbf5ba1SRafael Vanoni 
5599bbf5ba1SRafael Vanoni /*
5609bbf5ba1SRafael Vanoni  * Used as a suggestion, sets PM in /etc/power.conf and
5619bbf5ba1SRafael Vanoni  * a 1sec threshold, then calls /usr/sbin/pmconfig
5629bbf5ba1SRafael Vanoni  */
5639bbf5ba1SRafael Vanoni static void
pt_cpufreq_enable(void)5649bbf5ba1SRafael Vanoni pt_cpufreq_enable(void)
5659bbf5ba1SRafael Vanoni {
5669bbf5ba1SRafael Vanoni 	(void) system(cpupm_enable);
5679bbf5ba1SRafael Vanoni 	(void) system(cpupm_treshold);
5689bbf5ba1SRafael Vanoni 	(void) system(default_pmconf);
5699bbf5ba1SRafael Vanoni 
5709bbf5ba1SRafael Vanoni 	if (pt_sugg_remove(pt_cpufreq_enable) == 0)
571*2d83778aSRafael Vanoni 		pt_error("failed to remove a %s suggestion\n",
572*2d83778aSRafael Vanoni 		    g_msg_freq_state);
573bcde4861SRafael Vanoni Polanczyk }
574