1bcde4861SRafael Vanoni Polanczyk /*
2b47b5b34SRafael Vanoni * Copyright 2009, Intel Corporation
3b47b5b34SRafael Vanoni * Copyright 2009, Sun Microsystems, Inc
4bcde4861SRafael Vanoni Polanczyk *
5bcde4861SRafael Vanoni Polanczyk * This file is part of PowerTOP
6bcde4861SRafael Vanoni Polanczyk *
7bcde4861SRafael Vanoni Polanczyk * This program file is free software; you can redistribute it and/or modify it
8bcde4861SRafael Vanoni Polanczyk * under the terms of the GNU General Public License as published by the
9bcde4861SRafael Vanoni Polanczyk * Free Software Foundation; version 2 of the License.
10bcde4861SRafael Vanoni Polanczyk *
11bcde4861SRafael Vanoni Polanczyk * This program is distributed in the hope that it will be useful, but WITHOUT
12bcde4861SRafael Vanoni Polanczyk * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13bcde4861SRafael Vanoni Polanczyk * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14bcde4861SRafael Vanoni Polanczyk * for more details.
15bcde4861SRafael Vanoni Polanczyk *
16bcde4861SRafael Vanoni Polanczyk * You should have received a copy of the GNU General Public License
17bcde4861SRafael Vanoni Polanczyk * along with this program in a file named COPYING; if not, write to the
18bcde4861SRafael Vanoni Polanczyk * Free Software Foundation, Inc.,
19bcde4861SRafael Vanoni Polanczyk * 51 Franklin Street, Fifth Floor,
20bcde4861SRafael Vanoni Polanczyk * Boston, MA 02110-1301 USA
21bcde4861SRafael Vanoni Polanczyk *
22bcde4861SRafael Vanoni Polanczyk * Authors:
23bcde4861SRafael Vanoni Polanczyk * Arjan van de Ven <arjan@linux.intel.com>
24bcde4861SRafael Vanoni Polanczyk * Eric C Saxe <eric.saxe@sun.com>
25bcde4861SRafael Vanoni Polanczyk * Aubrey Li <aubrey.li@intel.com>
26bcde4861SRafael Vanoni Polanczyk */
27bcde4861SRafael Vanoni Polanczyk
28bcde4861SRafael Vanoni Polanczyk /*
29bcde4861SRafael Vanoni Polanczyk * GPL Disclaimer
30bcde4861SRafael Vanoni Polanczyk *
31bcde4861SRafael Vanoni Polanczyk * For the avoidance of doubt, except that if any license choice other
32bcde4861SRafael Vanoni Polanczyk * than GPL or LGPL is available it will apply instead, Sun elects to
33bcde4861SRafael Vanoni Polanczyk * use only the General Public License version 2 (GPLv2) at this time
34bcde4861SRafael Vanoni Polanczyk * for any software where a choice of GPL license versions is made
35bcde4861SRafael Vanoni Polanczyk * available with the language indicating that GPLv2 or any later
36bcde4861SRafael Vanoni Polanczyk * version may be used, or where a choice of which version of the GPL
37bcde4861SRafael Vanoni Polanczyk * is applied is otherwise unspecified.
38bcde4861SRafael Vanoni Polanczyk */
39bcde4861SRafael Vanoni Polanczyk
40bcde4861SRafael Vanoni Polanczyk #include <stdlib.h>
41bcde4861SRafael Vanoni Polanczyk #include <string.h>
42bcde4861SRafael Vanoni Polanczyk #include <dtrace.h>
43bcde4861SRafael Vanoni Polanczyk #include <kstat.h>
44bcde4861SRafael Vanoni Polanczyk #include <errno.h>
45bcde4861SRafael Vanoni Polanczyk #include "powertop.h"
46bcde4861SRafael Vanoni Polanczyk
47636423dbSRafael Vanoni #define HZ2MHZ(speed) ((speed) / MICROSEC)
48b47b5b34SRafael Vanoni #define DTP_ARG_COUNT 2
49b47b5b34SRafael Vanoni #define DTP_ARG_LENGTH 5
50bcde4861SRafael Vanoni Polanczyk
51bcde4861SRafael Vanoni Polanczyk static uint64_t max_cpufreq = 0;
52b47b5b34SRafael Vanoni static dtrace_hdl_t *dtp;
53b47b5b34SRafael Vanoni static char **dtp_argv;
54bcde4861SRafael Vanoni Polanczyk
/*
 * Enabling PM through /etc/power.conf
 * See pt_cpufreq_suggest()
 *
 * default_conf is the file inspected by pt_cpufreq_check_pm(); the remaining
 * strings are the shell commands run, in order, by pt_cpufreq_enable().
 * None of them is ever written to, so they are const.
 */
static const char default_conf[] = "/etc/power.conf";
static const char default_pmconf[] = "/usr/sbin/pmconfig";
static const char cpupm_enable[] = "echo cpupm enable >> /etc/power.conf";
static const char cpupm_treshold[] =
    "echo cpu-threshold 1s >> /etc/power.conf";
63bcde4861SRafael Vanoni Polanczyk
/*
 * Buffer containing DTrace program to track CPU frequency transitions
 *
 * The program keeps one timestamp per CPU in the global array "last", whose
 * size is the macro argument $0, and records the program start time in
 * "begin".  Each firing of cpu-change-speed adds, to the @times aggregation
 * keyed by (cpu, old speed), the time elapsed since that CPU's previous
 * transition, or since "begin" when this is the first transition seen for
 * the CPU.  The "!= 0" clause must stay ahead of the "== 0" clause: the
 * latter sets last[cpu], so in the opposite order both would fire for the
 * same event.
 *
 * The pointer itself is never reassigned, hence the const qualifier on it.
 */
static const char *const dtp_cpufreq =
    "hrtime_t last[$0];"
    ""
    "BEGIN"
    "{"
    " begin = timestamp;"
    "}"
    ""
    ":::cpu-change-speed"
    "/last[(processorid_t)arg0] != 0/"
    "{"
    " this->cpu = (processorid_t)arg0;"
    " this->oldspeed = (uint64_t)arg1;"
    " @times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);"
    " last[this->cpu] = timestamp;"
    "}"
    ":::cpu-change-speed"
    "/last[(processorid_t)arg0] == 0/"
    "{"
    " this->cpu = (processorid_t)arg0;"
    " this->oldspeed = (uint64_t)arg1;"
    " @times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
    " last[this->cpu] = timestamp;"
    "}";
91bcde4861SRafael Vanoni Polanczyk
/*
 * Same as above, but only for a specific CPU
 *
 * Both cpu-change-speed clauses are restricted to the CPU given by macro
 * argument $1, so a single scalar "last" timestamp is enough.  Time is
 * aggregated into @times keyed by (cpu, old speed): since the previous
 * transition when "last" is set, otherwise since "begin".  As in the
 * system-wide program, the "!= 0" clause has to precede the "== 0" one.
 *
 * The pointer itself is never reassigned, hence the const qualifier on it.
 */
static const char *const dtp_cpufreq_c =
    "hrtime_t last;"
    ""
    "BEGIN"
    "{"
    " begin = timestamp;"
    "}"
    ""
    ":::cpu-change-speed"
    "/(processorid_t)arg0 == $1 &&"
    " last != 0/"
    "{"
    " this->cpu = (processorid_t)arg0;"
    " this->oldspeed = (uint64_t)arg1;"
    " @times[this->cpu, this->oldspeed] = sum(timestamp - last);"
    " last = timestamp;"
    "}"
    ":::cpu-change-speed"
    "/(processorid_t)arg0 == $1 &&"
    " last == 0/"
    "{"
    " this->cpu = (processorid_t)arg0;"
    " this->oldspeed = (uint64_t)arg1;"
    " @times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
    " last = timestamp;"
    "}";
121b47b5b34SRafael Vanoni
122b47b5b34SRafael Vanoni static int pt_cpufreq_setup(void);
123bcde4861SRafael Vanoni Polanczyk static int pt_cpufreq_snapshot(void);
124bcde4861SRafael Vanoni Polanczyk static int pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
125b47b5b34SRafael Vanoni static void pt_cpufreq_stat_account(double, uint_t);
1269bbf5ba1SRafael Vanoni static int pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t);
1279bbf5ba1SRafael Vanoni static int pt_cpufreq_check_pm(void);
1289bbf5ba1SRafael Vanoni static void pt_cpufreq_enable(void);
129b47b5b34SRafael Vanoni
130b47b5b34SRafael Vanoni static int
pt_cpufreq_setup(void)131b47b5b34SRafael Vanoni pt_cpufreq_setup(void)
132b47b5b34SRafael Vanoni {
133b47b5b34SRafael Vanoni if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL)
134*2d83778aSRafael Vanoni return (1);
135b47b5b34SRafael Vanoni
136b47b5b34SRafael Vanoni if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) {
137b47b5b34SRafael Vanoni free(dtp_argv);
138*2d83778aSRafael Vanoni return (1);
139b47b5b34SRafael Vanoni }
140b47b5b34SRafael Vanoni
141b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed);
142b47b5b34SRafael Vanoni
143636423dbSRafael Vanoni if (PT_ON_CPU) {
144b47b5b34SRafael Vanoni if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH))
145b47b5b34SRafael Vanoni == NULL) {
146b47b5b34SRafael Vanoni free(dtp_argv[0]);
147b47b5b34SRafael Vanoni free(dtp_argv);
148*2d83778aSRafael Vanoni return (1);
149b47b5b34SRafael Vanoni }
150b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu);
151b47b5b34SRafael Vanoni }
152b47b5b34SRafael Vanoni
153b47b5b34SRafael Vanoni return (0);
154b47b5b34SRafael Vanoni }
155bcde4861SRafael Vanoni Polanczyk
156bcde4861SRafael Vanoni Polanczyk /*
157bcde4861SRafael Vanoni Polanczyk * Perform setup necessary to enumerate and track CPU speed changes
158bcde4861SRafael Vanoni Polanczyk */
159bcde4861SRafael Vanoni Polanczyk int
pt_cpufreq_stat_prepare(void)160bcde4861SRafael Vanoni Polanczyk pt_cpufreq_stat_prepare(void)
161bcde4861SRafael Vanoni Polanczyk {
162bcde4861SRafael Vanoni Polanczyk dtrace_prog_t *prog;
163bcde4861SRafael Vanoni Polanczyk dtrace_proginfo_t info;
164bcde4861SRafael Vanoni Polanczyk dtrace_optval_t statustime;
165bcde4861SRafael Vanoni Polanczyk kstat_ctl_t *kc;
166bcde4861SRafael Vanoni Polanczyk kstat_t *ksp;
167bcde4861SRafael Vanoni Polanczyk kstat_named_t *knp;
168b47b5b34SRafael Vanoni freq_state_info_t *state;
169b47b5b34SRafael Vanoni char *s, *token, *prog_ptr;
170bcde4861SRafael Vanoni Polanczyk int err;
171bcde4861SRafael Vanoni Polanczyk
172b47b5b34SRafael Vanoni if ((err = pt_cpufreq_setup()) != 0) {
173*2d83778aSRafael Vanoni pt_error("failed to setup %s report (couldn't allocate "
174*2d83778aSRafael Vanoni "memory)\n", g_msg_freq_state);
175b47b5b34SRafael Vanoni return (errno);
176b47b5b34SRafael Vanoni }
177b47b5b34SRafael Vanoni
178b47b5b34SRafael Vanoni state = g_pstate_info;
179b47b5b34SRafael Vanoni if ((g_cpu_power_states = calloc((size_t)g_ncpus,
180b47b5b34SRafael Vanoni sizeof (cpu_power_info_t))) == NULL)
181b47b5b34SRafael Vanoni return (-1);
182bcde4861SRafael Vanoni Polanczyk
183bcde4861SRafael Vanoni Polanczyk /*
184bcde4861SRafael Vanoni Polanczyk * Enumerate the CPU frequencies
185bcde4861SRafael Vanoni Polanczyk */
186bcde4861SRafael Vanoni Polanczyk if ((kc = kstat_open()) == NULL)
187bcde4861SRafael Vanoni Polanczyk return (errno);
188bcde4861SRafael Vanoni Polanczyk
189b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL);
190bcde4861SRafael Vanoni Polanczyk
191b47b5b34SRafael Vanoni if (ksp == NULL) {
192b47b5b34SRafael Vanoni err = errno;
193b47b5b34SRafael Vanoni (void) kstat_close(kc);
194b47b5b34SRafael Vanoni return (err);
195b47b5b34SRafael Vanoni }
196bcde4861SRafael Vanoni Polanczyk
197bcde4861SRafael Vanoni Polanczyk (void) kstat_read(kc, ksp, NULL);
198bcde4861SRafael Vanoni Polanczyk
199bcde4861SRafael Vanoni Polanczyk knp = kstat_data_lookup(ksp, "supported_frequencies_Hz");
200bcde4861SRafael Vanoni Polanczyk s = knp->value.str.addr.ptr;
201bcde4861SRafael Vanoni Polanczyk
202b47b5b34SRafael Vanoni g_npstates = 0;
203bcde4861SRafael Vanoni Polanczyk
204bcde4861SRafael Vanoni Polanczyk for (token = strtok(s, ":"), s = NULL;
205*2d83778aSRafael Vanoni token != NULL && g_npstates < NSTATES;
206bcde4861SRafael Vanoni Polanczyk token = strtok(NULL, ":")) {
207bcde4861SRafael Vanoni Polanczyk
208bcde4861SRafael Vanoni Polanczyk state->speed = HZ2MHZ(atoll(token));
209bcde4861SRafael Vanoni Polanczyk
210bcde4861SRafael Vanoni Polanczyk if (state->speed > max_cpufreq)
211bcde4861SRafael Vanoni Polanczyk max_cpufreq = state->speed;
212bcde4861SRafael Vanoni Polanczyk
213bcde4861SRafael Vanoni Polanczyk state->total_time = (uint64_t)0;
214bcde4861SRafael Vanoni Polanczyk
215b47b5b34SRafael Vanoni g_npstates++;
216bcde4861SRafael Vanoni Polanczyk state++;
217bcde4861SRafael Vanoni Polanczyk }
218bcde4861SRafael Vanoni Polanczyk
219bcde4861SRafael Vanoni Polanczyk if (token != NULL)
220*2d83778aSRafael Vanoni pt_error("CPU exceeds the supported number of %s\n",
221*2d83778aSRafael Vanoni g_msg_freq_state);
222bcde4861SRafael Vanoni Polanczyk
223bcde4861SRafael Vanoni Polanczyk (void) kstat_close(kc);
224bcde4861SRafael Vanoni Polanczyk
225bcde4861SRafael Vanoni Polanczyk /*
226bcde4861SRafael Vanoni Polanczyk * Return if speed transition is not supported
227bcde4861SRafael Vanoni Polanczyk */
228b47b5b34SRafael Vanoni if (g_npstates < 2)
229bcde4861SRafael Vanoni Polanczyk return (-1);
230bcde4861SRafael Vanoni Polanczyk
231bcde4861SRafael Vanoni Polanczyk /*
232bcde4861SRafael Vanoni Polanczyk * Setup DTrace to look for CPU frequency changes
233bcde4861SRafael Vanoni Polanczyk */
234b47b5b34SRafael Vanoni if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
235*2d83778aSRafael Vanoni pt_error("cannot open dtrace library for the %s report: %s\n",
236*2d83778aSRafael Vanoni g_msg_freq_state, dtrace_errmsg(NULL, err));
237bcde4861SRafael Vanoni Polanczyk return (-2);
238bcde4861SRafael Vanoni Polanczyk }
239b47b5b34SRafael Vanoni
240b47b5b34SRafael Vanoni /*
241b47b5b34SRafael Vanoni * Execute different scripts (defined above) depending on
242b47b5b34SRafael Vanoni * user specified options. Default mode uses dtp_cpufreq.
243b47b5b34SRafael Vanoni */
244636423dbSRafael Vanoni if (PT_ON_CPU)
245b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq_c;
246b47b5b34SRafael Vanoni else
247b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq;
248b47b5b34SRafael Vanoni
249b47b5b34SRafael Vanoni if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
250b47b5b34SRafael Vanoni DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) {
251*2d83778aSRafael Vanoni pt_error("failed to compile %s program\n", g_msg_freq_state);
252b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
253bcde4861SRafael Vanoni Polanczyk }
254b47b5b34SRafael Vanoni
255b47b5b34SRafael Vanoni if (dtrace_program_exec(dtp, prog, &info) == -1) {
256*2d83778aSRafael Vanoni pt_error("failed to enable %s probes\n", g_msg_freq_state);
257b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
258bcde4861SRafael Vanoni Polanczyk }
259b47b5b34SRafael Vanoni
260*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggsize", "128k") == -1)
261*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggsize'\n", g_msg_freq_state);
262b47b5b34SRafael Vanoni
263*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggrate", "0") == -1)
264*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggrate'\n", g_msg_freq_state);
265b47b5b34SRafael Vanoni
266*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggpercpu", 0) == -1)
267*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggpercpu'\n", g_msg_freq_state);
268b47b5b34SRafael Vanoni
269b47b5b34SRafael Vanoni if (dtrace_go(dtp) != 0) {
270*2d83778aSRafael Vanoni pt_error("failed to start %s observation\n", g_msg_freq_state);
271b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
272bcde4861SRafael Vanoni Polanczyk }
273b47b5b34SRafael Vanoni
274b47b5b34SRafael Vanoni if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
275*2d83778aSRafael Vanoni pt_error("failed to get %s 'statusrate'\n", g_msg_freq_state);
276b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
277bcde4861SRafael Vanoni Polanczyk }
278bcde4861SRafael Vanoni Polanczyk
279bcde4861SRafael Vanoni Polanczyk return (0);
280bcde4861SRafael Vanoni Polanczyk }
281bcde4861SRafael Vanoni Polanczyk
282bcde4861SRafael Vanoni Polanczyk /*
283bcde4861SRafael Vanoni Polanczyk * The DTrace probes have already been enabled, and are tracking
284bcde4861SRafael Vanoni Polanczyk * CPU speed transitions. Take a snapshot of the aggregations, and
285bcde4861SRafael Vanoni Polanczyk * look for any CPUs that have made a speed transition over the last
286bcde4861SRafael Vanoni Polanczyk * sampling interval. Note that the aggregations may be empty if no
287bcde4861SRafael Vanoni Polanczyk * speed transitions took place over the last interval. In that case,
288bcde4861SRafael Vanoni Polanczyk * notate that we have already accounted for the time, so that when
289bcde4861SRafael Vanoni Polanczyk * we do encounter a speed transition in a future sampling interval
290bcde4861SRafael Vanoni Polanczyk * we can subtract that time back out.
291bcde4861SRafael Vanoni Polanczyk */
292bcde4861SRafael Vanoni Polanczyk int
pt_cpufreq_stat_collect(double interval)293bcde4861SRafael Vanoni Polanczyk pt_cpufreq_stat_collect(double interval)
294bcde4861SRafael Vanoni Polanczyk {
295636423dbSRafael Vanoni int i, ret;
296bcde4861SRafael Vanoni Polanczyk
297bcde4861SRafael Vanoni Polanczyk /*
298bcde4861SRafael Vanoni Polanczyk * Zero out the interval time reported by DTrace for
299bcde4861SRafael Vanoni Polanczyk * this interval
300bcde4861SRafael Vanoni Polanczyk */
301b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++)
302b47b5b34SRafael Vanoni g_pstate_info[i].total_time = 0;
303bcde4861SRafael Vanoni Polanczyk
304bcde4861SRafael Vanoni Polanczyk for (i = 0; i < g_ncpus; i++)
305b47b5b34SRafael Vanoni g_cpu_power_states[i].dtrace_time = 0;
306bcde4861SRafael Vanoni Polanczyk
307b47b5b34SRafael Vanoni if (dtrace_status(dtp) == -1)
308bcde4861SRafael Vanoni Polanczyk return (-1);
309bcde4861SRafael Vanoni Polanczyk
310b47b5b34SRafael Vanoni if (dtrace_aggregate_snap(dtp) != 0)
311*2d83778aSRafael Vanoni pt_error("failed to collect data for %s\n", g_msg_freq_state);
312bcde4861SRafael Vanoni Polanczyk
313b47b5b34SRafael Vanoni if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk,
314bcde4861SRafael Vanoni Polanczyk NULL) != 0)
315*2d83778aSRafael Vanoni pt_error("failed to sort data for %s\n", g_msg_freq_state);
316bcde4861SRafael Vanoni Polanczyk
317b47b5b34SRafael Vanoni dtrace_aggregate_clear(dtp);
318bcde4861SRafael Vanoni Polanczyk
319bcde4861SRafael Vanoni Polanczyk if ((ret = pt_cpufreq_snapshot()) != 0) {
320*2d83778aSRafael Vanoni pt_error("failed to snapshot %s state\n", g_msg_freq_state);
321bcde4861SRafael Vanoni Polanczyk return (ret);
322bcde4861SRafael Vanoni Polanczyk }
323bcde4861SRafael Vanoni Polanczyk
324b47b5b34SRafael Vanoni switch (g_op_mode) {
325636423dbSRafael Vanoni case PT_MODE_CPU:
326b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, g_observed_cpu);
327b47b5b34SRafael Vanoni break;
328636423dbSRafael Vanoni case PT_MODE_DEFAULT:
329b47b5b34SRafael Vanoni default:
330b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++)
331b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, i);
332b47b5b34SRafael Vanoni break;
333b47b5b34SRafael Vanoni }
334bcde4861SRafael Vanoni Polanczyk
335b47b5b34SRafael Vanoni return (0);
336b47b5b34SRafael Vanoni }
337bcde4861SRafael Vanoni Polanczyk
338b47b5b34SRafael Vanoni static void
pt_cpufreq_stat_account(double interval,uint_t cpu)339b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu)
340b47b5b34SRafael Vanoni {
341636423dbSRafael Vanoni cpu_power_info_t *cpu_pow;
342b47b5b34SRafael Vanoni uint64_t speed;
343b47b5b34SRafael Vanoni hrtime_t duration;
344b47b5b34SRafael Vanoni int i;
345bcde4861SRafael Vanoni Polanczyk
346b47b5b34SRafael Vanoni cpu_pow = &g_cpu_power_states[cpu];
347b47b5b34SRafael Vanoni speed = cpu_pow->current_pstate;
348b47b5b34SRafael Vanoni
349636423dbSRafael Vanoni duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time;
350636423dbSRafael Vanoni
351636423dbSRafael Vanoni /*
352636423dbSRafael Vanoni * 'duration' may be a negative value when we're using or forcing a
353636423dbSRafael Vanoni * small interval, and the amount of time already accounted ends up
354636423dbSRafael Vanoni * being larger than the the former.
355636423dbSRafael Vanoni */
356636423dbSRafael Vanoni if (duration < 0)
357636423dbSRafael Vanoni return;
358b47b5b34SRafael Vanoni
359b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) {
360b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) {
361b47b5b34SRafael Vanoni g_pstate_info[i].total_time += duration;
362b47b5b34SRafael Vanoni cpu_pow->time_accounted += duration;
363636423dbSRafael Vanoni cpu_pow->speed_accounted = speed;
364bcde4861SRafael Vanoni Polanczyk }
365bcde4861SRafael Vanoni Polanczyk }
366bcde4861SRafael Vanoni Polanczyk }
367bcde4861SRafael Vanoni Polanczyk
368bcde4861SRafael Vanoni Polanczyk /*
369bcde4861SRafael Vanoni Polanczyk * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
370bcde4861SRafael Vanoni Polanczyk */
371bcde4861SRafael Vanoni Polanczyk static int
pt_cpufreq_snapshot(void)372bcde4861SRafael Vanoni Polanczyk pt_cpufreq_snapshot(void)
373bcde4861SRafael Vanoni Polanczyk {
374636423dbSRafael Vanoni kstat_ctl_t *kc;
375636423dbSRafael Vanoni int ret;
376636423dbSRafael Vanoni uint_t i;
377bcde4861SRafael Vanoni Polanczyk
378bcde4861SRafael Vanoni Polanczyk if ((kc = kstat_open()) == NULL)
379bcde4861SRafael Vanoni Polanczyk return (errno);
380bcde4861SRafael Vanoni Polanczyk
381b47b5b34SRafael Vanoni switch (g_op_mode) {
382636423dbSRafael Vanoni case PT_MODE_CPU:
383b47b5b34SRafael Vanoni ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu);
384b47b5b34SRafael Vanoni break;
385636423dbSRafael Vanoni case PT_MODE_DEFAULT:
386b47b5b34SRafael Vanoni default:
387b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++)
388b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0)
389b47b5b34SRafael Vanoni break;
390b47b5b34SRafael Vanoni break;
391b47b5b34SRafael Vanoni }
392bcde4861SRafael Vanoni Polanczyk
393b47b5b34SRafael Vanoni if (kstat_close(kc) != 0)
394*2d83778aSRafael Vanoni pt_error("couldn't close %s kstat\n", g_msg_freq_state);
395bcde4861SRafael Vanoni Polanczyk
396b47b5b34SRafael Vanoni return (ret);
397b47b5b34SRafael Vanoni }
398bcde4861SRafael Vanoni Polanczyk
399b47b5b34SRafael Vanoni static int
pt_cpufreq_snapshot_cpu(kstat_ctl_t * kc,uint_t cpu)400b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu)
401b47b5b34SRafael Vanoni {
402b47b5b34SRafael Vanoni kstat_t *ksp;
403b47b5b34SRafael Vanoni kstat_named_t *knp;
404b47b5b34SRafael Vanoni
405b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL);
406b47b5b34SRafael Vanoni if (ksp == NULL) {
407*2d83778aSRafael Vanoni pt_error("couldn't find 'cpu_info' kstat for CPU %d\n while "
408*2d83778aSRafael Vanoni "taking a snapshot of %s\n", cpu, g_msg_freq_state);
409b47b5b34SRafael Vanoni return (1);
410bcde4861SRafael Vanoni Polanczyk }
411bcde4861SRafael Vanoni Polanczyk
412b47b5b34SRafael Vanoni if (kstat_read(kc, ksp, NULL) == -1) {
413*2d83778aSRafael Vanoni pt_error("couldn't read 'cpu_info' kstat for CPU %d\n while "
414*2d83778aSRafael Vanoni "taking a snapshot of %s\n", cpu, g_msg_freq_state);
415b47b5b34SRafael Vanoni return (2);
416b47b5b34SRafael Vanoni }
417b47b5b34SRafael Vanoni
418b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "current_clock_Hz");
419b47b5b34SRafael Vanoni if (knp == NULL) {
420*2d83778aSRafael Vanoni pt_error("couldn't find 'current_clock_Hz' kstat for CPU %d "
421*2d83778aSRafael Vanoni "while taking a snapshot of %s\n", cpu, g_msg_freq_state);
422b47b5b34SRafael Vanoni return (3);
423b47b5b34SRafael Vanoni }
424b47b5b34SRafael Vanoni
425b47b5b34SRafael Vanoni g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64);
426bcde4861SRafael Vanoni Polanczyk
427bcde4861SRafael Vanoni Polanczyk return (0);
428bcde4861SRafael Vanoni Polanczyk }
429bcde4861SRafael Vanoni Polanczyk
430bcde4861SRafael Vanoni Polanczyk /*
431bcde4861SRafael Vanoni Polanczyk * DTrace aggregation walker that sorts through a snapshot of the
432bcde4861SRafael Vanoni Polanczyk * aggregation data collected during firings of the cpu-change-speed
433bcde4861SRafael Vanoni Polanczyk * probe.
434bcde4861SRafael Vanoni Polanczyk */
435bcde4861SRafael Vanoni Polanczyk /*ARGSUSED*/
436bcde4861SRafael Vanoni Polanczyk static int
pt_cpufreq_dtrace_walk(const dtrace_aggdata_t * data,void * arg)437bcde4861SRafael Vanoni Polanczyk pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
438bcde4861SRafael Vanoni Polanczyk {
439bcde4861SRafael Vanoni Polanczyk dtrace_aggdesc_t *aggdesc = data->dtada_desc;
440bcde4861SRafael Vanoni Polanczyk dtrace_recdesc_t *cpu_rec, *speed_rec;
441636423dbSRafael Vanoni cpu_power_info_t *cp;
442bcde4861SRafael Vanoni Polanczyk int32_t cpu;
443bcde4861SRafael Vanoni Polanczyk uint64_t speed;
444636423dbSRafael Vanoni hrtime_t res;
445bcde4861SRafael Vanoni Polanczyk int i;
446bcde4861SRafael Vanoni Polanczyk
447bcde4861SRafael Vanoni Polanczyk if (strcmp(aggdesc->dtagd_name, "times") == 0) {
448bcde4861SRafael Vanoni Polanczyk cpu_rec = &aggdesc->dtagd_rec[1];
449bcde4861SRafael Vanoni Polanczyk speed_rec = &aggdesc->dtagd_rec[2];
450bcde4861SRafael Vanoni Polanczyk
451bcde4861SRafael Vanoni Polanczyk /* LINTED - alignment */
452bcde4861SRafael Vanoni Polanczyk cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset);
453636423dbSRafael Vanoni
454636423dbSRafael Vanoni /* LINTED - alignment */
455636423dbSRafael Vanoni res = *((hrtime_t *)(data->dtada_percpu[cpu]));
456636423dbSRafael Vanoni
457bcde4861SRafael Vanoni Polanczyk /* LINTED - alignment */
458bcde4861SRafael Vanoni Polanczyk speed = *(uint64_t *)(data->dtada_data +
459bcde4861SRafael Vanoni Polanczyk speed_rec->dtrd_offset);
460bcde4861SRafael Vanoni Polanczyk
461636423dbSRafael Vanoni if (speed == 0)
462bcde4861SRafael Vanoni Polanczyk speed = max_cpufreq;
463636423dbSRafael Vanoni else
464636423dbSRafael Vanoni speed = HZ2MHZ(speed);
465bcde4861SRafael Vanoni Polanczyk
466bcde4861SRafael Vanoni Polanczyk /*
467bcde4861SRafael Vanoni Polanczyk * We have an aggregation record for "cpu" being at "speed"
468bcde4861SRafael Vanoni Polanczyk * for an interval of "n" nanoseconds. The reported interval
469bcde4861SRafael Vanoni Polanczyk * may exceed the powertop sampling interval, since we only
470bcde4861SRafael Vanoni Polanczyk * notice during potentially infrequent firings of the
471bcde4861SRafael Vanoni Polanczyk * "speed change" DTrace probe. In this case powertop would
472bcde4861SRafael Vanoni Polanczyk * have already accounted for the portions of the interval
473b47b5b34SRafael Vanoni * that happened during prior powertop samplings, so subtract
474bcde4861SRafael Vanoni Polanczyk * out time already accounted.
475bcde4861SRafael Vanoni Polanczyk */
476636423dbSRafael Vanoni cp = &g_cpu_power_states[cpu];
477bcde4861SRafael Vanoni Polanczyk
478b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) {
479b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) {
480636423dbSRafael Vanoni
481636423dbSRafael Vanoni if (cp->time_accounted > 0 &&
482636423dbSRafael Vanoni cp->speed_accounted == speed) {
483636423dbSRafael Vanoni if (res > cp->time_accounted) {
484636423dbSRafael Vanoni res -= cp->time_accounted;
485636423dbSRafael Vanoni cp->time_accounted = 0;
486636423dbSRafael Vanoni cp->speed_accounted = 0;
487636423dbSRafael Vanoni } else {
488636423dbSRafael Vanoni return (DTRACE_AGGWALK_NEXT);
489bcde4861SRafael Vanoni Polanczyk }
490bcde4861SRafael Vanoni Polanczyk }
491636423dbSRafael Vanoni
492636423dbSRafael Vanoni g_pstate_info[i].total_time += res;
493636423dbSRafael Vanoni cp->dtrace_time += res;
494bcde4861SRafael Vanoni Polanczyk }
495bcde4861SRafael Vanoni Polanczyk }
496bcde4861SRafael Vanoni Polanczyk }
497636423dbSRafael Vanoni
498bcde4861SRafael Vanoni Polanczyk return (DTRACE_AGGWALK_NEXT);
499bcde4861SRafael Vanoni Polanczyk }
500bcde4861SRafael Vanoni Polanczyk
501bcde4861SRafael Vanoni Polanczyk /*
5029bbf5ba1SRafael Vanoni * Checks if PM is enabled in /etc/power.conf, enabling if not
503bcde4861SRafael Vanoni Polanczyk */
504bcde4861SRafael Vanoni Polanczyk void
pt_cpufreq_suggest(void)5059bbf5ba1SRafael Vanoni pt_cpufreq_suggest(void)
506bcde4861SRafael Vanoni Polanczyk {
5079bbf5ba1SRafael Vanoni int ret = pt_cpufreq_check_pm();
5089bbf5ba1SRafael Vanoni
5099bbf5ba1SRafael Vanoni switch (ret) {
5109bbf5ba1SRafael Vanoni case 0:
5119bbf5ba1SRafael Vanoni pt_sugg_add("Suggestion: enable CPU power management by "
5129bbf5ba1SRafael Vanoni "pressing the P key", 40, 'P', (char *)g_msg_freq_enable,
5139bbf5ba1SRafael Vanoni pt_cpufreq_enable);
5149bbf5ba1SRafael Vanoni break;
5159bbf5ba1SRafael Vanoni }
516bcde4861SRafael Vanoni Polanczyk }
517bcde4861SRafael Vanoni Polanczyk
518bcde4861SRafael Vanoni Polanczyk /*
5199bbf5ba1SRafael Vanoni * Checks /etc/power.conf and returns:
5209bbf5ba1SRafael Vanoni *
5219bbf5ba1SRafael Vanoni * 0 if CPUPM is not enabled
5229bbf5ba1SRafael Vanoni * 1 if there's nothing for us to do because:
5239bbf5ba1SRafael Vanoni * (a) the system does not support frequency scaling
5249bbf5ba1SRafael Vanoni * (b) there's no power.conf.
5259bbf5ba1SRafael Vanoni * 2 if CPUPM is enabled
5269bbf5ba1SRafael Vanoni * 3 if the system is running in poll-mode, as opposed to event-mode
5279bbf5ba1SRafael Vanoni *
5289bbf5ba1SRafael Vanoni * Notice the ordering of the return values, they will be picked up and
5299bbf5ba1SRafael Vanoni * switched upon ascendingly.
530bcde4861SRafael Vanoni Polanczyk */
5319bbf5ba1SRafael Vanoni static int
pt_cpufreq_check_pm(void)5329bbf5ba1SRafael Vanoni pt_cpufreq_check_pm(void)
533bcde4861SRafael Vanoni Polanczyk {
5349bbf5ba1SRafael Vanoni char line[1024];
5359bbf5ba1SRafael Vanoni FILE *file;
5369bbf5ba1SRafael Vanoni int ret = 0;
537bcde4861SRafael Vanoni Polanczyk
5389bbf5ba1SRafael Vanoni if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL)
5399bbf5ba1SRafael Vanoni return (1);
540bcde4861SRafael Vanoni Polanczyk
541bcde4861SRafael Vanoni Polanczyk (void) memset(line, 0, 1024);
542bcde4861SRafael Vanoni Polanczyk
5439bbf5ba1SRafael Vanoni while (fgets(line, 1024, file)) {
544bcde4861SRafael Vanoni Polanczyk if (strstr(line, "cpupm")) {
545bcde4861SRafael Vanoni Polanczyk if (strstr(line, "enable")) {
546bcde4861SRafael Vanoni Polanczyk (void) fclose(file);
5479bbf5ba1SRafael Vanoni return (2);
548bcde4861SRafael Vanoni Polanczyk }
549bcde4861SRafael Vanoni Polanczyk }
5509bbf5ba1SRafael Vanoni if (strstr(line, "poll"))
5519bbf5ba1SRafael Vanoni ret = 3;
552bcde4861SRafael Vanoni Polanczyk }
553bcde4861SRafael Vanoni Polanczyk
554bcde4861SRafael Vanoni Polanczyk (void) fclose(file);
5559bbf5ba1SRafael Vanoni
5569bbf5ba1SRafael Vanoni return (ret);
5579bbf5ba1SRafael Vanoni }
5589bbf5ba1SRafael Vanoni
5599bbf5ba1SRafael Vanoni /*
5609bbf5ba1SRafael Vanoni * Used as a suggestion, sets PM in /etc/power.conf and
5619bbf5ba1SRafael Vanoni * a 1sec threshold, then calls /usr/sbin/pmconfig
5629bbf5ba1SRafael Vanoni */
5639bbf5ba1SRafael Vanoni static void
pt_cpufreq_enable(void)5649bbf5ba1SRafael Vanoni pt_cpufreq_enable(void)
5659bbf5ba1SRafael Vanoni {
5669bbf5ba1SRafael Vanoni (void) system(cpupm_enable);
5679bbf5ba1SRafael Vanoni (void) system(cpupm_treshold);
5689bbf5ba1SRafael Vanoni (void) system(default_pmconf);
5699bbf5ba1SRafael Vanoni
5709bbf5ba1SRafael Vanoni if (pt_sugg_remove(pt_cpufreq_enable) == 0)
571*2d83778aSRafael Vanoni pt_error("failed to remove a %s suggestion\n",
572*2d83778aSRafael Vanoni g_msg_freq_state);
573bcde4861SRafael Vanoni Polanczyk }
574