1#! /usr/perl5/bin/perl
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
25#
26
27#
28# Pg.pm provides object-oriented interface to the Solaris
29# Processor Group kstats
30#
31# See comments in the end
32#
33
34package Sun::Solaris::Pg;
35
36use strict;
37use warnings;
38use Sun::Solaris::Kstat;
39use Carp;
40use Errno;
41use List::Util qw(max sum);
42
43our $VERSION = '1.1';
44
45#
46# Currently the OS does not have the root PG and PGs constitute a forest of
47# small trees. This module gathers all such trees under one root with ID zero.
48# If the root is present already, we do not use faked root.
49#
50
51my $ROOT_ID = 0;
52
53#
54# PG_NO_PARENT means that kstats have PG parent ID and it is set to -1
55# PG_PARENT_UNDEF means that kstats have no PG parent ID
56#
57use constant {
58	PG_NO_PARENT	=> -1,
59	PG_PARENT_UNDEF => -2,
60};
61
62#
63# Sorting order between different sharing relationships. This order is used to
64# break ties between PGs with the same number of CPUs. If there are two PGs with
65# the same set of CPUs, the one with the higher weight will be the parent of the
66# one with the lower weight.
67#
68my %relationships_order = (
69			   'CPU_PM_Idle_Power_Domain' => 1,
70			   'Integer_Pipeline' => 2,
71			   'Cache' => 3,
72			   'CPU_PM_Active_Power_Domain' => 4,
73			   'Floating_Point_Unit' => 5,
74			   'Data_Pipe_to_memory' => 6,
75			   'Memory' => 7,
76			   'Socket' => 8,
77			   'System' => 9,
78			  );
79
80#
81# Object interface to the library. These are methods that can be used by the
82# module user.
83#
84
85#
86# Create a new object representing PG
87# All the heavy lifting is performed by _init function.
88# This function performs all the Perl blessing magic.
89#
90# The new() method accepts arguments in the form of a hash. The following
91# subarguments are supported:
92#
93#   -cpudata	# Collect per-CPU data from kstats if this is T
94#   -tags	# Match PGs to physical relationships if this is T
95#   -swload	# Collect software CPU load if this is T
96#   -retry	# how many times to retry PG initialization when it fails
97#   -delay # Delay in seconds between retries
98#
99# The arguments are passed to _init().
100#
101sub new
102{
103	my $class = shift;
104	my %args = @_;
105	my $retry_count = $args{-retry} || 0;
106	my $retry_delay = $args{-delay} || 1;
107
108	my $self =  _init(@_);
109
110	#
111	# If PG initialization fails with EAGAIN error and the caller requested
112	# retries, retry initialization.
113	#
114	for (; !$self && ($! == &Errno::EAGAIN) && $retry_count;
115	     $retry_count--) {
116		select(undef,undef,undef, $retry_delay);
117		$self = _init(@_);
118	}
119
120	if ($self) {
121		bless($self, $class) if defined($class);
122		bless($self) unless defined($class);
123	}
124
125	return ($self);
126}
127
128#
129# Functions below use internal function _pg_get which returns PG hash reference
130# corresponding to PG ID specified or 'undef' if the PG can't be found.
131#
132
133#
134# All methods return 'undef' in scalar context and an empty list in list
135# context when unrecoverable errors are detected.
136#
137
138#
139# Return the root ID of PG hierarchy
140#
141sub root
142{
143	scalar @_ == 1 or _usage("root(cookie)");
144	my $self = shift;
145
146	return unless $self->{PGTREE};
147
148	return ($ROOT_ID);
149}
150
151#
152# Return list of all pgs numerically sorted In scalar context return number of
153# PGs
154#
155sub all
156{
157	scalar @_ == 1 or _usage("all(cookie)");
158	my $self = shift;
159	my $pgtree =  $self->{PGTREE} or return;
160	my @ids = keys(%{$pgtree});
161
162	return (wantarray() ? _nsort(@ids) : scalar @ids);
163}
164
165#
166# Return list of all pgs by walking the tree depth first.
167#
168sub all_depth_first
169{
170	scalar @_ == 1 or _usage("all_depth_first(cookie)");
171	my $self = shift;
172
173	_walk_depth_first($self, $self->root());
174}
175
176#
177# Return list of all pgs by walking the tree breadth first.
178#
179sub all_breadth_first
180{
181	scalar @_ == 1 or _usage("all_breadth_first(cookie)");
182	my $self = shift;
183
184	_walk_breadth_first($self, $self->root());
185}
186
187#
188# Return list of CPUs in the PG specified
189# CPUs returned are numerically sorted
190# In scalar context return number of CPUs
191#
192sub cpus
193{
194	scalar @_ == 2 or _usage("cpus(cookie, pg)");
195	my $pg = _pg_get(shift, shift) or return;
196	my @cpus =  @{$pg->{cpus}};
197
198	return (wantarray() ? _nsort(@cpus) : _collapse(@cpus));
199}
200
201#
202# Return a parent for a given PG
203# Returns undef if there is no parent
204#
205sub parent
206{
207	scalar @_ == 2 or _usage("parent(cookie, pg)");
208	my $pg = _pg_get(shift, shift) or return;
209	my $parent = $pg->{parent};
210
211	return (defined($parent) && $parent >= 0 ? $parent : undef);
212}
213
214#
215# Return list of children for a given PG
216# In scalar context return list of children
217#
218sub children
219{
220	scalar @_ == 2 or _usage("children(cookie, pg)");
221	my $pg = _pg_get(shift, shift) or return;
222
223	my $children = $pg->{children} or return;
224	my @children = @{$children};
225
226	return (wantarray() ? _nsort(@children) : scalar @children);
227}
228
229#
230# Return sharing name for the PG
231#
232sub sh_name
233{
234	scalar @_ == 2 or _usage("sh_name(cookie, pg)");
235	my $pg = _pg_get(shift, shift) or return;
236	return ($pg->{sh_name});
237}
238
239#
240# Return T if specified PG ID is a leaf PG
241#
242sub is_leaf
243{
244	scalar @_ == 2 or _usage("is_leaf(cookie, pg)");
245	my $pg = _pg_get(shift, shift) or return;
246	return ($pg->{is_leaf});
247}
248
249#
250# Return leaf PGs
251#
252sub leaves
253{
254	scalar @_ == 1 or _usage("leaves(cookie, pg)");
255
256	my $self = shift;
257
258	return (grep { is_leaf($self, $_) } $self->all());
259}
260
261#
262# Update varying data in the snapshot
263#
264sub update
265{
266	scalar @_ == 1 or _usage("update(cookie)");
267
268	my $self = shift;
269	my $ks = $self->{KSTAT};
270
271	$ks->update();
272
273	my $pgtree = $self->{PGTREE};
274	my $pg_info = $ks->{$self->{PG_MODULE}};
275
276	#
277	# Walk PG kstats and copy updated data from kstats to the snapshot
278	#
279	foreach my $id (keys %$pg_info) {
280		my $pg = $pgtree->{$id} or next;
281
282		my $pg_ks = _kstat_get_pg($pg_info, $id,
283					  $self->{USE_OLD_KSTATS});
284		return unless $pg_ks;
285
286		#
287		# Update PG from kstats
288		#
289		$pg->{util} = $pg_ks->{hw_util};
290		$pg->{current_rate} = $pg_ks->{hw_util_rate};
291		$pg->{util_rate_max} = $pg_ks->{hw_util_rate_max};
292		$pg->{util_time_running} = $pg_ks->{hw_util_time_running};
293		$pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped};
294		$pg->{snaptime} = $pg_ks->{snaptime};
295		$pg->{generation} = $pg_ks->{generation};
296	}
297
298	#
299	# Update software load for each CPU
300	#
301	$self->{CPU_LOAD} = _get_sw_cpu_load($ks);
302
303	#
304	# Get hardware load per CPU
305	#
306	if ($self->{GET_CPU_DATA}) {
307		_get_hw_cpu_load($self);
308	}
309
310	return (1);
311}
312
313#
314# Return list of physical tags for the given PG
315#
316sub tags
317{
318	scalar @_ == 2 or _usage("tags(cookie, pg)");
319	my $pg = _pg_get(shift, shift) or return;
320
321	my $tags = $pg->{tags} or return;
322
323	my @tags = _uniq(@{$tags});
324
325	return (wantarray() ? @tags : join (',', @tags));
326}
327
328#
329# Return list of sharing relationships in the snapshot Relationships are sorted
330# by the level in the hierarchy If any PGs are given on the command line, only
331# return sharing relationships for given PGs, but still keep them sorted.
332#
333sub sharing_relationships
334{
335	scalar @_ or _usage("sharing_relationships(cookie, [pg, ...])");
336
337	my $self = shift;
338	my @pgs = $self->all_breadth_first();
339
340	if (scalar @_ > 0) {
341		#
342		# Caller specified PGs, remove any PGs not in caller's list
343		#
344		my %seen;
345		map { $seen{$_} = 1 } @_;
346
347		# Remove any PGs not provided by user
348		@pgs = grep { $seen{$_} } @pgs;
349	}
350
351	return (_uniq(map { $self->sh_name($_) } @pgs));
352}
353
354#
355# Return PG generation number. If PG is specified in the argument, return its
356# generation, otherwise return snapshot generation.
357# Snapshot generation is calculated as the total of PG generations
358#
359sub generation
360{
361	(scalar @_ == 1 || scalar @_ == 2) or _usage("generation(cookie, [pg])");
362	my $self = shift;
363
364	if (scalar @_ == 0) {
365		my @generations = map { $_->{generation} }
366				  values %{$self->{PGTREE}};
367		return (sum(@generations));
368
369	} else {
370		my $id = shift;
371		my $pg = _pg_get($self, $id) or return;
372		return ($pg->{generation});
373	}
374}
375
376#
377# Return level of PG in the tree, starting from root.
378# PG level is cached in the $pg->{level} field.
379#
380sub level
381{
382	scalar @_ == 2 or _usage("level(cookie, pg)");
383	my $self = shift;
384	my $pgid = shift;
385	my $pg = _pg_get($self, $pgid) or return;
386
387	return $pg->{level} if defined($pg->{level});
388
389	$pg->{level} = 0;
390
391	my $parent = _pg_get($self, $pg->{parent});
392	while ($parent) {
393		$pg->{level}++;
394		$parent = _pg_get($self, $parent->{parent});
395	}
396
397	return ($pg->{level});
398}
399
400#
401# Return T if PG supports utilization We assume that utilization is supported by
402# PG if it shows any non-zero time in util_time_running. It is possible that the
403# same condition may be caused by cpustat(1) running ever since PG was created,
404# but there is not much we can do about it.
405#
406sub has_utilization
407{
408	scalar @_ == 2 or _usage("has_utilization(cookie, pg)");
409	my $pg = _pg_get(shift, shift) or return;
410
411	return ($pg->{util_time_running} != 0);
412}
413
414
415#
416# Return utilization for the PG
417# Utilization is a difference in utilization value between two snapshots.
418# We can only compare utilization between PGs having the same generation ID.
419#
420sub utilization
421{
422	scalar @_ == 3 or _usage("utilization(cookie, cookie1, pg");
423	my $c1 = shift;
424	my $c2 = shift;
425	my $id = shift;
426
427	#
428	# Since we have two cookies, update capacity in both
429	#
430	_capacity_update($c1, $c2, $id);
431
432	my $pg1 = _pg_get($c1, $id) or return;
433	my $pg2 = _pg_get($c2, $id) or return;
434
435	#
436	# Nothing to return if one of the utilizations wasn't measured
437	#
438	return unless ($pg1->{util_time_running} && $pg2->{util_time_running});
439
440	#
441	# Verify generation IDs
442	#
443	return unless $pg1->{generation} eq $pg2->{generation};
444	my $u1 = $pg1->{util};
445	my $u2 = $pg2->{util};
446	return unless defined ($u1) && defined ($u2);
447
448	return (abs($u2 - $u1));
449}
450
451#
452# Return an estimate of PG capacity Capacity is calculated as the maximum of
453# observed utilization expressed in units per second or maximum CPU frequency
454# for all CPUs.
455#
456# We store capacity per sharing relationship, assuming that the same sharing has
457# the same capacity. This may not be true for heterogeneous systems.
458#
459sub capacity
460{
461	scalar @_ == 2 or _usage("capacity(cookie, pg");
462	my $self = shift;
463	my $pgid = shift;
464	my $pg = _pg_get($self, $pgid) or return;
465	my $shname = $pg->{sh_name} or return;
466
467	return (max($self->{MAX_FREQUENCY}, $self->{CAPACITY}->{$shname}));
468}
469
470#
471# Return accuracy of utilization calculation between two snapshots The accuracy
472# is determined based on the total time spent running and not running the
473# counters. If T1 is the time counters were running during the period and T2 is
474# the time they were turned off, the accuracy is T1 / (T1 + T2), expressed in
475# percentages.
476#
477sub accuracy
478{
479	scalar @_ == 3 or _usage("accuracy(cookie, cookie1, pg)");
480	my $c1 = shift;
481	my $c2 = shift;
482	my $id = shift;
483	my $trun;
484	my $tstop;
485
486	my $pg1 = _pg_get($c1, $id) or return;
487	my $pg2 = _pg_get($c2, $id) or return;
488
489	# Both PGs should have the same generation
490	return unless $pg1->{generation} eq $pg2->{generation};
491
492	#
493	# Get time spent with running and stopped counters
494	#
495	$trun = abs($pg2->{util_time_running} -
496		    $pg1->{util_time_running});
497	$tstop = abs($pg2->{util_time_stopped} -
498		     $pg1->{util_time_stopped});
499
500	my $total = $trun + $tstop;
501
502	#
503	# Calculate accuracy as percentage
504	#
505	my $accuracy = $total ? ($trun * 100) / $total : 0;
506	$accuracy = int($accuracy + 0.5);
507	$accuracy = 100 if $accuracy > 100;
508	return ($accuracy);
509}
510
511#
512# Return time difference in seconds between two snapshots
513#
514sub tdelta
515{
516	scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)");
517	my $c1 = shift;
518	my $c2 = shift;
519	my $id = shift;
520
521	my $pg1 = _pg_get($c1, $id) or return;
522	my $pg2 = _pg_get($c2, $id) or return;
523
524	return unless $pg1->{generation} eq $pg2->{generation};
525
526	my $t1 = $pg1->{snaptime};
527	my $t2 = $pg2->{snaptime};
528	my $delta = abs($t1 - $t2);
529	return ($delta);
530}
531
532#
533# Return software utilization between two snapshots
534# In scalar context return software load as percentage.
535# In list context return a list (USER, SYSTEM, IDLE, SWLOAD)
536# All loads are returned as percentages
537#
538sub sw_utilization
539{
540	scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)");
541
542	my $c1 = shift;
543	my $c2 = shift;
544	my $id = shift;
545
546	my $pg1 = _pg_get($c1, $id) or return;
547	my $pg2 = _pg_get($c2, $id) or return;
548
549	return unless $pg1->{generation} eq $pg2->{generation};
550
551	my @cpus = $c1->cpus($id);
552
553	my $load1 = $c1->{CPU_LOAD};
554	my $load2 = $c2->{CPU_LOAD};
555
556	my $idle = 0;
557	my $user = 0;
558	my $sys = 0;
559	my $total = 0;
560	my $swload = 0;
561
562	foreach my $cpu (@cpus) {
563		my $ld1 = $load1->{$cpu};
564		my $ld2 = $load2->{$cpu};
565		next unless $ld1 && $ld2;
566
567		$idle += $ld2->{cpu_idle} - $ld1->{cpu_idle};
568		$user += $ld2->{cpu_user} - $ld1->{cpu_user};
569		$sys  += $ld2->{cpu_sys}  - $ld1->{cpu_sys};
570	}
571
572	$total = $idle + $user + $sys;
573
574	# Prevent division by zero
575	$total = 1 unless $total;
576
577	$swload = ($user + $sys) * 100 / $total;
578	$idle   = $idle * 100 / $total;
579	$user   = $user * 100 / $total;
580	$sys    = $sys  * 100 / $total;
581
582	return (wantarray() ? ($user, $sys, $idle, $swload) : $swload);
583}
584
585#
586# Return utilization for the PG for a given CPU
587# Utilization is a difference in utilization value between two snapshots.
588# We can only compare utilization between PGs having the same generation ID.
589#
590sub cpu_utilization
591{
592	scalar @_ == 4 or _usage("utilization(cookie, cookie1, pg, cpu");
593	my $c1 = shift;
594	my $c2 = shift;
595	my $id = shift;
596	my $cpu = shift;
597
598	my $idle = 0;
599	my $user = 0;
600	my $sys = 0;
601	my $swtotal = 0;
602	my $swload = 0;
603
604	#
605	# Since we have two cookies, update capacity in both
606	#
607	_capacity_update($c1, $c2, $id);
608
609	my $pg1 = _pg_get($c1, $id) or return;
610	my $pg2 = _pg_get($c2, $id) or return;
611
612	#
613	# Nothing to return if one of the utilizations wasn't measured
614	#
615	return unless ($pg1->{util_time_running} && $pg2->{util_time_running});
616
617	#
618	# Nothing to return if CPU data is missing
619	#
620	return unless $pg1->{cpudata} && $pg2->{cpudata};
621
622	#
623	# Verify generation IDs
624	#
625	return unless $pg1->{generation} eq $pg2->{generation};
626
627	#
628	# Get data for the given CPU
629	#
630	my $cpudata1 = $pg1->{cpudata}->{$cpu};
631	my $cpudata2 = $pg2->{cpudata}->{$cpu};
632
633	return unless $cpudata1 && $cpudata2;
634
635	return unless $cpudata1->{generation} == $cpudata2->{generation};
636
637	my $u1 = $cpudata1->{util};
638	my $u2 = $cpudata2->{util};
639	return unless defined ($u1) && defined ($u2);
640	my $hw_utilization = abs ($u1 - $u2);
641
642	#
643	# Get time spent with running and stopped counters
644	#
645	my $trun = abs($cpudata1->{util_time_running} -
646		       $cpudata2->{util_time_running});
647	my $tstop = abs($cpudata1->{util_time_stopped} -
648			$cpudata2->{util_time_stopped});
649
650	my $total = $trun + $tstop;
651
652	#
653	# Calculate accuracy as percentage
654	#
655	my $accuracy = $total ? ($trun * 100) / $total : 0;
656	$accuracy = int($accuracy + 0.5);
657	$accuracy = 100 if $accuracy > 100;
658
659	my $t1 = $cpudata1->{snaptime};
660	my $t2 = $cpudata2->{snaptime};
661	my $tdelta = abs ($t1 - $t2);
662
663	my $shname = $pg2->{sh_name} or return;
664	my $capacity = max($c2->{MAX_FREQUENCY}, $c2->{CAPACITY}->{$shname});
665	my $utilization = $hw_utilization / $tdelta;
666	$capacity = $utilization unless $capacity;
667	$utilization /= $capacity;
668	$utilization *= 100;
669
670	my $ld1 = $c1->{CPU_LOAD}->{$cpu};
671	my $ld2 = $c2->{CPU_LOAD}->{$cpu};
672
673	if ($ld1 && $ld2) {
674		$idle = $ld2->{cpu_idle} - $ld1->{cpu_idle};
675		$user = $ld2->{cpu_user} - $ld1->{cpu_user};
676		$sys  = $ld2->{cpu_sys}  - $ld1->{cpu_sys};
677
678		$swtotal = $idle + $user + $sys;
679
680		# Prevent division by zero
681		$swtotal = 1 unless $swtotal;
682
683		$swload = ($user + $sys) * 100 / $swtotal;
684		$idle   = $idle * 100 / $swtotal;
685		$user   = $user * 100 / $swtotal;
686		$sys    = $sys  * 100 / $swtotal;
687	}
688
689	return (wantarray() ?
690		($utilization, $accuracy, $hw_utilization,
691		 $swload, $user, $sys, $idle) :
692		$utilization);
693}
694
695#
696# online_cpus(kstat)
697# Return list of on-line CPUs
698#
699sub online_cpus
700{
701	scalar @_ == 1 or _usage("online_cpus(cookie)");
702
703	my $self = shift or return;
704	my $ks = $self->{KSTAT} or return;
705
706	my $cpu_info = $ks->{cpu_info} or return;
707
708	my @cpus = grep {
709		my $cp = $cpu_info->{$_}->{"cpu_info$_"};
710		my $state = $cp->{state};
711		$state eq 'on-line' || $state eq 'no-intr';
712	} keys %{$cpu_info};
713
714	return (wantarray() ? @cpus : _nsort(@cpus));
715}
716
717#
718# Support methods
719#
720# The following methods are not PG specific but are generally useful for PG
721# interface consumers
722#
723
724#
725# Sort the list numerically
726#
727sub nsort
728{
729	scalar @_ > 0 or _usage("nsort(cookie, val, ...)");
730	shift;
731
732	return (_nsort(@_));
733}
734
735#
736# Return the input list with duplicates removed.
737# Should be used in list context
738#
739sub uniq
740{
741	scalar @_ > 0 or _usage("uniq(cookie, val, ...)");
742	shift;
743
744	return (_uniq(@_));
745}
746
747#
748# Sort list numerically and remove duplicates
749# Should be called in list context
750#
751sub uniqsort
752{
753	scalar @_ > 0 or _usage("uniqsort(cookie, val, ...)");
754	shift;
755
756	return (_uniqsort(@_));
757}
758
759
760#
761# Expand all arguments and present them as a numerically sorted list
762# x,y is expanded as (x y)
763# 1-3 ranges are expandes as (1 2 3)
764#
765sub expand
766{
767	scalar @_ > 0 or _usage("expand(cookie, val, ...)");
768	shift;
769
770	return (_uniqsort(map { _expand($_) } @_));
771}
772
773#
774# Consolidate consecutive ids as start-end
775# Input: list of ids
776# Output: string with space-sepated cpu values with ranges
777#   collapsed as x-y
778#
779sub id_collapse
780{
781	scalar @_ > 0 or _usage("collapse(cookie, val, ...)");
782	shift;
783
784	return _collapse(@_);
785}
786
787#
788# Return elements of the second list not present in the first list. Both lists
789# are passed by reference.
790#
791sub set_subtract
792{
793	scalar @_ == 3 or _usage("set_subtract(cookie, left, right)");
794	shift;
795
796	return (_set_subtract(@_));
797}
798
799#
800# Return the intersection of two lists passed by reference
801# Convert the first list to a hash with seen entries marked as 1-values
802# Then grep only elements present in the first list from the second list.
803# As a little optimization, use the shorter list to build a hash.
804#
805sub intersect
806{
807	scalar @_ == 3 or _usage("intersect(cookie, left, right)");
808	shift;
809
810	return (_set_intersect(@_));
811}
812
813#
814# Return elements of the second list not present in the first list. Both lists
815# are passed by reference.
816#
817sub _set_subtract
818{
819	my ($left, $right) = @_;
820	my %seen;	# Set to 1 for everything in the first list
821	# Create a hash indexed by elements in @left with ones as a value.
822	map { $seen{$_} = 1 } @$left;
823	# Find members of @right present in @left
824	return (grep { ! $seen{$_} } @$right);
825}
826
827#
828# END OF PUBLIC INTERFACE
829#
830
831#
832# INTERNAL FUNCTIONS
833#
834
835#
836# _usage(): print error message and terminate the program.
837#
838sub _usage
839{
840	my $msg = shift;
841	Carp::croak "Usage: Sun::Solaris::Pg::$msg";
842}
843
844#
845# Sort the list numerically
846# Should be called in list context
847#
848sub _nsort
849{
850	return (sort { $a <=> $b } @_);
851}
852
853#
854# Return the input list with duplicates removed.
855# Should be used in list context
856#
857sub _uniq
858{
859	my %seen;
860	return (grep { ++$seen{$_} == 1 } @_);
861}
862
863#
864# Sort list numerically and remove duplicates
865# Should be called in list context
866#
867sub _uniqsort
868{
869	return (sort { $a <=> $b } _uniq(@_));
870}
871
872# Get PG from the snapshot by id
873sub _pg_get
874{
875	my $self = shift;
876	my $pgid = shift;
877
878	return unless defined $pgid;
879	my $pgtree = $self->{PGTREE} or return;
880
881	return ($pgtree->{$pgid});
882}
883
884#
885# Copy data from kstat representation to our representation
886# Arguments:
887#   PG kstat
888#   Reference to the list of CPUs.
889# Any CPUs in the PG kstat not present in the CPU list are ignored.
890#
891sub _pg_create_from_kstat
892{
893	my $pg_ks = shift;
894	my $all_cpus = shift;
895	my %all_cpus;
896	my $pg = ();
897
898	#
899	# Mark CPUs available
900	#
901	map { $all_cpus{$_}++ } @$all_cpus;
902
903	return unless $pg_ks;
904
905	#
906	# Convert CPU list in the kstat from x-y,z form to the proper list
907	#
908	my @cpus = _expand($pg_ks->{cpus});
909
910	#
911	# Remove any CPUs not present in the arguments
912	#
913	@cpus = grep { $all_cpus{$_} } @cpus;
914
915	#
916	# Do not create PG unless it has any CPUs
917	#
918	return unless scalar @cpus;
919
920	#
921	# Copy data to the $pg structure
922	#
923	$pg->{ncpus} = scalar @cpus;
924	$pg->{cpus} = \@cpus;
925	$pg->{id} = defined($pg_ks->{pg_id}) ? $pg_ks->{pg_id} : $pg_ks->{id};
926	$pg->{util} = $pg_ks->{hw_util};
927	$pg->{current_rate} = $pg_ks->{hw_util_rate};
928	$pg->{util_rate_max} = $pg_ks->{hw_util_rate_max};
929	$pg->{util_time_running} = $pg_ks->{hw_util_time_running};
930	$pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped};
931	$pg->{snaptime} = $pg_ks->{snaptime};
932	$pg->{generation} = $pg_ks->{generation};
933	$pg->{sh_name} = $pg_ks->{relationship} || $pg_ks->{sharing_relation};
934	$pg->{parent} = $pg_ks->{parent_pg_id};
935	$pg->{parent} = PG_PARENT_UNDEF unless defined $pg->{parent};
936	#
937	# Replace spaces with underscores in sharing names
938	#
939	$pg->{sh_name} =~ s/ /_/g;
940	$pg->{is_leaf} = 1;
941
942	return $pg;
943}
944
945#
946# Create fake root PG with all CPUs
947# Arguments: list of CPUs
948#
949sub _pg_create_root
950{
951	my $pg = ();
952	my @cpus = @_;
953
954	$pg->{id} = $ROOT_ID;
955	$pg->{ncpus} = scalar @cpus;
956	$pg->{util} = 0;
957	$pg->{current_rate} = 0;
958	$pg->{util_rate_max} = 0;
959	$pg->{util_time_running} = 0;
960	$pg->{util_time_stopped} = 0;
961	$pg->{snaptime} = 0;
962	$pg->{generation} = 0;
963	$pg->{sh_name} = 'System';
964	$pg->{is_leaf} = 0;
965	$pg->{cpus} = \@cpus;
966	$pg->{parent} = PG_NO_PARENT;
967
968	return ($pg);
969}
970
971#
972# _pg_all_from_kstats(SNAPSHOT)
973# Extract all PG information from kstats
974#
975sub _pg_all_from_kstats
976{
977	my $self = shift;
978	my $ks = $self->{KSTAT};
979	my @all_cpus = @{$self->{CPUS}};
980
981	return unless $ks;
982
983	my $pgtree = ();
984	my $pg_info = $ks->{$self->{PG_MODULE}};
985
986	#
987	# Walk all PG kstats and copy them to $pgtree->{$id}
988	#
989	foreach my $id (keys %$pg_info) {
990		my $pg_ks = _kstat_get_pg($pg_info, $id,
991					  $self->{USE_OLD_KSTATS});
992		next unless $pg_ks;
993
994		my $pg = _pg_create_from_kstat($pg_ks, \@all_cpus);
995
996		$pgtree->{$id} = $pg if $pg;
997	}
998
999	#
1000	# OS does not have root PG, so create one.
1001	#
1002	if (!$pgtree->{$ROOT_ID}) {
1003		$pgtree->{$ROOT_ID} = _pg_create_root (@all_cpus);
1004	}
1005
1006	#
1007	# Construct parent-child relationships between PGs
1008	#
1009
1010	#
1011	# Get list of PGs sorted by number of CPUs
1012	# If two PGs have the same number of CPUs, sort by relationship order.
1013	#
1014	my @lineage = sort {
1015		$a->{ncpus} <=> $b->{ncpus} ||
1016		_relationship_order($a->{sh_name}) <=>
1017		_relationship_order($b->{sh_name})
1018	    } values %$pgtree;
1019
1020	#
1021	# For each PG in the lineage discover its parent if it doesn't have one.
1022	#
1023	for (my $i = 0; $i < scalar @lineage; $i++) {
1024		my $pg = $lineage[$i];
1025
1026		#
1027		# Ignore PGs which already have parent in kstats
1028		#
1029		my $parent = $pg->{parent};
1030		next if ($parent >= PG_NO_PARENT);
1031
1032		my $ncpus = $pg->{ncpus};
1033		my @cpus = @{$pg->{cpus}};
1034
1035		#
1036		# Walk the lineage, ignoring any CPUs with the same number of
1037		# CPUs
1038		for (my $j = $i + 1; $j < scalar @lineage; $j++) {
1039			my $pg1 = $lineage[$j];
1040			my @parent_cpus = @{$pg1->{cpus}};
1041			if (_is_subset(\@cpus, \@parent_cpus)) {
1042				$pg->{parent} = $pg1->{id};
1043				last;
1044			}
1045		}
1046	}
1047
1048	#
1049	# Find all top-level PGs and put them under $root
1050	#
1051	foreach my $pgid (keys %$pgtree) {
1052		next if $pgid == $ROOT_ID;
1053		my $pg = $pgtree->{$pgid};
1054		$pg->{parent} = $ROOT_ID unless $pg->{parent} >= 0;
1055	}
1056
1057	#
1058	# Now that we know parents, for each parent add all direct children to
1059	# their parent sets
1060	#
1061	foreach my $pg (@lineage) {
1062		my $parentid = $pg->{parent};
1063		next unless defined $parentid;
1064
1065		my $parent = $pgtree->{$parentid};
1066		push (@{$parent->{children}}, $pg->{id});
1067	}
1068
1069	return ($pgtree);
1070}
1071
1072#
1073# Read kstats and initialize PG object
1074# Collect basic information about cmt_pg
1075# Add list of children and list of CPUs
1076# Returns the hash reference indexed by pg id
1077#
1078# The _init() function accepts arguments in the form of a hash. The following
1079# subarguments are supported:
1080#
1081#   -cpudata	# Collect per-CPU data from kstats if this is T
1082#   -tags	# Match PGs to physical relationships if this is T
1083#   -swload	# Collect software CPU load if this is T
1084
1085sub _init
1086{
1087	my $ks = Sun::Solaris::Kstat->new(strip_strings => 1);
1088	return unless $ks;
1089
1090	my %args = @_;
1091	my $get_cpu_data = $args{-cpudata};
1092	my $get_tags = $args{-tags};
1093	my $get_swload = $args{-swload};
1094
1095	my $self;
1096
1097	my $use_old_kstat_names = scalar(grep {/^pg_hw_perf/ } keys (%$ks)) == 0;
1098
1099	my @frequencies;
1100	$self->{MAX_FREQUENCY} = 0;
1101
1102	$self->{PG_MODULE} = $use_old_kstat_names ? 'pg' : 'pg_hw_perf';
1103	$self->{PG_CPU_MODULE} =  $use_old_kstat_names ?
1104	  'pg_cpu' : 'pg_hw_perf_cpu';
1105	$self->{USE_OLD_KSTATS} = $use_old_kstat_names;
1106
1107	$get_cpu_data = 0 unless  scalar(grep {/^$self->{PG_CPU_MODULE}/ }
1108					 keys (%$ks));
1109
1110	# Get list of PG-related kstats
1111	my $pg_keys = $use_old_kstat_names ? 'pg' : 'pg_hw';
1112
1113	if (scalar(grep { /^$pg_keys/ } keys (%$ks)) == 0) {
1114		if (exists(&Errno::ENOTSUPP)) {
1115			$! = &Errno::ENOTSUPP;
1116		} else {
1117			$! = 48;
1118		}
1119		return;
1120	}
1121
1122
1123	#
1124	# Mapping of cores and chips to CPUs
1125	#
1126	my $hw_mapping;
1127
1128	#
1129	# Get list of all CPUs
1130	#
1131	my $cpu_info = $ks->{cpu_info};
1132
1133	#
1134	# @all-cpus is a list of all cpus
1135	#
1136	my @all_cpus = keys %$cpu_info;
1137
1138	#
1139	# Save list of all CPUs in the snapshot
1140	#
1141	$self->{CPUS} = \@all_cpus;
1142
1143	#
1144	# Find CPUs for each socket and chip
1145	# Also while we scan CPU kstats, get maximum frequency of each CPU.
1146	#
1147	foreach my $id (@all_cpus) {
1148		my $ci = $cpu_info->{$id}->{"cpu_info$id"};
1149		next unless $ci;
1150		my $core_id = $ci->{core_id};
1151		my $chip_id = $ci->{chip_id};
1152
1153		push(@{$hw_mapping->{core}->{$core_id}}, $id)
1154		  if defined $core_id;
1155		push(@{$hw_mapping->{chip}->{$chip_id}}, $id)
1156		  if defined $chip_id;
1157
1158		# Read CPU frequencies separated by commas
1159		my $freqs = $ci->{supported_frequencies_Hz};
1160		my $max_freq = max(split(/:/, $freqs));
1161
1162		# Calculate maximum frequency for the snapshot.
1163		$self->{MAX_FREQUENCY} = $max_freq if
1164		  $self->{MAX_FREQUENCY} < $max_freq;
1165	}
1166
1167	$self->{KSTAT} = $ks;
1168
1169	#
1170	# Convert kstats to PG tree
1171	#
1172	my $pgtree = _pg_all_from_kstats($self);
1173	$self->{PGTREE} = $pgtree;
1174
1175	#
1176	# Find capacity estimate per sharing relationship
1177	#
1178	foreach my $pgid (keys %$pgtree) {
1179		my $pg = $pgtree->{$pgid};
1180		my $shname = $pg->{sh_name};
1181		my $max_rate = $pg->{util_rate_max};
1182		$self->{CAPACITY}->{$shname} = $max_rate if
1183		  !$self->{CAPACITY}->{$shname} ||
1184		    $self->{CAPACITY}->{$shname} < $max_rate;
1185	}
1186
1187	if ($get_tags) {
1188		#
1189		# Walk all PGs and mark all PGs that have corresponding hardware
1190		# entities (system, chips, cores).
1191		#
1192		foreach my $pgid (keys %$pgtree) {
1193			my $pg = $pgtree->{$pgid};
1194			my @cpus = @{$pg->{cpus}};
1195			next unless scalar @cpus > 1;
1196
1197			if (_set_equal (\@cpus, \@all_cpus)) {
1198				#
1199				# PG has all CPUs in the system.
1200				#
1201				push (@{$pg->{tags}}, 'system');
1202			}
1203
1204			foreach my $name ('core', 'chip') {
1205				my $hwdata = $hw_mapping->{$name};
1206				foreach my $id (keys %$hwdata) {
1207					# CPUs for this entity
1208					my @hw_cpus = @{$hwdata->{$id}};
1209					if (_set_equal (\@cpus, \@hw_cpus)) {
1210						#
1211						# PG has exactly the same CPUs
1212						#
1213						push (@{$pg->{tags}}, $name);
1214					}
1215				}
1216			}
1217		}
1218	}
1219
1220	#
1221	# Save software load for each CPU
1222	#
1223	if ($get_swload) {
1224		$self->{CPU_LOAD} = _get_sw_cpu_load($ks);
1225	}
1226
1227	#
1228	# Collect per-CPU utilization data if requested
1229	#
1230	if ($get_cpu_data) {
1231		_get_hw_cpu_load($self);
1232	}
1233
1234	$self->{GET_CPU_DATA} = $get_cpu_data;
1235
1236	#
1237	# Verify that in the end we have the same PG generation for each PG
1238	#
1239	if (! _same_generation($self)) {
1240		$! = &Errno::EAGAIN;
1241		return;
1242	}
1243
1244	return ($self);
1245}
1246
1247#
1248# Verify that topology is the same as at the time snapshot was created
1249#
1250sub _same_generation
1251{
1252	my $self = shift;
1253	my $pgtree =  $self->{PGTREE} or return;
1254
1255	return (0) unless $self;
1256
1257	my $ks = $self->{KSTAT};
1258	$ks->update();
1259	my $pg_info = $ks->{$self->{PG_MODULE}};
1260	foreach my $id (keys %$pg_info) {
1261		my $pg = $pgtree->{$id} or next;
1262
1263		my $pg_ks = _kstat_get_pg($pg_info, $id,
1264					  $self->{USE_OLD_KSTATS});
1265		return unless $pg_ks;
1266		return (0) unless $pg->{generation} == $pg_ks->{generation};
1267	}
1268	return (1);
1269}
1270
1271#
1272# Update capacity for both PGs
1273#
1274sub _capacity_update
1275{
1276	my $c1 = shift;
1277	my $c2 = shift;
1278
1279	my $pgtree1 = $c1->{PGTREE};
1280	my $pgtree2 = $c2->{PGTREE};
1281
1282	foreach my $pgid (keys %$pgtree1) {
1283		my $pg1 = $pgtree1->{$pgid};
1284		my $pg2 = $pgtree2->{$pgid};
1285		next unless $pg1 && $pg2;
1286		next unless $pg1->{generation} != $pg2->{generation};
1287		my $shname1 = $pg1->{sh_name};
1288		my $shname2 = $pg2->{sh_name};
1289		next unless $shname1 eq $shname2;
1290		my $max_rate = max($pg1->{util_rate_max}, $pg2->{util_rate_max});
1291
1292		my $utilization = abs($pg1->{util} - $pg2->{util});
1293		my $tdelta = abs($pg1->{snaptime} - $pg2->{snaptime});
1294		$utilization /= $tdelta if $utilization && $tdelta;
1295		$max_rate = $utilization if
1296		  $utilization && $max_rate < $utilization;
1297
1298		$c1->{CAPACITY}->{$shname1} = $max_rate if
1299		  !$c1->{CAPACITY}->{$shname1} ||
1300		    !$c1->{CAPACITY}->{$shname1} < $max_rate;
1301		$c2->{CAPACITY}->{$shname2} = $max_rate if
1302		  !$c2->{CAPACITY}->{$shname2} ||
1303		    !$c2->{CAPACITY}->{$shname2} < $max_rate;
1304	}
1305}
1306
1307#
1308# Return list of PGs breadth first
1309#
1310sub _walk_depth_first
1311{
1312	my $p = shift;
1313	# Nothing to do if list is empty
1314	return unless scalar (@_);
1315
1316	return (map { ($_, _walk_depth_first ($p, $p->children($_))) } @_);
1317}
1318
1319#
1320# Return list of PGs breadth first
1321#
1322sub _walk_breadth_first
1323{
1324	my $p = shift;
1325	# Nothing to do if list is empty
1326	return unless scalar (@_);
1327
1328	return (@_, _walk_breadth_first($p, map { $p->children($_) } @_));
1329}
1330
1331#
1332# Given the kstat reference (already hashed by module name) and PG ID return the
1333# corresponding kstat.
1334#
1335sub _kstat_get_pg
1336{
1337	my $mod = shift;
1338	my $pgid = shift;
1339	my $use_old_kstats = shift;
1340
1341	my $id_field = $use_old_kstats ? 'id' : 'pg_id';
1342
1343	return ($mod->{$pgid}->{hardware}) if $use_old_kstats;
1344
1345	my @instances = grep { $_->{$id_field} == $pgid }
1346	  values(%{$mod->{$pgid}});
1347	return ($instances[0]);
1348}
1349
1350######################################################################
1351# Set routines
1352#######################################################################
1353#
1354# Return T if one list contains all the elements of another list.
1355# All lists are passed by reference
1356#
1357sub _is_subset
1358{
1359	my ($left, $right) = @_;
1360	my %seen;	# Set to 1 for everything in the first list
1361	# Put the shortest list in $left
1362
1363	Carp::croak "invalid left argument" unless ref ($left) eq 'ARRAY';
1364	Carp::croak "invalid right argument" unless ref ($right) eq 'ARRAY';
1365
1366	# Create a hash indexed by elements in @right with ones as a value.
1367	map { $seen{$_} = 1 } @$right;
1368
1369	# Find members of @left not present in @right
1370	my @extra = grep { !$seen{$_} } @$left;
1371	return (!scalar(@extra));
1372}
1373
1374sub _is_member
1375{
1376	my $set = shift;
1377	my $element = shift;
1378	my %seen;
1379
1380	map { $seen{$_} = 1 } @$set;
1381
1382	return ($seen{$element});
1383}
1384
1385#
1386# Return T if C1 and C2 contain the same elements
1387#
1388sub _set_equal
1389{
1390	my $c1 = shift;
1391	my $c2 = shift;
1392
1393	return 0 unless scalar @$c1 == scalar @$c2;
1394
1395	return (_is_subset($c1, $c2) && _is_subset($c2, $c1));
1396}
1397
1398#
1399# Return the intersection of two lists passed by reference
1400# Convert the first list to a hash with seen entries marked as 1-values
1401# Then grep only elements present in the first list from the second list.
1402# As a little optimization, use the shorter list to build a hash.
1403#
1404sub _set_intersect
1405{
1406	my ($left, $right) = @_;
1407	my %seen;	# Set to 1 for everything in the first list
1408	# Put the shortest list in $left
1409	scalar @$left <= scalar @$right or ($right, $left) = ($left, $right);
1410
1411	# Create a hash indexed by elements in @left with ones as a value.
1412	map { $seen{$_} = 1 } @$left;
1413	# Find members of @right present in @left
1414	return (grep { $seen{$_} } @$right);
1415}
1416
1417#
1418# Expand start-end into the list of values
1419# Input: string containing a single numeric ID or x-y range
1420# Output: single value or a list of values
1421# Ranges with start being more than end are inverted
1422#
1423sub _expand
1424{
1425	# Skip the first argument if it is the object reference
1426	shift if ref $@[0] eq 'HASH';
1427
1428	my $arg = shift;
1429
1430	return unless defined $arg;
1431
1432	my @args = split /,/, $arg;
1433
1434	return map { _expand($_) } @args if scalar @args > 1;
1435
1436	$arg = shift @args;
1437	return unless defined $arg;
1438
1439	if ($arg =~ m/^\d+$/) {
1440		# single number
1441		return ($arg);
1442	} elsif ($arg =~ m/^(\d+)\-(\d+)$/) {
1443		my ($start, $end) = ($1, $2);	# $start-$end
1444		# Reverse the interval if start > end
1445		($start, $end) = ($end, $start) if $start > $end;
1446		return ($start .. $end);
1447	} else {
1448		return $arg;
1449	}
1450	return;
1451}
1452
1453#
1454# Consolidate consecutive ids as start-end
1455# Input: list of ids
1456# Output: string with space-sepated cpu values with ranges
1457#   collapsed as x-y
1458#
1459sub _collapse
1460{
1461	return ('') unless @_;
1462	my @args = _uniqsort(@_);
1463	my $start = shift(@args);
1464	my $result = '';
1465	my $end = $start;	# Initial range consists of the first element
1466	foreach my $el (@args) {
1467		if (!$el =~ /^\d+$/) {
1468			$result = "$result $el";
1469			$end = $el;
1470		} elsif ($el == ($end + 1)) {
1471			#
1472			# Got consecutive ID, so extend end of range without
1473			# printing anything since the range may extend further
1474			#
1475			$end = $el;
1476		} else {
1477			#
1478			# Next ID is not consecutive, so print IDs gotten so
1479			# far.
1480			#
1481			if ($end > $start + 1) {	# range
1482				$result = "$result $start-$end";
1483			} elsif ($end > $start) {	# different values
1484				$result = "$result $start $end";
1485			} else {	# same value
1486				$result = "$result $start";
1487			}
1488
1489			# Try finding consecutive range starting from this ID
1490			$start = $end = $el;
1491		}
1492	}
1493
1494	# Print last ID(s)
1495	if (! ($end =~ /^\d+$/)) {
1496		$result = "$result $end";
1497	} elsif ($end > $start + 1) {
1498		$result = "$result $start-$end";
1499	} elsif ($end > $start) {
1500		$result = "$result $start $end";
1501	} else {
1502		$result = "$result $start";
1503	}
1504	# Remove any spaces in the beginning
1505	$result =~ s/^\s+//;
1506	return ($result);
1507}
1508
1509#
1510# get relationship order from relationship name.
1511# return 0 for all unknown names.
1512#
1513sub _relationship_order
1514{
1515	my $name = shift;
1516	return ($relationships_order{$name} || 0);
1517}
1518
1519#
1520# Get software load for each CPU from kstats
1521# Argument: kstat reference
1522# Returns: reference to the hash with
1523# cpu_idle, cpu_user, cpu_sys keys.
1524#
1525sub _get_sw_cpu_load
1526{
1527	my $ks = shift or return;
1528
1529	my $loads;
1530	my $sys_ks = $ks->{cpu};
1531	foreach my $cpu (keys %$sys_ks) {
1532		my $sys = $sys_ks->{$cpu}->{sys};
1533		$loads->{$cpu}->{cpu_idle} = $sys->{cpu_ticks_idle};
1534		$loads->{$cpu}->{cpu_user} = $sys->{cpu_ticks_user};
1535		$loads->{$cpu}->{cpu_sys} = $sys->{cpu_ticks_kernel};
1536	}
1537
1538	return ($loads);
1539}
1540
1541#
1542# Get software load for each CPU from kstats
1543# Arguments:
1544#  pgtree reference
1545#  kstat reference
1546#
1547# Returns: nothing
1548# Stores CPU load in the $pg->{cpudata} hash for each PG
1549#
1550sub _get_hw_cpu_load
1551{
1552	my $self = shift;
1553	my $pgtree = $self->{PGTREE};
1554	my $ks = $self->{KSTAT};
1555
1556	my $pg_cpu_ks = $ks->{$self->{PG_CPU_MODULE}};
1557
1558	foreach my $pgid (keys %$pgtree) {
1559		my $pg = $pgtree->{$pgid};
1560		my @cpus = @{$pg->{cpus}};
1561		my $cpu;
1562		my $pg_id;
1563		foreach my $cpu (keys %$pg_cpu_ks) {
1564			next unless _is_member(\@cpus, $cpu);
1565			my $cpu_hw_data = $pg_cpu_ks->{$cpu};
1566			foreach my $hw (keys %$cpu_hw_data) {
1567				my $cpudata = $cpu_hw_data->{$hw};
1568
1569				#
1570				# Only consider information for this PG
1571				#
1572				next unless $cpudata->{pg_id} == $pgid;
1573
1574				$pg->{cpudata}->{$cpu}->{generation} =
1575				  $cpudata->{generation};
1576				$pg->{cpudata}->{$cpu}->{util} =
1577				  $cpudata->{hw_util};
1578				$pg->{cpudata}->{$cpu}->{util_time_running} =
1579				  $cpudata->{hw_util_time_running};
1580				$pg->{cpudata}->{$cpu}->{util_time_stopped} =
1581				  $cpudata->{hw_util_time_stopped};
1582				$pg->{cpudata}->{$cpu}->{snaptime} =
1583				  $cpudata->{snaptime};
1584			}
1585		}
1586	}
1587}
1588
15891;
1590
1591__END__
1592
1593#
1594# The information about PG hierarchy is contained in a object return by the
1595# new() method.
1596#
1597# This module can deal with old PG kstats that have 'pg' and 'pg_cpu' as module
1598# names as well as new PG kstats which use 'pg_hw_perf' and ''pg_hw_perf_cpu' as
1599# the module name.
1600#
1601# The object contains the following fields:
1602#
1603#   CPUS		List of all CPUs present.
1604#   CAPACITY		Estimate of capacity for each sharing
1605#   PGTREE		The PG tree. See below for the tree representation.
1606#
1607#   PG_MODULE 		Module name for the PG kstats. It is either 'pg' for
1608#			 old style kstats, or 'pg_hw_perf' for new style kstats.
1609#
1610#   MAX_FREQUENCY	Maximum CPU frequency
1611#   USE_OLD_KSTATS	True if we are dealing with old style kstats
1612#   KSTAT		The kstat object used to generate this hierarchy.
1613#
1614# The PG tree is represented as a hash table indexed by PG ID. Each element of
1615# the table is the hash reference with the following fields:
1616#
1617#   children		Reference to the list of children PG IDs
1618#   cpus		Reference to the list of cpu IDs in the PG
1619#   current_rate	Current utilization rate
1620#   generation		PG generation
1621#   id			PG id
1622#   ncpus		number of CPUs in the PG
1623#   parent		PG parent id, or -1 if there is none.
1624#   sh_name		Sharing name
1625#   snaptime		Snapshot time
1626#   util		Hardware utilization
1627#   util_rate_max	Maximum utilization rate
1628#   util_time_running	Time (in nanoseconds) when utilization data is collected
1629#   util_time_stopped	Time when utilization data is not collected
1630#
1631# The fields (with the exception of 'children') are a copy of the data from
1632# kstats.
1633#
1634# The PG hierarchy in the kernel does not have the root PG. We simulate the root
1635# (System) PG which is the parent of top level PGs in the system. This PG always
1636# has ID 0.
1637#
1638