#! /usr/perl5/bin/perl # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the "License"). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/OPENSOLARIS.LICENSE. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets "[]" replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # # # Pg.pm provides object-oriented interface to the Solaris # Processor Group kstats # # See comments in the end # package Sun::Solaris::Pg; use strict; use warnings; use Sun::Solaris::Kstat; use Carp; use Errno; use List::Util qw(max sum); our $VERSION = '1.1'; # # Currently the OS does not have the root PG and PGs constitute a forest of # small trees. This module gathers all such trees under one root with ID zero. # If the root is present already, we do not use faked root. # my $ROOT_ID = 0; # # PG_NO_PARENT means that kstats have PG parent ID and it is set to -1 # PG_PARENT_UNDEF means that kstats have no PG parent ID # use constant { PG_NO_PARENT => -1, PG_PARENT_UNDEF => -2, }; # # Sorting order between different sharing relationships. This order is used to # break ties between PGs with the same number of CPUs. If there are two PGs with # the same set of CPUs, the one with the higher weight will be the parent of the # one with the lower weight. # my %relationships_order = ( 'CPU_PM_Idle_Power_Domain' => 1, 'Integer_Pipeline' => 2, 'Cache' => 3, 'CPU_PM_Active_Power_Domain' => 4, 'Floating_Point_Unit' => 5, 'Data_Pipe_to_memory' => 6, 'Memory' => 7, 'Socket' => 8, 'System' => 9, ); # # Object interface to the library. These are methods that can be used by the # module user. # # # Create a new object representing PG # All the heavy lifting is performed by _init function. # This function performs all the Perl blessing magic. # # The new() method accepts arguments in the form of a hash. The following # subarguments are supported: # # -cpudata # Collect per-CPU data from kstats if this is T # -tags # Match PGs to physical relationships if this is T # -swload # Collect software CPU load if this is T # -retry # how many times to retry PG initialization when it fails # -delay # Delay in seconds between retries # # The arguments are passed to _init(). # sub new { my $class = shift; my %args = @_; my $retry_count = $args{-retry} || 0; my $retry_delay = $args{-delay} || 1; my $self = _init(@_); # # If PG initialization fails with EAGAIN error and the caller requested # retries, retry initialization. # for (; !$self && ($! == &Errno::EAGAIN) && $retry_count; $retry_count--) { select(undef,undef,undef, $retry_delay); $self = _init(@_); } if ($self) { bless($self, $class) if defined($class); bless($self) unless defined($class); } return ($self); } # # Functions below use internal function _pg_get which returns PG hash reference # corresponding to PG ID specified or 'undef' if the PG can't be found. # # # All methods return 'undef' in scalar context and an empty list in list # context when unrecoverable errors are detected. # # # Return the root ID of PG hierarchy # sub root { scalar @_ == 1 or _usage("root(cookie)"); my $self = shift; return unless $self->{PGTREE}; return ($ROOT_ID); } # # Return list of all pgs numerically sorted In scalar context return number of # PGs # sub all { scalar @_ == 1 or _usage("all(cookie)"); my $self = shift; my $pgtree = $self->{PGTREE} or return; my @ids = keys(%{$pgtree}); return (wantarray() ? _nsort(@ids) : scalar @ids); } # # Return list of all pgs by walking the tree depth first. # sub all_depth_first { scalar @_ == 1 or _usage("all_depth_first(cookie)"); my $self = shift; _walk_depth_first($self, $self->root()); } # # Return list of all pgs by walking the tree breadth first. # sub all_breadth_first { scalar @_ == 1 or _usage("all_breadth_first(cookie)"); my $self = shift; _walk_breadth_first($self, $self->root()); } # # Return list of CPUs in the PG specified # CPUs returned are numerically sorted # In scalar context return number of CPUs # sub cpus { scalar @_ == 2 or _usage("cpus(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; my @cpus = @{$pg->{cpus}}; return (wantarray() ? _nsort(@cpus) : _collapse(@cpus)); } # # Return a parent for a given PG # Returns undef if there is no parent # sub parent { scalar @_ == 2 or _usage("parent(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; my $parent = $pg->{parent}; return (defined($parent) && $parent >= 0 ? $parent : undef); } # # Return list of children for a given PG # In scalar context return list of children # sub children { scalar @_ == 2 or _usage("children(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; my $children = $pg->{children} or return; my @children = @{$children}; return (wantarray() ? _nsort(@children) : scalar @children); } # # Return sharing name for the PG # sub sh_name { scalar @_ == 2 or _usage("sh_name(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; return ($pg->{sh_name}); } # # Return T if specified PG ID is a leaf PG # sub is_leaf { scalar @_ == 2 or _usage("is_leaf(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; return ($pg->{is_leaf}); } # # Return leaf PGs # sub leaves { scalar @_ == 1 or _usage("leaves(cookie, pg)"); my $self = shift; return (grep { is_leaf($self, $_) } $self->all()); } # # Update varying data in the snapshot # sub update { scalar @_ == 1 or _usage("update(cookie)"); my $self = shift; my $ks = $self->{KSTAT}; $ks->update(); my $pgtree = $self->{PGTREE}; my $pg_info = $ks->{$self->{PG_MODULE}}; # # Walk PG kstats and copy updated data from kstats to the snapshot # foreach my $id (keys %$pg_info) { my $pg = $pgtree->{$id} or next; my $pg_ks = _kstat_get_pg($pg_info, $id, $self->{USE_OLD_KSTATS}); return unless $pg_ks; # # Update PG from kstats # $pg->{util} = $pg_ks->{hw_util}; $pg->{current_rate} = $pg_ks->{hw_util_rate}; $pg->{util_rate_max} = $pg_ks->{hw_util_rate_max}; $pg->{util_time_running} = $pg_ks->{hw_util_time_running}; $pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped}; $pg->{snaptime} = $pg_ks->{snaptime}; $pg->{generation} = $pg_ks->{generation}; } # # Update software load for each CPU # $self->{CPU_LOAD} = _get_sw_cpu_load($ks); # # Get hardware load per CPU # if ($self->{GET_CPU_DATA}) { _get_hw_cpu_load($self); } return (1); } # # Return list of physical tags for the given PG # sub tags { scalar @_ == 2 or _usage("tags(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; my $tags = $pg->{tags} or return; my @tags = _uniq(@{$tags}); return (wantarray() ? @tags : join (',', @tags)); } # # Return list of sharing relationships in the snapshot Relationships are sorted # by the level in the hierarchy If any PGs are given on the command line, only # return sharing relationships for given PGs, but still keep them sorted. # sub sharing_relationships { scalar @_ or _usage("sharing_relationships(cookie, [pg, ...])"); my $self = shift; my @pgs = $self->all_breadth_first(); if (scalar @_ > 0) { # # Caller specified PGs, remove any PGs not in caller's list # my %seen; map { $seen{$_} = 1 } @_; # Remove any PGs not provided by user @pgs = grep { $seen{$_} } @pgs; } return (_uniq(map { $self->sh_name($_) } @pgs)); } # # Return PG generation number. If PG is specified in the argument, return its # generation, otherwise return snapshot generation. # Snapshot generation is calculated as the total of PG generations # sub generation { (scalar @_ == 1 || scalar @_ == 2) or _usage("generation(cookie, [pg])"); my $self = shift; if (scalar @_ == 0) { my @generations = map { $_->{generation} } values %{$self->{PGTREE}}; return (sum(@generations)); } else { my $id = shift; my $pg = _pg_get($self, $id) or return; return ($pg->{generation}); } } # # Return level of PG in the tree, starting from root. # PG level is cached in the $pg->{level} field. # sub level { scalar @_ == 2 or _usage("level(cookie, pg)"); my $self = shift; my $pgid = shift; my $pg = _pg_get($self, $pgid) or return; return $pg->{level} if defined($pg->{level}); $pg->{level} = 0; my $parent = _pg_get($self, $pg->{parent}); while ($parent) { $pg->{level}++; $parent = _pg_get($self, $parent->{parent}); } return ($pg->{level}); } # # Return T if PG supports utilization We assume that utilization is supported by # PG if it shows any non-zero time in util_time_running. It is possible that the # same condition may be caused by cpustat(1) running ever since PG was created, # but there is not much we can do about it. # sub has_utilization { scalar @_ == 2 or _usage("has_utilization(cookie, pg)"); my $pg = _pg_get(shift, shift) or return; return ($pg->{util_time_running} != 0); } # # Return utilization for the PG # Utilization is a difference in utilization value between two snapshots. # We can only compare utilization between PGs having the same generation ID. # sub utilization { scalar @_ == 3 or _usage("utilization(cookie, cookie1, pg"); my $c1 = shift; my $c2 = shift; my $id = shift; # # Since we have two cookies, update capacity in both # _capacity_update($c1, $c2, $id); my $pg1 = _pg_get($c1, $id) or return; my $pg2 = _pg_get($c2, $id) or return; # # Nothing to return if one of the utilizations wasn't measured # return unless ($pg1->{util_time_running} && $pg2->{util_time_running}); # # Verify generation IDs # return unless $pg1->{generation} eq $pg2->{generation}; my $u1 = $pg1->{util}; my $u2 = $pg2->{util}; return unless defined ($u1) && defined ($u2); return (abs($u2 - $u1)); } # # Return an estimate of PG capacity Capacity is calculated as the maximum of # observed utilization expressed in units per second or maximum CPU frequency # for all CPUs. # # We store capacity per sharing relationship, assuming that the same sharing has # the same capacity. This may not be true for heterogeneous systems. # sub capacity { scalar @_ == 2 or _usage("capacity(cookie, pg"); my $self = shift; my $pgid = shift; my $pg = _pg_get($self, $pgid) or return; my $shname = $pg->{sh_name} or return; return (max($self->{MAX_FREQUENCY}, $self->{CAPACITY}->{$shname})); } # # Return accuracy of utilization calculation between two snapshots The accuracy # is determined based on the total time spent running and not running the # counters. If T1 is the time counters were running during the period and T2 is # the time they were turned off, the accuracy is T1 / (T1 + T2), expressed in # percentages. # sub accuracy { scalar @_ == 3 or _usage("accuracy(cookie, cookie1, pg)"); my $c1 = shift; my $c2 = shift; my $id = shift; my $trun; my $tstop; my $pg1 = _pg_get($c1, $id) or return; my $pg2 = _pg_get($c2, $id) or return; # Both PGs should have the same generation return unless $pg1->{generation} eq $pg2->{generation}; # # Get time spent with running and stopped counters # $trun = abs($pg2->{util_time_running} - $pg1->{util_time_running}); $tstop = abs($pg2->{util_time_stopped} - $pg1->{util_time_stopped}); my $total = $trun + $tstop; # # Calculate accuracy as percentage # my $accuracy = $total ? ($trun * 100) / $total : 0; $accuracy = int($accuracy + 0.5); $accuracy = 100 if $accuracy > 100; return ($accuracy); } # # Return time difference in seconds between two snapshots # sub tdelta { scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)"); my $c1 = shift; my $c2 = shift; my $id = shift; my $pg1 = _pg_get($c1, $id) or return; my $pg2 = _pg_get($c2, $id) or return; return unless $pg1->{generation} eq $pg2->{generation}; my $t1 = $pg1->{snaptime}; my $t2 = $pg2->{snaptime}; my $delta = abs($t1 - $t2); return ($delta); } # # Return software utilization between two snapshots # In scalar context return software load as percentage. # In list context return a list (USER, SYSTEM, IDLE, SWLOAD) # All loads are returned as percentages # sub sw_utilization { scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)"); my $c1 = shift; my $c2 = shift; my $id = shift; my $pg1 = _pg_get($c1, $id) or return; my $pg2 = _pg_get($c2, $id) or return; return unless $pg1->{generation} eq $pg2->{generation}; my @cpus = $c1->cpus($id); my $load1 = $c1->{CPU_LOAD}; my $load2 = $c2->{CPU_LOAD}; my $idle = 0; my $user = 0; my $sys = 0; my $total = 0; my $swload = 0; foreach my $cpu (@cpus) { my $ld1 = $load1->{$cpu}; my $ld2 = $load2->{$cpu}; next unless $ld1 && $ld2; $idle += $ld2->{cpu_idle} - $ld1->{cpu_idle}; $user += $ld2->{cpu_user} - $ld1->{cpu_user}; $sys += $ld2->{cpu_sys} - $ld1->{cpu_sys}; } $total = $idle + $user + $sys; # Prevent division by zero $total = 1 unless $total; $swload = ($user + $sys) * 100 / $total; $idle = $idle * 100 / $total; $user = $user * 100 / $total; $sys = $sys * 100 / $total; return (wantarray() ? ($user, $sys, $idle, $swload) : $swload); } # # Return utilization for the PG for a given CPU # Utilization is a difference in utilization value between two snapshots. # We can only compare utilization between PGs having the same generation ID. # sub cpu_utilization { scalar @_ == 4 or _usage("utilization(cookie, cookie1, pg, cpu"); my $c1 = shift; my $c2 = shift; my $id = shift; my $cpu = shift; my $idle = 0; my $user = 0; my $sys = 0; my $swtotal = 0; my $swload = 0; # # Since we have two cookies, update capacity in both # _capacity_update($c1, $c2, $id); my $pg1 = _pg_get($c1, $id) or return; my $pg2 = _pg_get($c2, $id) or return; # # Nothing to return if one of the utilizations wasn't measured # return unless ($pg1->{util_time_running} && $pg2->{util_time_running}); # # Nothing to return if CPU data is missing # return unless $pg1->{cpudata} && $pg2->{cpudata}; # # Verify generation IDs # return unless $pg1->{generation} eq $pg2->{generation}; # # Get data for the given CPU # my $cpudata1 = $pg1->{cpudata}->{$cpu}; my $cpudata2 = $pg2->{cpudata}->{$cpu}; return unless $cpudata1 && $cpudata2; return unless $cpudata1->{generation} == $cpudata2->{generation}; my $u1 = $cpudata1->{util}; my $u2 = $cpudata2->{util}; return unless defined ($u1) && defined ($u2); my $hw_utilization = abs ($u1 - $u2); # # Get time spent with running and stopped counters # my $trun = abs($cpudata1->{util_time_running} - $cpudata2->{util_time_running}); my $tstop = abs($cpudata1->{util_time_stopped} - $cpudata2->{util_time_stopped}); my $total = $trun + $tstop; # # Calculate accuracy as percentage # my $accuracy = $total ? ($trun * 100) / $total : 0; $accuracy = int($accuracy + 0.5); $accuracy = 100 if $accuracy > 100; my $t1 = $cpudata1->{snaptime}; my $t2 = $cpudata2->{snaptime}; my $tdelta = abs ($t1 - $t2); my $shname = $pg2->{sh_name} or return; my $capacity = max($c2->{MAX_FREQUENCY}, $c2->{CAPACITY}->{$shname}); my $utilization = $hw_utilization / $tdelta; $capacity = $utilization unless $capacity; $utilization /= $capacity; $utilization *= 100; my $ld1 = $c1->{CPU_LOAD}->{$cpu}; my $ld2 = $c2->{CPU_LOAD}->{$cpu}; if ($ld1 && $ld2) { $idle = $ld2->{cpu_idle} - $ld1->{cpu_idle}; $user = $ld2->{cpu_user} - $ld1->{cpu_user}; $sys = $ld2->{cpu_sys} - $ld1->{cpu_sys}; $swtotal = $idle + $user + $sys; # Prevent division by zero $swtotal = 1 unless $swtotal; $swload = ($user + $sys) * 100 / $swtotal; $idle = $idle * 100 / $swtotal; $user = $user * 100 / $swtotal; $sys = $sys * 100 / $swtotal; } return (wantarray() ? ($utilization, $accuracy, $hw_utilization, $swload, $user, $sys, $idle) : $utilization); } # # online_cpus(kstat) # Return list of on-line CPUs # sub online_cpus { scalar @_ == 1 or _usage("online_cpus(cookie)"); my $self = shift or return; my $ks = $self->{KSTAT} or return; my $cpu_info = $ks->{cpu_info} or return; my @cpus = grep { my $cp = $cpu_info->{$_}->{"cpu_info$_"}; my $state = $cp->{state}; $state eq 'on-line' || $state eq 'no-intr'; } keys %{$cpu_info}; return (wantarray() ? @cpus : _nsort(@cpus)); } # # Support methods # # The following methods are not PG specific but are generally useful for PG # interface consumers # # # Sort the list numerically # sub nsort { scalar @_ > 0 or _usage("nsort(cookie, val, ...)"); shift; return (_nsort(@_)); } # # Return the input list with duplicates removed. # Should be used in list context # sub uniq { scalar @_ > 0 or _usage("uniq(cookie, val, ...)"); shift; return (_uniq(@_)); } # # Sort list numerically and remove duplicates # Should be called in list context # sub uniqsort { scalar @_ > 0 or _usage("uniqsort(cookie, val, ...)"); shift; return (_uniqsort(@_)); } # # Expand all arguments and present them as a numerically sorted list # x,y is expanded as (x y) # 1-3 ranges are expandes as (1 2 3) # sub expand { scalar @_ > 0 or _usage("expand(cookie, val, ...)"); shift; return (_uniqsort(map { _expand($_) } @_)); } # # Consolidate consecutive ids as start-end # Input: list of ids # Output: string with space-sepated cpu values with ranges # collapsed as x-y # sub id_collapse { scalar @_ > 0 or _usage("collapse(cookie, val, ...)"); shift; return _collapse(@_); } # # Return elements of the second list not present in the first list. Both lists # are passed by reference. # sub set_subtract { scalar @_ == 3 or _usage("set_subtract(cookie, left, right)"); shift; return (_set_subtract(@_)); } # # Return the intersection of two lists passed by reference # Convert the first list to a hash with seen entries marked as 1-values # Then grep only elements present in the first list from the second list. # As a little optimization, use the shorter list to build a hash. # sub intersect { scalar @_ == 3 or _usage("intersect(cookie, left, right)"); shift; return (_set_intersect(@_)); } # # Return elements of the second list not present in the first list. Both lists # are passed by reference. # sub _set_subtract { my ($left, $right) = @_; my %seen; # Set to 1 for everything in the first list # Create a hash indexed by elements in @left with ones as a value. map { $seen{$_} = 1 } @$left; # Find members of @right present in @left return (grep { ! $seen{$_} } @$right); } # # END OF PUBLIC INTERFACE # # # INTERNAL FUNCTIONS # # # _usage(): print error message and terminate the program. # sub _usage { my $msg = shift; Carp::croak "Usage: Sun::Solaris::Pg::$msg"; } # # Sort the list numerically # Should be called in list context # sub _nsort { return (sort { $a <=> $b } @_); } # # Return the input list with duplicates removed. # Should be used in list context # sub _uniq { my %seen; return (grep { ++$seen{$_} == 1 } @_); } # # Sort list numerically and remove duplicates # Should be called in list context # sub _uniqsort { return (sort { $a <=> $b } _uniq(@_)); } # Get PG from the snapshot by id sub _pg_get { my $self = shift; my $pgid = shift; return unless defined $pgid; my $pgtree = $self->{PGTREE} or return; return ($pgtree->{$pgid}); } # # Copy data from kstat representation to our representation # Arguments: # PG kstat # Reference to the list of CPUs. # Any CPUs in the PG kstat not present in the CPU list are ignored. # sub _pg_create_from_kstat { my $pg_ks = shift; my $all_cpus = shift; my %all_cpus; my $pg = (); # # Mark CPUs available # map { $all_cpus{$_}++ } @$all_cpus; return unless $pg_ks; # # Convert CPU list in the kstat from x-y,z form to the proper list # my @cpus = _expand($pg_ks->{cpus}); # # Remove any CPUs not present in the arguments # @cpus = grep { $all_cpus{$_} } @cpus; # # Do not create PG unless it has any CPUs # return unless scalar @cpus; # # Copy data to the $pg structure # $pg->{ncpus} = scalar @cpus; $pg->{cpus} = \@cpus; $pg->{id} = defined($pg_ks->{pg_id}) ? $pg_ks->{pg_id} : $pg_ks->{id}; $pg->{util} = $pg_ks->{hw_util}; $pg->{current_rate} = $pg_ks->{hw_util_rate}; $pg->{util_rate_max} = $pg_ks->{hw_util_rate_max}; $pg->{util_time_running} = $pg_ks->{hw_util_time_running}; $pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped}; $pg->{snaptime} = $pg_ks->{snaptime}; $pg->{generation} = $pg_ks->{generation}; $pg->{sh_name} = $pg_ks->{relationship} || $pg_ks->{sharing_relation}; $pg->{parent} = $pg_ks->{parent_pg_id}; $pg->{parent} = PG_PARENT_UNDEF unless defined $pg->{parent}; # # Replace spaces with underscores in sharing names # $pg->{sh_name} =~ s/ /_/g; $pg->{is_leaf} = 1; return $pg; } # # Create fake root PG with all CPUs # Arguments: list of CPUs # sub _pg_create_root { my $pg = (); my @cpus = @_; $pg->{id} = $ROOT_ID; $pg->{ncpus} = scalar @cpus; $pg->{util} = 0; $pg->{current_rate} = 0; $pg->{util_rate_max} = 0; $pg->{util_time_running} = 0; $pg->{util_time_stopped} = 0; $pg->{snaptime} = 0; $pg->{generation} = 0; $pg->{sh_name} = 'System'; $pg->{is_leaf} = 0; $pg->{cpus} = \@cpus; $pg->{parent} = PG_NO_PARENT; return ($pg); } # # _pg_all_from_kstats(SNAPSHOT) # Extract all PG information from kstats # sub _pg_all_from_kstats { my $self = shift; my $ks = $self->{KSTAT}; my @all_cpus = @{$self->{CPUS}}; return unless $ks; my $pgtree = (); my $pg_info = $ks->{$self->{PG_MODULE}}; # # Walk all PG kstats and copy them to $pgtree->{$id} # foreach my $id (keys %$pg_info) { my $pg_ks = _kstat_get_pg($pg_info, $id, $self->{USE_OLD_KSTATS}); next unless $pg_ks; my $pg = _pg_create_from_kstat($pg_ks, \@all_cpus); $pgtree->{$id} = $pg if $pg; } # # OS does not have root PG, so create one. # if (!$pgtree->{$ROOT_ID}) { $pgtree->{$ROOT_ID} = _pg_create_root (@all_cpus); } # # Construct parent-child relationships between PGs # # # Get list of PGs sorted by number of CPUs # If two PGs have the same number of CPUs, sort by relationship order. # my @lineage = sort { $a->{ncpus} <=> $b->{ncpus} || _relationship_order($a->{sh_name}) <=> _relationship_order($b->{sh_name}) } values %$pgtree; # # For each PG in the lineage discover its parent if it doesn't have one. # for (my $i = 0; $i < scalar @lineage; $i++) { my $pg = $lineage[$i]; # # Ignore PGs which already have parent in kstats # my $parent = $pg->{parent}; next if ($parent >= PG_NO_PARENT); my $ncpus = $pg->{ncpus}; my @cpus = @{$pg->{cpus}}; # # Walk the lineage, ignoring any CPUs with the same number of # CPUs for (my $j = $i + 1; $j < scalar @lineage; $j++) { my $pg1 = $lineage[$j]; my @parent_cpus = @{$pg1->{cpus}}; if (_is_subset(\@cpus, \@parent_cpus)) { $pg->{parent} = $pg1->{id}; last; } } } # # Find all top-level PGs and put them under $root # foreach my $pgid (keys %$pgtree) { next if $pgid == $ROOT_ID; my $pg = $pgtree->{$pgid}; $pg->{parent} = $ROOT_ID unless $pg->{parent} >= 0; } # # Now that we know parents, for each parent add all direct children to # their parent sets # foreach my $pg (@lineage) { my $parentid = $pg->{parent}; next unless defined $parentid; my $parent = $pgtree->{$parentid}; push (@{$parent->{children}}, $pg->{id}); } return ($pgtree); } # # Read kstats and initialize PG object # Collect basic information about cmt_pg # Add list of children and list of CPUs # Returns the hash reference indexed by pg id # # The _init() function accepts arguments in the form of a hash. The following # subarguments are supported: # # -cpudata # Collect per-CPU data from kstats if this is T # -tags # Match PGs to physical relationships if this is T # -swload # Collect software CPU load if this is T sub _init { my $ks = Sun::Solaris::Kstat->new(strip_strings => 1); return unless $ks; my %args = @_; my $get_cpu_data = $args{-cpudata}; my $get_tags = $args{-tags}; my $get_swload = $args{-swload}; my $self; my $use_old_kstat_names = scalar(grep {/^pg_hw_perf/ } keys (%$ks)) == 0; my @frequencies; $self->{MAX_FREQUENCY} = 0; $self->{PG_MODULE} = $use_old_kstat_names ? 'pg' : 'pg_hw_perf'; $self->{PG_CPU_MODULE} = $use_old_kstat_names ? 'pg_cpu' : 'pg_hw_perf_cpu'; $self->{USE_OLD_KSTATS} = $use_old_kstat_names; $get_cpu_data = 0 unless scalar(grep {/^$self->{PG_CPU_MODULE}/ } keys (%$ks)); # Get list of PG-related kstats my $pg_keys = $use_old_kstat_names ? 'pg' : 'pg_hw'; if (scalar(grep { /^$pg_keys/ } keys (%$ks)) == 0) { if (exists(&Errno::ENOTSUPP)) { $! = &Errno::ENOTSUPP; } else { $! = 48; } return; } # # Mapping of cores and chips to CPUs # my $hw_mapping; # # Get list of all CPUs # my $cpu_info = $ks->{cpu_info}; # # @all-cpus is a list of all cpus # my @all_cpus = keys %$cpu_info; # # Save list of all CPUs in the snapshot # $self->{CPUS} = \@all_cpus; # # Find CPUs for each socket and chip # Also while we scan CPU kstats, get maximum frequency of each CPU. # foreach my $id (@all_cpus) { my $ci = $cpu_info->{$id}->{"cpu_info$id"}; next unless $ci; my $core_id = $ci->{core_id}; my $chip_id = $ci->{chip_id}; push(@{$hw_mapping->{core}->{$core_id}}, $id) if defined $core_id; push(@{$hw_mapping->{chip}->{$chip_id}}, $id) if defined $chip_id; # Read CPU frequencies separated by commas my $freqs = $ci->{supported_frequencies_Hz}; my $max_freq = max(split(/:/, $freqs)); # Calculate maximum frequency for the snapshot. $self->{MAX_FREQUENCY} = $max_freq if $self->{MAX_FREQUENCY} < $max_freq; } $self->{KSTAT} = $ks; # # Convert kstats to PG tree # my $pgtree = _pg_all_from_kstats($self); $self->{PGTREE} = $pgtree; # # Find capacity estimate per sharing relationship # foreach my $pgid (keys %$pgtree) { my $pg = $pgtree->{$pgid}; my $shname = $pg->{sh_name}; my $max_rate = $pg->{util_rate_max}; $self->{CAPACITY}->{$shname} = $max_rate if !$self->{CAPACITY}->{$shname} || $self->{CAPACITY}->{$shname} < $max_rate; } if ($get_tags) { # # Walk all PGs and mark all PGs that have corresponding hardware # entities (system, chips, cores). # foreach my $pgid (keys %$pgtree) { my $pg = $pgtree->{$pgid}; my @cpus = @{$pg->{cpus}}; next unless scalar @cpus > 1; if (_set_equal (\@cpus, \@all_cpus)) { # # PG has all CPUs in the system. # push (@{$pg->{tags}}, 'system'); } foreach my $name ('core', 'chip') { my $hwdata = $hw_mapping->{$name}; foreach my $id (keys %$hwdata) { # CPUs for this entity my @hw_cpus = @{$hwdata->{$id}}; if (_set_equal (\@cpus, \@hw_cpus)) { # # PG has exactly the same CPUs # push (@{$pg->{tags}}, $name); } } } } } # # Save software load for each CPU # if ($get_swload) { $self->{CPU_LOAD} = _get_sw_cpu_load($ks); } # # Collect per-CPU utilization data if requested # if ($get_cpu_data) { _get_hw_cpu_load($self); } $self->{GET_CPU_DATA} = $get_cpu_data; # # Verify that in the end we have the same PG generation for each PG # if (! _same_generation($self)) { $! = &Errno::EAGAIN; return; } return ($self); } # # Verify that topology is the same as at the time snapshot was created # sub _same_generation { my $self = shift; my $pgtree = $self->{PGTREE} or return; return (0) unless $self; my $ks = $self->{KSTAT}; $ks->update(); my $pg_info = $ks->{$self->{PG_MODULE}}; foreach my $id (keys %$pg_info) { my $pg = $pgtree->{$id} or next; my $pg_ks = _kstat_get_pg($pg_info, $id, $self->{USE_OLD_KSTATS}); return unless $pg_ks; return (0) unless $pg->{generation} == $pg_ks->{generation}; } return (1); } # # Update capacity for both PGs # sub _capacity_update { my $c1 = shift; my $c2 = shift; my $pgtree1 = $c1->{PGTREE}; my $pgtree2 = $c2->{PGTREE}; foreach my $pgid (keys %$pgtree1) { my $pg1 = $pgtree1->{$pgid}; my $pg2 = $pgtree2->{$pgid}; next unless $pg1 && $pg2; next unless $pg1->{generation} != $pg2->{generation}; my $shname1 = $pg1->{sh_name}; my $shname2 = $pg2->{sh_name}; next unless $shname1 eq $shname2; my $max_rate = max($pg1->{util_rate_max}, $pg2->{util_rate_max}); my $utilization = abs($pg1->{util} - $pg2->{util}); my $tdelta = abs($pg1->{snaptime} - $pg2->{snaptime}); $utilization /= $tdelta if $utilization && $tdelta; $max_rate = $utilization if $utilization && $max_rate < $utilization; $c1->{CAPACITY}->{$shname1} = $max_rate if !$c1->{CAPACITY}->{$shname1} || !$c1->{CAPACITY}->{$shname1} < $max_rate; $c2->{CAPACITY}->{$shname2} = $max_rate if !$c2->{CAPACITY}->{$shname2} || !$c2->{CAPACITY}->{$shname2} < $max_rate; } } # # Return list of PGs breadth first # sub _walk_depth_first { my $p = shift; # Nothing to do if list is empty return unless scalar (@_); return (map { ($_, _walk_depth_first ($p, $p->children($_))) } @_); } # # Return list of PGs breadth first # sub _walk_breadth_first { my $p = shift; # Nothing to do if list is empty return unless scalar (@_); return (@_, _walk_breadth_first($p, map { $p->children($_) } @_)); } # # Given the kstat reference (already hashed by module name) and PG ID return the # corresponding kstat. # sub _kstat_get_pg { my $mod = shift; my $pgid = shift; my $use_old_kstats = shift; my $id_field = $use_old_kstats ? 'id' : 'pg_id'; return ($mod->{$pgid}->{hardware}) if $use_old_kstats; my @instances = grep { $_->{$id_field} == $pgid } values(%{$mod->{$pgid}}); return ($instances[0]); } ###################################################################### # Set routines ####################################################################### # # Return T if one list contains all the elements of another list. # All lists are passed by reference # sub _is_subset { my ($left, $right) = @_; my %seen; # Set to 1 for everything in the first list # Put the shortest list in $left Carp::croak "invalid left argument" unless ref ($left) eq 'ARRAY'; Carp::croak "invalid right argument" unless ref ($right) eq 'ARRAY'; # Create a hash indexed by elements in @right with ones as a value. map { $seen{$_} = 1 } @$right; # Find members of @left not present in @right my @extra = grep { !$seen{$_} } @$left; return (!scalar(@extra)); } sub _is_member { my $set = shift; my $element = shift; my %seen; map { $seen{$_} = 1 } @$set; return ($seen{$element}); } # # Return T if C1 and C2 contain the same elements # sub _set_equal { my $c1 = shift; my $c2 = shift; return 0 unless scalar @$c1 == scalar @$c2; return (_is_subset($c1, $c2) && _is_subset($c2, $c1)); } # # Return the intersection of two lists passed by reference # Convert the first list to a hash with seen entries marked as 1-values # Then grep only elements present in the first list from the second list. # As a little optimization, use the shorter list to build a hash. # sub _set_intersect { my ($left, $right) = @_; my %seen; # Set to 1 for everything in the first list # Put the shortest list in $left scalar @$left <= scalar @$right or ($right, $left) = ($left, $right); # Create a hash indexed by elements in @left with ones as a value. map { $seen{$_} = 1 } @$left; # Find members of @right present in @left return (grep { $seen{$_} } @$right); } # # Expand start-end into the list of values # Input: string containing a single numeric ID or x-y range # Output: single value or a list of values # Ranges with start being more than end are inverted # sub _expand { # Skip the first argument if it is the object reference shift if ref $@[0] eq 'HASH'; my $arg = shift; return unless defined $arg; my @args = split /,/, $arg; return map { _expand($_) } @args if scalar @args > 1; $arg = shift @args; return unless defined $arg; if ($arg =~ m/^\d+$/) { # single number return ($arg); } elsif ($arg =~ m/^(\d+)\-(\d+)$/) { my ($start, $end) = ($1, $2); # $start-$end # Reverse the interval if start > end ($start, $end) = ($end, $start) if $start > $end; return ($start .. $end); } else { return $arg; } return; } # # Consolidate consecutive ids as start-end # Input: list of ids # Output: string with space-sepated cpu values with ranges # collapsed as x-y # sub _collapse { return ('') unless @_; my @args = _uniqsort(@_); my $start = shift(@args); my $result = ''; my $end = $start; # Initial range consists of the first element foreach my $el (@args) { if (!$el =~ /^\d+$/) { $result = "$result $el"; $end = $el; } elsif ($el == ($end + 1)) { # # Got consecutive ID, so extend end of range without # printing anything since the range may extend further # $end = $el; } else { # # Next ID is not consecutive, so print IDs gotten so # far. # if ($end > $start + 1) { # range $result = "$result $start-$end"; } elsif ($end > $start) { # different values $result = "$result $start $end"; } else { # same value $result = "$result $start"; } # Try finding consecutive range starting from this ID $start = $end = $el; } } # Print last ID(s) if (! ($end =~ /^\d+$/)) { $result = "$result $end"; } elsif ($end > $start + 1) { $result = "$result $start-$end"; } elsif ($end > $start) { $result = "$result $start $end"; } else { $result = "$result $start"; } # Remove any spaces in the beginning $result =~ s/^\s+//; return ($result); } # # get relationship order from relationship name. # return 0 for all unknown names. # sub _relationship_order { my $name = shift; return ($relationships_order{$name} || 0); } # # Get software load for each CPU from kstats # Argument: kstat reference # Returns: reference to the hash with # cpu_idle, cpu_user, cpu_sys keys. # sub _get_sw_cpu_load { my $ks = shift or return; my $loads; my $sys_ks = $ks->{cpu}; foreach my $cpu (keys %$sys_ks) { my $sys = $sys_ks->{$cpu}->{sys}; $loads->{$cpu}->{cpu_idle} = $sys->{cpu_ticks_idle}; $loads->{$cpu}->{cpu_user} = $sys->{cpu_ticks_user}; $loads->{$cpu}->{cpu_sys} = $sys->{cpu_ticks_kernel}; } return ($loads); } # # Get software load for each CPU from kstats # Arguments: # pgtree reference # kstat reference # # Returns: nothing # Stores CPU load in the $pg->{cpudata} hash for each PG # sub _get_hw_cpu_load { my $self = shift; my $pgtree = $self->{PGTREE}; my $ks = $self->{KSTAT}; my $pg_cpu_ks = $ks->{$self->{PG_CPU_MODULE}}; foreach my $pgid (keys %$pgtree) { my $pg = $pgtree->{$pgid}; my @cpus = @{$pg->{cpus}}; my $cpu; my $pg_id; foreach my $cpu (keys %$pg_cpu_ks) { next unless _is_member(\@cpus, $cpu); my $cpu_hw_data = $pg_cpu_ks->{$cpu}; foreach my $hw (keys %$cpu_hw_data) { my $cpudata = $cpu_hw_data->{$hw}; # # Only consider information for this PG # next unless $cpudata->{pg_id} == $pgid; $pg->{cpudata}->{$cpu}->{generation} = $cpudata->{generation}; $pg->{cpudata}->{$cpu}->{util} = $cpudata->{hw_util}; $pg->{cpudata}->{$cpu}->{util_time_running} = $cpudata->{hw_util_time_running}; $pg->{cpudata}->{$cpu}->{util_time_stopped} = $cpudata->{hw_util_time_stopped}; $pg->{cpudata}->{$cpu}->{snaptime} = $cpudata->{snaptime}; } } } } 1; __END__ # # The information about PG hierarchy is contained in a object return by the # new() method. # # This module can deal with old PG kstats that have 'pg' and 'pg_cpu' as module # names as well as new PG kstats which use 'pg_hw_perf' and ''pg_hw_perf_cpu' as # the module name. # # The object contains the following fields: # # CPUS List of all CPUs present. # CAPACITY Estimate of capacity for each sharing # PGTREE The PG tree. See below for the tree representation. # # PG_MODULE Module name for the PG kstats. It is either 'pg' for # old style kstats, or 'pg_hw_perf' for new style kstats. # # MAX_FREQUENCY Maximum CPU frequency # USE_OLD_KSTATS True if we are dealing with old style kstats # KSTAT The kstat object used to generate this hierarchy. # # The PG tree is represented as a hash table indexed by PG ID. Each element of # the table is the hash reference with the following fields: # # children Reference to the list of children PG IDs # cpus Reference to the list of cpu IDs in the PG # current_rate Current utilization rate # generation PG generation # id PG id # ncpus number of CPUs in the PG # parent PG parent id, or -1 if there is none. # sh_name Sharing name # snaptime Snapshot time # util Hardware utilization # util_rate_max Maximum utilization rate # util_time_running Time (in nanoseconds) when utilization data is collected # util_time_stopped Time when utilization data is not collected # # The fields (with the exception of 'children') are a copy of the data from # kstats. # # The PG hierarchy in the kernel does not have the root PG. We simulate the root # (System) PG which is the parent of top level PGs in the system. This PG always # has ID 0. #