diff options
-rw-r--r-- | usr/src/cmd/Makefile | 2 | ||||
-rw-r--r-- | usr/src/cmd/perl/5.8.4/contrib/Makefile | 10 | ||||
-rw-r--r-- | usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Makefile.PL | 64 | ||||
-rw-r--r-- | usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Pg.pm | 1637 | ||||
-rw-r--r-- | usr/src/cmd/perl/contrib/Sun/Solaris/Pg/pod/Pg.pod | 223 | ||||
-rw-r--r-- | usr/src/cmd/perl/skel/Makefile | 3 | ||||
-rw-r--r-- | usr/src/cmd/pginfo/Makefile | 45 | ||||
-rw-r--r-- | usr/src/cmd/pginfo/pginfo.pl | 618 | ||||
-rw-r--r-- | usr/src/cmd/pgstat/Makefile | 45 | ||||
-rw-r--r-- | usr/src/cmd/pgstat/pgstat.pl | 1018 | ||||
-rw-r--r-- | usr/src/lib/libsecdb/exec_attr.txt | 3 | ||||
-rw-r--r-- | usr/src/pkg/manifests/runtime-perl-510-module-sun-solaris.mf | 2 | ||||
-rw-r--r-- | usr/src/pkg/manifests/runtime-perl-584.mf | 1 | ||||
-rw-r--r-- | usr/src/pkg/manifests/system-extended-system-utilities.mf | 2 | ||||
-rw-r--r-- | usr/src/uts/common/disp/cmt.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/os/cap_util.c | 68 | ||||
-rw-r--r-- | usr/src/uts/common/os/pghw.c | 145 | ||||
-rw-r--r-- | usr/src/uts/common/sys/pg.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/sys/pghw.h | 5 |
19 files changed, 3823 insertions, 84 deletions
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile index 0524593d6f..5c011b42b4 100644 --- a/usr/src/cmd/Makefile +++ b/usr/src/cmd/Makefile @@ -293,6 +293,8 @@ COMMON_SUBDIRS= \ pcitool \ pfexec \ pfexecd \ + pginfo \ + pgstat \ pgrep \ picl \ plimit \ diff --git a/usr/src/cmd/perl/5.8.4/contrib/Makefile b/usr/src/cmd/perl/5.8.4/contrib/Makefile index 32aa28c230..61989e0193 100644 --- a/usr/src/cmd/perl/5.8.4/contrib/Makefile +++ b/usr/src/cmd/perl/5.8.4/contrib/Makefile @@ -18,11 +18,9 @@ # # CDDL HEADER END # + # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. -# Use is subject to license terms. -# -#ident "%Z%%M% %I% %E% SMI" +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # include ../../../Makefile.cmd @@ -55,7 +53,9 @@ PERL_DYNAMIC_EXT = \ # Add any pure-perl extensions here. PERL_NONXS_EXT = \ $(SUN_SOLARIS)/BSM \ - $(SUN_SOLARIS)/PerlGcc + $(SUN_SOLARIS)/PerlGcc \ + $(SUN_SOLARIS)/Pg + PERL_EXT = $(PERL_DYNAMIC_EXT) $(PERL_NONXS_EXT) PERL_EXT_MAKEFILES = $(PERL_EXT:%=%/Makefile) diff --git a/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Makefile.PL b/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Makefile.PL new file mode 100644 index 0000000000..f45d918f84 --- /dev/null +++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Makefile.PL @@ -0,0 +1,64 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +require 5.8.4; +use strict; +use warnings; +use ExtUtils::MakeMaker; + +# #defines. +my @defines = ( DEFINE => exists($ENV{RELEASE_BUILD}) ? '-DNDEBUG' : '' ); + +# List of POD pages to install. +my @man3pods = ( MAN3PODS => {} ); + + +# +# If not building as part of ON. +# +if (! exists($ENV{CODEMGR_WS})) { + + # + # Suppress the setting of LD_RUN_PATH. The ON build environment + # contains a modified MakeMaker that does this automatically, so we + # only need to do this if we are building outside of ON. + # + package MY; + no warnings qw(once); + + # + # Install the POD documentation for non-ON builds. + # + my $man3pfx = '$(INST_MAN3DIR)/Sun::Solaris::Pg'; + @man3pods = ( + MAN3PODS => { 'pod/Pg.pod' => $man3pfx . '.$(MAN3EXT)' } + ); +} + +WriteMakefile( + NAME => 'Sun::Solaris::Pg', + VERSION_FROM => 'Pg.pm', + @man3pods, +); diff --git a/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Pg.pm b/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Pg.pm new file mode 100644 index 0000000000..c1d84b67d5 --- /dev/null +++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/Pg.pm @@ -0,0 +1,1637 @@ +#! /usr/perl5/bin/perl +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +# +# Pg.pm provides object-oriented interface to the Solaris +# Processor Group kstats +# +# See comments in the end +# + +package Sun::Solaris::Pg; + +use strict; +use warnings; +use Sun::Solaris::Kstat; +use Carp; +use Errno; +use List::Util qw(max sum); + +our $VERSION = '1.1'; + +# +# Currently the OS does not have the root PG and PGs constitute a forest of +# small trees. This module gathers all such trees under one root with ID zero. +# If the root is present already, we do not use faked root. +# + +my $ROOT_ID = 0; + +# +# PG_NO_PARENT means that kstats have PG parent ID and it is set to -1 +# PG_PARENT_UNDEF means that kstats have no PG parent ID +# +use constant { + PG_NO_PARENT => -1, + PG_PARENT_UNDEF => -2, +}; + +# +# Sorting order between different sharing relationships. This order is used to +# break ties between PGs with the same number of CPUs. If there are two PGs with +# the same set of CPUs, the one with the higher weight will be the parent of the +# one with the lower weight. +# +my %relationships_order = ( + 'CPU_PM_Idle_Power_Domain' => 1, + 'Integer_Pipeline' => 2, + 'Cache' => 3, + 'CPU_PM_Active_Power_Domain' => 4, + 'Floating_Point_Unit' => 5, + 'Data_Pipe_to_memory' => 6, + 'Memory' => 7, + 'Socket' => 8, + 'System' => 9, + ); + +# +# Object interface to the library. These are methods that can be used by the +# module user. +# + +# +# Create a new object representing PG +# All the heavy lifting is performed by _init function. +# This function performs all the Perl blessing magic. +# +# The new() method accepts arguments in the form of a hash. The following +# subarguments are supported: +# +# -cpudata # Collect per-CPU data from kstats if this is T +# -tags # Match PGs to physical relationships if this is T +# -swload # Collect software CPU load if this is T +# -retry # how many times to retry PG initialization when it fails +# -delay # Delay in seconds between retries +# +# The arguments are passed to _init(). +# +sub new +{ + my $class = shift; + my %args = @_; + my $retry_count = $args{-retry} || 0; + my $retry_delay = $args{-delay} || 1; + + my $self = _init(@_); + + # + # If PG initialization fails with EAGAIN error and the caller requested + # retries, retry initialization. + # + for (; !$self && ($! == &Errno::EAGAIN) && $retry_count; + $retry_count--) { + select(undef,undef,undef, $retry_delay); + $self = _init(@_); + } + + if ($self) { + bless($self, $class) if defined($class); + bless($self) unless defined($class); + } + + return ($self); +} + +# +# Functions below use internal function _pg_get which returns PG hash reference +# corresponding to PG ID specified or 'undef' if the PG can't be found. +# + +# +# All methods return 'undef' in scalar context and an empty list in list +# context when unrecoverable errors are detected. +# + +# +# Return the root ID of PG hierarchy +# +sub root +{ + scalar @_ == 1 or _usage("root(cookie)"); + my $self = shift; + + return unless $self->{PGTREE}; + + return ($ROOT_ID); +} + +# +# Return list of all pgs numerically sorted In scalar context return number of +# PGs +# +sub all +{ + scalar @_ == 1 or _usage("all(cookie)"); + my $self = shift; + my $pgtree = $self->{PGTREE} or return; + my @ids = keys(%{$pgtree}); + + return (wantarray() ? _nsort(@ids) : scalar @ids); +} + +# +# Return list of all pgs by walking the tree depth first. +# +sub all_depth_first +{ + scalar @_ == 1 or _usage("all_depth_first(cookie)"); + my $self = shift; + + _walk_depth_first($self, $self->root()); +} + +# +# Return list of all pgs by walking the tree breadth first. +# +sub all_breadth_first +{ + scalar @_ == 1 or _usage("all_breadth_first(cookie)"); + my $self = shift; + + _walk_breadth_first($self, $self->root()); +} + +# +# Return list of CPUs in the PG specified +# CPUs returned are numerically sorted +# In scalar context return number of CPUs +# +sub cpus +{ + scalar @_ == 2 or _usage("cpus(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + my @cpus = @{$pg->{cpus}}; + + return (wantarray() ? _nsort(@cpus) : _collapse(@cpus)); +} + +# +# Return a parent for a given PG +# Returns undef if there is no parent +# +sub parent +{ + scalar @_ == 2 or _usage("parent(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + my $parent = $pg->{parent}; + + return (defined($parent) && $parent >= 0 ? $parent : undef); +} + +# +# Return list of children for a given PG +# In scalar context return list of children +# +sub children +{ + scalar @_ == 2 or _usage("children(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + + my $children = $pg->{children} or return; + my @children = @{$children}; + + return (wantarray() ? _nsort(@children) : scalar @children); +} + +# +# Return sharing name for the PG +# +sub sh_name +{ + scalar @_ == 2 or _usage("sh_name(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + return ($pg->{sh_name}); +} + +# +# Return T if specified PG ID is a leaf PG +# +sub is_leaf +{ + scalar @_ == 2 or _usage("is_leaf(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + return ($pg->{is_leaf}); +} + +# +# Return leaf PGs +# +sub leaves +{ + scalar @_ == 1 or _usage("leaves(cookie, pg)"); + + my $self = shift; + + return (grep { is_leaf($self, $_) } $self->all()); +} + +# +# Update varying data in the snapshot +# +sub update +{ + scalar @_ == 1 or _usage("update(cookie)"); + + my $self = shift; + my $ks = $self->{KSTAT}; + + $ks->update(); + + my $pgtree = $self->{PGTREE}; + my $pg_info = $ks->{$self->{PG_MODULE}}; + + # + # Walk PG kstats and copy updated data from kstats to the snapshot + # + foreach my $id (keys %$pg_info) { + my $pg = $pgtree->{$id} or next; + + my $pg_ks = _kstat_get_pg($pg_info, $id, + $self->{USE_OLD_KSTATS}); + return unless $pg_ks; + + # + # Update PG from kstats + # + $pg->{util} = $pg_ks->{hw_util}; + $pg->{current_rate} = $pg_ks->{hw_util_rate}; + $pg->{util_rate_max} = $pg_ks->{hw_util_rate_max}; + $pg->{util_time_running} = $pg_ks->{hw_util_time_running}; + $pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped}; + $pg->{snaptime} = $pg_ks->{snaptime}; + $pg->{generation} = $pg_ks->{generation}; + } + + # + # Update software load for each CPU + # + $self->{CPU_LOAD} = _get_sw_cpu_load($ks); + + # + # Get hardware load per CPU + # + if ($self->{GET_CPU_DATA}) { + _get_hw_cpu_load($self); + } + + return (1); +} + +# +# Return list of physical tags for the given PG +# +sub tags +{ + scalar @_ == 2 or _usage("tags(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + + my $tags = $pg->{tags} or return; + + my @tags = _uniq(@{$tags}); + + return (wantarray() ? @tags : join (',', @tags)); +} + +# +# Return list of sharing relationships in the snapshot Relationships are sorted +# by the level in the hierarchy If any PGs are given on the command line, only +# return sharing relationships for given PGs, but still keep them sorted. +# +sub sharing_relationships +{ + scalar @_ or _usage("sharing_relationships(cookie, [pg, ...])"); + + my $self = shift; + my @pgs = $self->all_breadth_first(); + + if (scalar @_ > 0) { + # + # Caller specified PGs, remove any PGs not in caller's list + # + my %seen; + map { $seen{$_} = 1 } @_; + + # Remove any PGs not provided by user + @pgs = grep { $seen{$_} } @pgs; + } + + return (_uniq(map { $self->sh_name($_) } @pgs)); +} + +# +# Return PG generation number. If PG is specified in the argument, return its +# generation, otherwise return snapshot generation. +# Snapshot generation is calculated as the total of PG generations +# +sub generation +{ + (scalar @_ == 1 || scalar @_ == 2) or _usage("generation(cookie, [pg])"); + my $self = shift; + + if (scalar @_ == 0) { + my @generations = map { $_->{generation} } + values %{$self->{PGTREE}}; + return (sum(@generations)); + + } else { + my $id = shift; + my $pg = _pg_get($self, $id) or return; + return ($pg->{generation}); + } +} + +# +# Return level of PG in the tree, starting from root. +# PG level is cached in the $pg->{level} field. +# +sub level +{ + scalar @_ == 2 or _usage("level(cookie, pg)"); + my $self = shift; + my $pgid = shift; + my $pg = _pg_get($self, $pgid) or return; + + return $pg->{level} if defined($pg->{level}); + + $pg->{level} = 0; + + my $parent = _pg_get($self, $pg->{parent}); + while ($parent) { + $pg->{level}++; + $parent = _pg_get($self, $parent->{parent}); + } + + return ($pg->{level}); +} + +# +# Return T if PG supports utilization We assume that utilization is supported by +# PG if it shows any non-zero time in util_time_running. It is possible that the +# same condition may be caused by cpustat(1) running ever since PG was created, +# but there is not much we can do about it. +# +sub has_utilization +{ + scalar @_ == 2 or _usage("has_utilization(cookie, pg)"); + my $pg = _pg_get(shift, shift) or return; + + return ($pg->{util_time_running} != 0); +} + + +# +# Return utilization for the PG +# Utilization is a difference in utilization value between two snapshots. +# We can only compare utilization between PGs having the same generation ID. +# +sub utilization +{ + scalar @_ == 3 or _usage("utilization(cookie, cookie1, pg"); + my $c1 = shift; + my $c2 = shift; + my $id = shift; + + # + # Since we have two cookies, update capacity in both + # + _capacity_update($c1, $c2, $id); + + my $pg1 = _pg_get($c1, $id) or return; + my $pg2 = _pg_get($c2, $id) or return; + + # + # Nothing to return if one of the utilizations wasn't measured + # + return unless ($pg1->{util_time_running} && $pg2->{util_time_running}); + + # + # Verify generation IDs + # + return unless $pg1->{generation} eq $pg2->{generation}; + my $u1 = $pg1->{util}; + my $u2 = $pg2->{util}; + return unless defined ($u1) && defined ($u2); + + return (abs($u2 - $u1)); +} + +# +# Return an estimate of PG capacity Capacity is calculated as the maximum of +# observed utilization expressed in units per second or maximum CPU frequency +# for all CPUs. +# +# We store capacity per sharing relationship, assuming that the same sharing has +# the same capacity. This may not be true for heterogeneous systems. +# +sub capacity +{ + scalar @_ == 2 or _usage("capacity(cookie, pg"); + my $self = shift; + my $pgid = shift; + my $pg = _pg_get($self, $pgid) or return; + my $shname = $pg->{sh_name} or return; + + return (max($self->{MAX_FREQUENCY}, $self->{CAPACITY}->{$shname})); +} + +# +# Return accuracy of utilization calculation between two snapshots The accuracy +# is determined based on the total time spent running and not running the +# counters. If T1 is the time counters were running during the period and T2 is +# the time they were turned off, the accuracy is T1 / (T1 + T2), expressed in +# percentages. +# +sub accuracy +{ + scalar @_ == 3 or _usage("accuracy(cookie, cookie1, pg)"); + my $c1 = shift; + my $c2 = shift; + my $id = shift; + my $trun; + my $tstop; + + my $pg1 = _pg_get($c1, $id) or return; + my $pg2 = _pg_get($c2, $id) or return; + + # Both PGs should have the same generation + return unless $pg1->{generation} eq $pg2->{generation}; + + # + # Get time spent with running and stopped counters + # + $trun = abs($pg2->{util_time_running} - + $pg1->{util_time_running}); + $tstop = abs($pg2->{util_time_stopped} - + $pg1->{util_time_stopped}); + + my $total = $trun + $tstop; + + # + # Calculate accuracy as percentage + # + my $accuracy = $total ? ($trun * 100) / $total : 0; + $accuracy = int($accuracy + 0.5); + $accuracy = 100 if $accuracy > 100; + return ($accuracy); +} + +# +# Return time difference in seconds between two snapshots +# +sub tdelta +{ + scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)"); + my $c1 = shift; + my $c2 = shift; + my $id = shift; + + my $pg1 = _pg_get($c1, $id) or return; + my $pg2 = _pg_get($c2, $id) or return; + + return unless $pg1->{generation} eq $pg2->{generation}; + + my $t1 = $pg1->{snaptime}; + my $t2 = $pg2->{snaptime}; + my $delta = abs($t1 - $t2); + return ($delta); +} + +# +# Return software utilization between two snapshots +# In scalar context return software load as percentage. +# In list context return a list (USER, SYSTEM, IDLE, SWLOAD) +# All loads are returned as percentages +# +sub sw_utilization +{ + scalar @_ == 3 or _usage("tdelta(cookie, cookie1, pg)"); + + my $c1 = shift; + my $c2 = shift; + my $id = shift; + + my $pg1 = _pg_get($c1, $id) or return; + my $pg2 = _pg_get($c2, $id) or return; + + return unless $pg1->{generation} eq $pg2->{generation}; + + my @cpus = $c1->cpus($id); + + my $load1 = $c1->{CPU_LOAD}; + my $load2 = $c2->{CPU_LOAD}; + + my $idle = 0; + my $user = 0; + my $sys = 0; + my $total = 0; + my $swload = 0; + + foreach my $cpu (@cpus) { + my $ld1 = $load1->{$cpu}; + my $ld2 = $load2->{$cpu}; + next unless $ld1 && $ld2; + + $idle += $ld2->{cpu_idle} - $ld1->{cpu_idle}; + $user += $ld2->{cpu_user} - $ld1->{cpu_user}; + $sys += $ld2->{cpu_sys} - $ld1->{cpu_sys}; + } + + $total = $idle + $user + $sys; + + # Prevent division by zero + $total = 1 unless $total; + + $swload = ($user + $sys) * 100 / $total; + $idle = $idle * 100 / $total; + $user = $user * 100 / $total; + $sys = $sys * 100 / $total; + + return (wantarray() ? ($user, $sys, $idle, $swload) : $swload); +} + +# +# Return utilization for the PG for a given CPU +# Utilization is a difference in utilization value between two snapshots. +# We can only compare utilization between PGs having the same generation ID. +# +sub cpu_utilization +{ + scalar @_ == 4 or _usage("utilization(cookie, cookie1, pg, cpu"); + my $c1 = shift; + my $c2 = shift; + my $id = shift; + my $cpu = shift; + + my $idle = 0; + my $user = 0; + my $sys = 0; + my $swtotal = 0; + my $swload = 0; + + # + # Since we have two cookies, update capacity in both + # + _capacity_update($c1, $c2, $id); + + my $pg1 = _pg_get($c1, $id) or return; + my $pg2 = _pg_get($c2, $id) or return; + + # + # Nothing to return if one of the utilizations wasn't measured + # + return unless ($pg1->{util_time_running} && $pg2->{util_time_running}); + + # + # Nothing to return if CPU data is missing + # + return unless $pg1->{cpudata} && $pg2->{cpudata}; + + # + # Verify generation IDs + # + return unless $pg1->{generation} eq $pg2->{generation}; + + # + # Get data for the given CPU + # + my $cpudata1 = $pg1->{cpudata}->{$cpu}; + my $cpudata2 = $pg2->{cpudata}->{$cpu}; + + return unless $cpudata1 && $cpudata2; + + return unless $cpudata1->{generation} == $cpudata2->{generation}; + + my $u1 = $cpudata1->{util}; + my $u2 = $cpudata2->{util}; + return unless defined ($u1) && defined ($u2); + my $hw_utilization = abs ($u1 - $u2); + + # + # Get time spent with running and stopped counters + # + my $trun = abs($cpudata1->{util_time_running} - + $cpudata2->{util_time_running}); + my $tstop = abs($cpudata1->{util_time_stopped} - + $cpudata2->{util_time_stopped}); + + my $total = $trun + $tstop; + + # + # Calculate accuracy as percentage + # + my $accuracy = $total ? ($trun * 100) / $total : 0; + $accuracy = int($accuracy + 0.5); + $accuracy = 100 if $accuracy > 100; + + my $t1 = $cpudata1->{snaptime}; + my $t2 = $cpudata2->{snaptime}; + my $tdelta = abs ($t1 - $t2); + + my $shname = $pg2->{sh_name} or return; + my $capacity = max($c2->{MAX_FREQUENCY}, $c2->{CAPACITY}->{$shname}); + my $utilization = $hw_utilization / $tdelta; + $capacity = $utilization unless $capacity; + $utilization /= $capacity; + $utilization *= 100; + + my $ld1 = $c1->{CPU_LOAD}->{$cpu}; + my $ld2 = $c2->{CPU_LOAD}->{$cpu}; + + if ($ld1 && $ld2) { + $idle = $ld2->{cpu_idle} - $ld1->{cpu_idle}; + $user = $ld2->{cpu_user} - $ld1->{cpu_user}; + $sys = $ld2->{cpu_sys} - $ld1->{cpu_sys}; + + $swtotal = $idle + $user + $sys; + + # Prevent division by zero + $swtotal = 1 unless $swtotal; + + $swload = ($user + $sys) * 100 / $swtotal; + $idle = $idle * 100 / $swtotal; + $user = $user * 100 / $swtotal; + $sys = $sys * 100 / $swtotal; + } + + return (wantarray() ? + ($utilization, $accuracy, $hw_utilization, + $swload, $user, $sys, $idle) : + $utilization); +} + +# +# online_cpus(kstat) +# Return list of on-line CPUs +# +sub online_cpus +{ + scalar @_ == 1 or _usage("online_cpus(cookie)"); + + my $self = shift or return; + my $ks = $self->{KSTAT} or return; + + my $cpu_info = $ks->{cpu_info} or return; + + my @cpus = grep { + my $cp = $cpu_info->{$_}->{"cpu_info$_"}; + my $state = $cp->{state}; + $state eq 'on-line' || $state eq 'no-intr'; + } keys %{$cpu_info}; + + return (wantarray() ? @cpus : _nsort(@cpus)); +} + +# +# Support methods +# +# The following methods are not PG specific but are generally useful for PG +# interface consumers +# + +# +# Sort the list numerically +# +sub nsort +{ + scalar @_ > 0 or _usage("nsort(cookie, val, ...)"); + shift; + + return (_nsort(@_)); +} + +# +# Return the input list with duplicates removed. +# Should be used in list context +# +sub uniq +{ + scalar @_ > 0 or _usage("uniq(cookie, val, ...)"); + shift; + + return (_uniq(@_)); +} + +# +# Sort list numerically and remove duplicates +# Should be called in list context +# +sub uniqsort +{ + scalar @_ > 0 or _usage("uniqsort(cookie, val, ...)"); + shift; + + return (_uniqsort(@_)); +} + + +# +# Expand all arguments and present them as a numerically sorted list +# x,y is expanded as (x y) +# 1-3 ranges are expandes as (1 2 3) +# +sub expand +{ + scalar @_ > 0 or _usage("expand(cookie, val, ...)"); + shift; + + return (_uniqsort(map { _expand($_) } @_)); +} + +# +# Consolidate consecutive ids as start-end +# Input: list of ids +# Output: string with space-sepated cpu values with ranges +# collapsed as x-y +# +sub id_collapse +{ + scalar @_ > 0 or _usage("collapse(cookie, val, ...)"); + shift; + + return _collapse(@_); +} + +# +# Return elements of the second list not present in the first list. Both lists +# are passed by reference. +# +sub set_subtract +{ + scalar @_ == 3 or _usage("set_subtract(cookie, left, right)"); + shift; + + return (_set_subtract(@_)); +} + +# +# Return the intersection of two lists passed by reference +# Convert the first list to a hash with seen entries marked as 1-values +# Then grep only elements present in the first list from the second list. +# As a little optimization, use the shorter list to build a hash. +# +sub intersect +{ + scalar @_ == 3 or _usage("intersect(cookie, left, right)"); + shift; + + return (_set_intersect(@_)); +} + +# +# Return elements of the second list not present in the first list. Both lists +# are passed by reference. +# +sub _set_subtract +{ + my ($left, $right) = @_; + my %seen; # Set to 1 for everything in the first list + # Create a hash indexed by elements in @left with ones as a value. + map { $seen{$_} = 1 } @$left; + # Find members of @right present in @left + return (grep { ! $seen{$_} } @$right); +} + +# +# END OF PUBLIC INTERFACE +# + +# +# INTERNAL FUNCTIONS +# + +# +# _usage(): print error message and terminate the program. +# +sub _usage +{ + my $msg = shift; + Carp::croak "Usage: Sun::Solaris::Pg::$msg"; +} + +# +# Sort the list numerically +# Should be called in list context +# +sub _nsort +{ + return (sort { $a <=> $b } @_); +} + +# +# Return the input list with duplicates removed. +# Should be used in list context +# +sub _uniq +{ + my %seen; + return (grep { ++$seen{$_} == 1 } @_); +} + +# +# Sort list numerically and remove duplicates +# Should be called in list context +# +sub _uniqsort +{ + return (sort { $a <=> $b } _uniq(@_)); +} + +# Get PG from the snapshot by id +sub _pg_get +{ + my $self = shift; + my $pgid = shift; + + return unless defined $pgid; + my $pgtree = $self->{PGTREE} or return; + + return ($pgtree->{$pgid}); +} + +# +# Copy data from kstat representation to our representation +# Arguments: +# PG kstat +# Reference to the list of CPUs. +# Any CPUs in the PG kstat not present in the CPU list are ignored. +# +sub _pg_create_from_kstat +{ + my $pg_ks = shift; + my $all_cpus = shift; + my %all_cpus; + my $pg = (); + + # + # Mark CPUs available + # + map { $all_cpus{$_}++ } @$all_cpus; + + return unless $pg_ks; + + # + # Convert CPU list in the kstat from x-y,z form to the proper list + # + my @cpus = _expand($pg_ks->{cpus}); + + # + # Remove any CPUs not present in the arguments + # + @cpus = grep { $all_cpus{$_} } @cpus; + + # + # Do not create PG unless it has any CPUs + # + return unless scalar @cpus; + + # + # Copy data to the $pg structure + # + $pg->{ncpus} = scalar @cpus; + $pg->{cpus} = \@cpus; + $pg->{id} = defined($pg_ks->{pg_id}) ? $pg_ks->{pg_id} : $pg_ks->{id}; + $pg->{util} = $pg_ks->{hw_util}; + $pg->{current_rate} = $pg_ks->{hw_util_rate}; + $pg->{util_rate_max} = $pg_ks->{hw_util_rate_max}; + $pg->{util_time_running} = $pg_ks->{hw_util_time_running}; + $pg->{util_time_stopped} = $pg_ks->{hw_util_time_stopped}; + $pg->{snaptime} = $pg_ks->{snaptime}; + $pg->{generation} = $pg_ks->{generation}; + $pg->{sh_name} = $pg_ks->{relationship} || $pg_ks->{sharing_relation}; + $pg->{parent} = $pg_ks->{parent_pg_id}; + $pg->{parent} = PG_PARENT_UNDEF unless defined $pg->{parent}; + # + # Replace spaces with underscores in sharing names + # + $pg->{sh_name} =~ s/ /_/g; + $pg->{is_leaf} = 1; + + return $pg; +} + +# +# Create fake root PG with all CPUs +# Arguments: list of CPUs +# +sub _pg_create_root +{ + my $pg = (); + my @cpus = @_; + + $pg->{id} = $ROOT_ID; + $pg->{ncpus} = scalar @cpus; + $pg->{util} = 0; + $pg->{current_rate} = 0; + $pg->{util_rate_max} = 0; + $pg->{util_time_running} = 0; + $pg->{util_time_stopped} = 0; + $pg->{snaptime} = 0; + $pg->{generation} = 0; + $pg->{sh_name} = 'System'; + $pg->{is_leaf} = 0; + $pg->{cpus} = \@cpus; + $pg->{parent} = PG_NO_PARENT; + + return ($pg); +} + +# +# _pg_all_from_kstats(SNAPSHOT) +# Extract all PG information from kstats +# +sub _pg_all_from_kstats +{ + my $self = shift; + my $ks = $self->{KSTAT}; + my @all_cpus = @{$self->{CPUS}}; + + return unless $ks; + + my $pgtree = (); + my $pg_info = $ks->{$self->{PG_MODULE}}; + + # + # Walk all PG kstats and copy them to $pgtree->{$id} + # + foreach my $id (keys %$pg_info) { + my $pg_ks = _kstat_get_pg($pg_info, $id, + $self->{USE_OLD_KSTATS}); + next unless $pg_ks; + + my $pg = _pg_create_from_kstat($pg_ks, \@all_cpus); + + $pgtree->{$id} = $pg if $pg; + } + + # + # OS does not have root PG, so create one. + # + if (!$pgtree->{$ROOT_ID}) { + $pgtree->{$ROOT_ID} = _pg_create_root (@all_cpus); + } + + # + # Construct parent-child relationships between PGs + # + + # + # Get list of PGs sorted by number of CPUs + # If two PGs have the same number of CPUs, sort by relationship order. + # + my @lineage = sort { + $a->{ncpus} <=> $b->{ncpus} || + _relationship_order($a->{sh_name}) <=> + _relationship_order($b->{sh_name}) + } values %$pgtree; + + # + # For each PG in the lineage discover its parent if it doesn't have one. + # + for (my $i = 0; $i < scalar @lineage; $i++) { + my $pg = $lineage[$i]; + + # + # Ignore PGs which already have parent in kstats + # + my $parent = $pg->{parent}; + next if ($parent >= PG_NO_PARENT); + + my $ncpus = $pg->{ncpus}; + my @cpus = @{$pg->{cpus}}; + + # + # Walk the lineage, ignoring any CPUs with the same number of + # CPUs + for (my $j = $i + 1; $j < scalar @lineage; $j++) { + my $pg1 = $lineage[$j]; + my @parent_cpus = @{$pg1->{cpus}}; + if (_is_subset(\@cpus, \@parent_cpus)) { + $pg->{parent} = $pg1->{id}; + last; + } + } + } + + # + # Find all top-level PGs and put them under $root + # + foreach my $pgid (keys %$pgtree) { + next if $pgid == $ROOT_ID; + my $pg = $pgtree->{$pgid}; + $pg->{parent} = $ROOT_ID unless $pg->{parent} >= 0; + } + + # + # Now that we know parents, for each parent add all direct children to + # their parent sets + # + foreach my $pg (@lineage) { + my $parentid = $pg->{parent}; + next unless defined $parentid; + + my $parent = $pgtree->{$parentid}; + push (@{$parent->{children}}, $pg->{id}); + } + + return ($pgtree); +} + +# +# Read kstats and initialize PG object +# Collect basic information about cmt_pg +# Add list of children and list of CPUs +# Returns the hash reference indexed by pg id +# +# The _init() function accepts arguments in the form of a hash. The following +# subarguments are supported: +# +# -cpudata # Collect per-CPU data from kstats if this is T +# -tags # Match PGs to physical relationships if this is T +# -swload # Collect software CPU load if this is T + +sub _init +{ + my $ks = Sun::Solaris::Kstat->new(strip_strings => 1); + return unless $ks; + + my %args = @_; + my $get_cpu_data = $args{-cpudata}; + my $get_tags = $args{-tags}; + my $get_swload = $args{-swload}; + + my $self; + + my $use_old_kstat_names = scalar(grep {/^pg_hw_perf/ } keys (%$ks)) == 0; + + my @frequencies; + $self->{MAX_FREQUENCY} = 0; + + $self->{PG_MODULE} = $use_old_kstat_names ? 'pg' : 'pg_hw_perf'; + $self->{PG_CPU_MODULE} = $use_old_kstat_names ? + 'pg_cpu' : 'pg_hw_perf_cpu'; + $self->{USE_OLD_KSTATS} = $use_old_kstat_names; + + $get_cpu_data = 0 unless scalar(grep {/^$self->{PG_CPU_MODULE}/ } + keys (%$ks)); + + # Get list of PG-related kstats + my $pg_keys = $use_old_kstat_names ? 'pg' : 'pg_hw'; + + if (scalar(grep { /^$pg_keys/ } keys (%$ks)) == 0) { + if (exists(&Errno::ENOTSUPP)) { + $! = &Errno::ENOTSUPP; + } else { + $! = 48; + } + return; + } + + + # + # Mapping of cores and chips to CPUs + # + my $hw_mapping; + + # + # Get list of all CPUs + # + my $cpu_info = $ks->{cpu_info}; + + # + # @all-cpus is a list of all cpus + # + my @all_cpus = keys %$cpu_info; + + # + # Save list of all CPUs in the snapshot + # + $self->{CPUS} = \@all_cpus; + + # + # Find CPUs for each socket and chip + # Also while we scan CPU kstats, get maximum frequency of each CPU. + # + foreach my $id (@all_cpus) { + my $ci = $cpu_info->{$id}->{"cpu_info$id"}; + next unless $ci; + my $core_id = $ci->{core_id}; + my $chip_id = $ci->{chip_id}; + + push(@{$hw_mapping->{core}->{$core_id}}, $id) + if defined $core_id; + push(@{$hw_mapping->{chip}->{$chip_id}}, $id) + if defined $chip_id; + + # Read CPU frequencies separated by commas + my $freqs = $ci->{supported_frequencies_Hz}; + my $max_freq = max(split(/:/, $freqs)); + + # Calculate maximum frequency for the snapshot. + $self->{MAX_FREQUENCY} = $max_freq if + $self->{MAX_FREQUENCY} < $max_freq; + } + + $self->{KSTAT} = $ks; + + # + # Convert kstats to PG tree + # + my $pgtree = _pg_all_from_kstats($self); + $self->{PGTREE} = $pgtree; + + # + # Find capacity estimate per sharing relationship + # + foreach my $pgid (keys %$pgtree) { + my $pg = $pgtree->{$pgid}; + my $shname = $pg->{sh_name}; + my $max_rate = $pg->{util_rate_max}; + $self->{CAPACITY}->{$shname} = $max_rate if + !$self->{CAPACITY}->{$shname} || + $self->{CAPACITY}->{$shname} < $max_rate; + } + + if ($get_tags) { + # + # Walk all PGs and mark all PGs that have corresponding hardware + # entities (system, chips, cores). + # + foreach my $pgid (keys %$pgtree) { + my $pg = $pgtree->{$pgid}; + my @cpus = @{$pg->{cpus}}; + next unless scalar @cpus > 1; + + if (_set_equal (\@cpus, \@all_cpus)) { + # + # PG has all CPUs in the system. + # + push (@{$pg->{tags}}, 'system'); + } + + foreach my $name ('core', 'chip') { + my $hwdata = $hw_mapping->{$name}; + foreach my $id (keys %$hwdata) { + # CPUs for this entity + my @hw_cpus = @{$hwdata->{$id}}; + if (_set_equal (\@cpus, \@hw_cpus)) { + # + # PG has exactly the same CPUs + # + push (@{$pg->{tags}}, $name); + } + } + } + } + } + + # + # Save software load for each CPU + # + if ($get_swload) { + $self->{CPU_LOAD} = _get_sw_cpu_load($ks); + } + + # + # Collect per-CPU utilization data if requested + # + if ($get_cpu_data) { + _get_hw_cpu_load($self); + } + + $self->{GET_CPU_DATA} = $get_cpu_data; + + # + # Verify that in the end we have the same PG generation for each PG + # + if (! _same_generation($self)) { + $! = &Errno::EAGAIN; + return; + } + + return ($self); +} + +# +# Verify that topology is the same as at the time snapshot was created +# +sub _same_generation +{ + my $self = shift; + my $pgtree = $self->{PGTREE} or return; + + return (0) unless $self; + + my $ks = $self->{KSTAT}; + $ks->update(); + my $pg_info = $ks->{$self->{PG_MODULE}}; + foreach my $id (keys %$pg_info) { + my $pg = $pgtree->{$id} or next; + + my $pg_ks = _kstat_get_pg($pg_info, $id, + $self->{USE_OLD_KSTATS}); + return unless $pg_ks; + return (0) unless $pg->{generation} == $pg_ks->{generation}; + } + return (1); +} + +# +# Update capacity for both PGs +# +sub _capacity_update +{ + my $c1 = shift; + my $c2 = shift; + + my $pgtree1 = $c1->{PGTREE}; + my $pgtree2 = $c2->{PGTREE}; + + foreach my $pgid (keys %$pgtree1) { + my $pg1 = $pgtree1->{$pgid}; + my $pg2 = $pgtree2->{$pgid}; + next unless $pg1 && $pg2; + next unless $pg1->{generation} != $pg2->{generation}; + my $shname1 = $pg1->{sh_name}; + my $shname2 = $pg2->{sh_name}; + next unless $shname1 eq $shname2; + my $max_rate = max($pg1->{util_rate_max}, $pg2->{util_rate_max}); + + my $utilization = abs($pg1->{util} - $pg2->{util}); + my $tdelta = abs($pg1->{snaptime} - $pg2->{snaptime}); + $utilization /= $tdelta if $utilization && $tdelta; + $max_rate = $utilization if + $utilization && $max_rate < $utilization; + + $c1->{CAPACITY}->{$shname1} = $max_rate if + !$c1->{CAPACITY}->{$shname1} || + !$c1->{CAPACITY}->{$shname1} < $max_rate; + $c2->{CAPACITY}->{$shname2} = $max_rate if + !$c2->{CAPACITY}->{$shname2} || + !$c2->{CAPACITY}->{$shname2} < $max_rate; + } +} + +# +# Return list of PGs breadth first +# +sub _walk_depth_first +{ + my $p = shift; + # Nothing to do if list is empty + return unless scalar (@_); + + return (map { ($_, _walk_depth_first ($p, $p->children($_))) } @_); +} + +# +# Return list of PGs breadth first +# +sub _walk_breadth_first +{ + my $p = shift; + # Nothing to do if list is empty + return unless scalar (@_); + + return (@_, _walk_breadth_first($p, map { $p->children($_) } @_)); +} + +# +# Given the kstat reference (already hashed by module name) and PG ID return the +# corresponding kstat. +# +sub _kstat_get_pg +{ + my $mod = shift; + my $pgid = shift; + my $use_old_kstats = shift; + + my $id_field = $use_old_kstats ? 'id' : 'pg_id'; + + return ($mod->{$pgid}->{hardware}) if $use_old_kstats; + + my @instances = grep { $_->{$id_field} == $pgid } + values(%{$mod->{$pgid}}); + return ($instances[0]); +} + +###################################################################### +# Set routines +####################################################################### +# +# Return T if one list contains all the elements of another list. +# All lists are passed by reference +# +sub _is_subset +{ + my ($left, $right) = @_; + my %seen; # Set to 1 for everything in the first list + # Put the shortest list in $left + + Carp::croak "invalid left argument" unless ref ($left) eq 'ARRAY'; + Carp::croak "invalid right argument" unless ref ($right) eq 'ARRAY'; + + # Create a hash indexed by elements in @right with ones as a value. + map { $seen{$_} = 1 } @$right; + + # Find members of @left not present in @right + my @extra = grep { !$seen{$_} } @$left; + return (!scalar(@extra)); +} + +sub _is_member +{ + my $set = shift; + my $element = shift; + my %seen; + + map { $seen{$_} = 1 } @$set; + + return ($seen{$element}); +} + +# +# Return T if C1 and C2 contain the same elements +# +sub _set_equal +{ + my $c1 = shift; + my $c2 = shift; + + return 0 unless scalar @$c1 == scalar @$c2; + + return (_is_subset($c1, $c2) && _is_subset($c2, $c1)); +} + +# +# Return the intersection of two lists passed by reference +# Convert the first list to a hash with seen entries marked as 1-values +# Then grep only elements present in the first list from the second list. +# As a little optimization, use the shorter list to build a hash. +# +sub _set_intersect +{ + my ($left, $right) = @_; + my %seen; # Set to 1 for everything in the first list + # Put the shortest list in $left + scalar @$left <= scalar @$right or ($right, $left) = ($left, $right); + + # Create a hash indexed by elements in @left with ones as a value. + map { $seen{$_} = 1 } @$left; + # Find members of @right present in @left + return (grep { $seen{$_} } @$right); +} + +# +# Expand start-end into the list of values +# Input: string containing a single numeric ID or x-y range +# Output: single value or a list of values +# Ranges with start being more than end are inverted +# +sub _expand +{ + # Skip the first argument if it is the object reference + shift if ref $@[0] eq 'HASH'; + + my $arg = shift; + + return unless defined $arg; + + my @args = split /,/, $arg; + + return map { _expand($_) } @args if scalar @args > 1; + + $arg = shift @args; + return unless defined $arg; + + if ($arg =~ m/^\d+$/) { + # single number + return ($arg); + } elsif ($arg =~ m/^(\d+)\-(\d+)$/) { + my ($start, $end) = ($1, $2); # $start-$end + # Reverse the interval if start > end + ($start, $end) = ($end, $start) if $start > $end; + return ($start .. $end); + } else { + return $arg; + } + return; +} + +# +# Consolidate consecutive ids as start-end +# Input: list of ids +# Output: string with space-sepated cpu values with ranges +# collapsed as x-y +# +sub _collapse +{ + return ('') unless @_; + my @args = _uniqsort(@_); + my $start = shift(@args); + my $result = ''; + my $end = $start; # Initial range consists of the first element + foreach my $el (@args) { + if (!$el =~ /^\d+$/) { + $result = "$result $el"; + $end = $el; + } elsif ($el == ($end + 1)) { + # + # Got consecutive ID, so extend end of range without + # printing anything since the range may extend further + # + $end = $el; + } else { + # + # Next ID is not consecutive, so print IDs gotten so + # far. + # + if ($end > $start + 1) { # range + $result = "$result $start-$end"; + } elsif ($end > $start) { # different values + $result = "$result $start $end"; + } else { # same value + $result = "$result $start"; + } + + # Try finding consecutive range starting from this ID + $start = $end = $el; + } + } + + # Print last ID(s) + if (! ($end =~ /^\d+$/)) { + $result = "$result $end"; + } elsif ($end > $start + 1) { + $result = "$result $start-$end"; + } elsif ($end > $start) { + $result = "$result $start $end"; + } else { + $result = "$result $start"; + } + # Remove any spaces in the beginning + $result =~ s/^\s+//; + return ($result); +} + +# +# get relationship order from relationship name. +# return 0 for all unknown names. +# +sub _relationship_order +{ + my $name = shift; + return ($relationships_order{$name} || 0); +} + +# +# Get software load for each CPU from kstats +# Argument: kstat reference +# Returns: reference to the hash with +# cpu_idle, cpu_user, cpu_sys keys. +# +sub _get_sw_cpu_load +{ + my $ks = shift or return; + + my $loads; + my $sys_ks = $ks->{cpu}; + foreach my $cpu (keys %$sys_ks) { + my $sys = $sys_ks->{$cpu}->{sys}; + $loads->{$cpu}->{cpu_idle} = $sys->{cpu_ticks_idle}; + $loads->{$cpu}->{cpu_user} = $sys->{cpu_ticks_user}; + $loads->{$cpu}->{cpu_sys} = $sys->{cpu_ticks_kernel}; + } + + return ($loads); +} + +# +# Get software load for each CPU from kstats +# Arguments: +# pgtree reference +# kstat reference +# +# Returns: nothing +# Stores CPU load in the $pg->{cpudata} hash for each PG +# +sub _get_hw_cpu_load +{ + my $self = shift; + my $pgtree = $self->{PGTREE}; + my $ks = $self->{KSTAT}; + + my $pg_cpu_ks = $ks->{$self->{PG_CPU_MODULE}}; + + foreach my $pgid (keys %$pgtree) { + my $pg = $pgtree->{$pgid}; + my @cpus = @{$pg->{cpus}}; + my $cpu; + my $pg_id; + foreach my $cpu (keys %$pg_cpu_ks) { + next unless _is_member(\@cpus, $cpu); + my $cpu_hw_data = $pg_cpu_ks->{$cpu}; + foreach my $hw (keys %$cpu_hw_data) { + my $cpudata = $cpu_hw_data->{$hw}; + + # + # Only consider information for this PG + # + next unless $cpudata->{pg_id} == $pgid; + + $pg->{cpudata}->{$cpu}->{generation} = + $cpudata->{generation}; + $pg->{cpudata}->{$cpu}->{util} = + $cpudata->{hw_util}; + $pg->{cpudata}->{$cpu}->{util_time_running} = + $cpudata->{hw_util_time_running}; + $pg->{cpudata}->{$cpu}->{util_time_stopped} = + $cpudata->{hw_util_time_stopped}; + $pg->{cpudata}->{$cpu}->{snaptime} = + $cpudata->{snaptime}; + } + } + } +} + +1; + +__END__ + +# +# The information about PG hierarchy is contained in a object return by the +# new() method. +# +# This module can deal with old PG kstats that have 'pg' and 'pg_cpu' as module +# names as well as new PG kstats which use 'pg_hw_perf' and ''pg_hw_perf_cpu' as +# the module name. +# +# The object contains the following fields: +# +# CPUS List of all CPUs present. +# CAPACITY Estimate of capacity for each sharing +# PGTREE The PG tree. See below for the tree representation. +# +# PG_MODULE Module name for the PG kstats. It is either 'pg' for +# old style kstats, or 'pg_hw_perf' for new style kstats. +# +# MAX_FREQUENCY Maximum CPU frequency +# USE_OLD_KSTATS True if we are dealing with old style kstats +# KSTAT The kstat object used to generate this hierarchy. +# +# The PG tree is represented as a hash table indexed by PG ID. Each element of +# the table is the hash reference with the following fields: +# +# children Reference to the list of children PG IDs +# cpus Reference to the list of cpu IDs in the PG +# current_rate Current utilization rate +# generation PG generation +# id PG id +# ncpus number of CPUs in the PG +# parent PG parent id, or -1 if there is none. +# sh_name Sharing name +# snaptime Snapshot time +# util Hardware utilization +# util_rate_max Maximum utilization rate +# util_time_running Time (in nanoseconds) when utilization data is collected +# util_time_stopped Time when utilization data is not collected +# +# The fields (with the exception of 'children') are a copy of the data from +# kstats. +# +# The PG hierarchy in the kernel does not have the root PG. We simulate the root +# (System) PG which is the parent of top level PGs in the system. This PG always +# has ID 0. +# diff --git a/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/pod/Pg.pod b/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/pod/Pg.pod new file mode 100644 index 0000000000..97572a4e2f --- /dev/null +++ b/usr/src/cmd/perl/contrib/Sun/Solaris/Pg/pod/Pg.pod @@ -0,0 +1,223 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +# +# Sun::Solaris::Pg documentation +# + +=head1 NAME + +Solaris::Pg - Perl interface to Processor Group kstats + +=head1 SYNOPSYS + + use Solaris::Pg; + + $p = Solaris::Pg->new(-cpudata=> 1, -tags=> 1, -swload=> 1); + + @pgs = $p->all(); + + @pgs_depth = $p->all_depth_first(); + + @pgs_breadth = $p->all_breadth_first(); + + @leaves = $p->leaves(); + + $root = $p->root; + + $p->update(); + + $leaf = $leaves[0]; + + $name = $p->sh_name($leaf); + + @names = $p->sharing_relationships(); + + @tags = $p->tags($leaf); + + $p1 = Solaris::Pg->new(-cpudata=> 1, -tags=> 1, -swload=> 1); + + if ($p->has_utilization()) { + $utilization = $p->utilization($p1, $leaf); + $capacity = $p->capacity($p1, $leaf); + $accuracy = $p->accuracy($p1, $leaf); + $tdelta = $p->tdelta($p1); + } + + $sw_utilization = $p->sw_utilization($p1, $leaf);) + + +=head1 DESCRIPTION + +The Solaris::Pg module provides an interface to the Solaris PG information +available through B<pg> and B<pg_cpu> kstats. The module provides an object +oriented interface. + +=head1 METHODS + +=head2 new + +Create a new Pg instance. The new() function accepts arguments in the form of a +hash. The following subarguments are supported: + +=over + +=item -cpudata + +Collect per-CPU data from kstats if this is True. + +=item -tags + +Match PGs to physical relationships if this is True. + +=item -swload + +Collect software CPU load if this is True. + +=back + +=head2 root + +Return ID of the root of Processor Group hierarchy. + +=head2 all + +Return list of all PGs sorted by ID. + +=head2 all_depth_first() + +Return list of all PGs sorted by walking the PG hierarchy depth first, starting +from root. + +=head2 all_breadth_first() + +Return list of all PGs sorted by walking the PG hierarchy breadth first, +starting from root. + +=head2 cpus(PG) + +Return list of all CPUs in the PG specified. The list is sorted by CPU ID. + +=head2 generation([PG]) + +Return the generation number for the given PG. Without arguments, return the +generation number for the whole snapshot. Different generation number means that +PG configuration may have changed. + +=head2 parent(PG) + +Return parent ID or undef if there is no parent. + +=head2 children(PG) + +Return list of children for the PG. + +=head2 is_leaf(PG) + +Returns T iff PG is leaf. + +=head2 leaves + +Returns list of leaf PGs. + +=head2 level(PG) + +Return the numeric level of PG in the hierarchy, starting from root which has +level zero. + +=head2 sh_name(PG) + +Returns sharing name for the PG. + +=head2 sharing_relationships([PG], ...) + +Without any arguments, returns the list of sharing relationships in the +snapshot. Relationships are sorted by the level in the hierarchy If any PGs are +given on the command line, only return sharing relationships for given PGs, but +still keep them sorted. + +=head2 tags(PG) + +Return list of strings describing physical relationships ('core', 'chip') for the given PG. + +=head2 update() + +Update utilization and generation data in the PG snapshot. + +=head2 has_utilization(PG) + +Returns True if given PG hasd data about hardware utilization. + +=head2 utilization(PGOBJ, PG) + +Return numeric utilization for the time interval represented by two PG objects +for the given PG. Utilization is a difference in utilization value between two +snapshots. The given PG must belong to the same generation in both snapshots. +Returns B<undef> if utilization can not be obtained. + +=head2 sw_utilization(PGOBJ, PG) + +Return numeric software utilization for the time interval represented by two PG +objects for the given PG. Utilization is a difference in utilization value +between two snapshots. The given PG must belong to the same generation in both +snapshots. Returns B<undef> if utilization can not be obtained. Software +utilization is combined CPU load for all CPUs in the PG. Returns B<undef> if +utilization can not be obtained. + +=head2 sw_utilization(PGOBJ, PG, CPU) + +Return utilization for the PG for a given CPU in a given PG. Utilization is a +difference in utilization value between two snapshots. We can only compare +utilization between PGs having the same generation ID. Returns B<undef> if +utilization can not be obtained. + +=head2 capacity(PGOBJ, PG) + +Return numeric capacity for the time interval represented by two PG objects for +the given PG. Note that the actual capacity is the maximum of all capacities +across all PGs of this type.The given PG must belong to the same generation in +both snapshots. Returns B<undef> if capacities can not be obtained. + +=head2 accuracy(PGOBJ, PG) + +Return accuracy of utilization calculation between two snapshots The accuracy is +determined based on the total time spent running and not running the counters. +If T1 is the time counters were running during the period and T2 is the time +they were turned off, the accuracy is T1 / (T1 + T2), expressed in percentages. + +=head2 tdelta(PGOBJ, PG) + +Return time interval between two snapshots for the given PG. The time is expressed in seconds and is a floating-point number. + +=head2 EXPORT + +None by default. + +=head1 SEE ALSO + +L<pginfo(1)>, L<pgstat(1)> + +=cut + diff --git a/usr/src/cmd/perl/skel/Makefile b/usr/src/cmd/perl/skel/Makefile index 30d649b516..8814ec4a30 100644 --- a/usr/src/cmd/perl/skel/Makefile +++ b/usr/src/cmd/perl/skel/Makefile @@ -54,7 +54,8 @@ PERL_DYNAMIC_EXT = \ # Add any pure-perl extensions here. PERL_NONXS_EXT = \ $(SUN_SOLARIS)/BSM \ - $(SUN_SOLARIS)/PerlGcc + $(SUN_SOLARIS)/PerlGcc \ + $(SUN_SOLARIS)/Pg PERL_EXT = $(PERL_DYNAMIC_EXT) $(PERL_NONXS_EXT) PERL_EXT_MAKEFILES = $(PERL_EXT:%=%/Makefile) diff --git a/usr/src/cmd/pginfo/Makefile b/usr/src/cmd/pginfo/Makefile new file mode 100644 index 0000000000..c209c8f914 --- /dev/null +++ b/usr/src/cmd/pginfo/Makefile @@ -0,0 +1,45 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +PROG= pginfo + +include ../Makefile.cmd + +TARGET= all + +.KEEP_STATE: + +all: $(PROG) + +install: all .WAIT $(ROOTPROG) + +clean: + +$(ROOTBINPROG): $(PROG) + $(INS.file) + +lint: + +include ../Makefile.targ diff --git a/usr/src/cmd/pginfo/pginfo.pl b/usr/src/cmd/pginfo/pginfo.pl new file mode 100644 index 0000000000..8a15b4adc9 --- /dev/null +++ b/usr/src/cmd/pginfo/pginfo.pl @@ -0,0 +1,618 @@ +#! /usr/perl5/bin/perl +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +# +# pginfo - tool for displaying Processor Group information +# + +use warnings; +use strict; +use File::Basename; +use Errno; +use POSIX qw(locale_h); +use Getopt::Long qw(:config no_ignore_case bundling auto_version); +use List::Util qw(first max min); +use Sun::Solaris::Utils qw(textdomain gettext); +use Sun::Solaris::Pg; + +# +# Constants +# +# It is possible that wnen trying to parse PG information, PG generation changes +# which will cause PG new method to fail with errno set to EAGAIN In this case +# we retry open up to RETRY_COUNT times pausing RETRY_DELAY seconds between each +# retry. +# +# When printing PGs we print them as a little tree with each PG shifted by +# LEVEL_OFFSET from each parent. For example: +# +# PG RELATIONSHIP CPUs +# 0 System 0-7 +# 3 Socket 0 2 4 6 +# 2 Cache 0 2 4 6 +# + +use constant { + VERSION => 1.1, + LEVEL_OFFSET => 1, + RETRY_COUNT => 4, + RETRY_DELAY => 0.25, +}; + +# +# Return codes +# +# 0 Successful completion. +# +# 1 An error occurred. +# +# 2 Invalid command-line options were specified. +# +use constant { + E_SUCCESS => 0, + E_ERROR => 1, + E_USAGE => 2, +}; + + +# Set message locale +setlocale(LC_ALL, ""); +textdomain(TEXT_DOMAIN); + +# Get script name for error messages +our $cmdname = basename($0, ".pl"); + +# +# Process options +# +my $do_cpulist; # -C - Show CPU IDs +my $do_cpus; # -c - Treat args as CPU IDs +my $do_physical; # -p - Show physical relationships +my $do_sharing_only; # -S - Only show sharing relationships +my $do_tree; # -T - Show ASCII tree +my $do_usage; # -h - Show usage +my $do_version; # -V - Show version +my $script_mode; # -I - Only show IDs +my $verbose = 0; # -v - Verbose output +my @sharing_filter; # -r string,... +my @sharing_filter_neg; # -R string,... + +# Exit code +my $rc = E_SUCCESS; + +# Parse options from the command line +GetOptions("cpus|c" => \$do_cpus, + "idlist|I" => \$script_mode, + "cpulist|C" => \$do_cpulist, + "physical|p" => \$do_physical, + "help|h|?" => \$do_usage, + "sharing|s" => \$do_sharing_only, + "relationship|r=s" => \@sharing_filter, + "norelationship|R=s" => \@sharing_filter_neg, + "tree|topology|T" => \$do_tree, + "version|V" => \$do_version, + "verbose+" => \$verbose, + "v+" => \$verbose, +) || usage(E_USAGE); + +# Print usage message when -h is given +usage(E_SUCCESS) if $do_usage; + +if ($do_version) { + # + # Print version information and exit + # + printf gettext("%s version %s\n"), $cmdname, VERSION; + exit(E_SUCCESS); +} + +# +# Verify options compatibility +# +if ($script_mode && $do_cpulist) { + printf STDERR + gettext("%s: options -I and -C can not be used at the same time\n"), + $cmdname; + usage(E_USAGE); +} + +if (($script_mode || $do_cpulist) && + ($do_physical || $do_sharing_only || + $do_tree)) { + printf STDERR + gettext("%s: options -C and -I can not be used with -p -s or -T\n"), + $cmdname; + usage(E_USAGE); +} + +if ($do_physical && $do_sharing_only) { + printf STDERR + gettext("%s: option -p can not be used with -s\n"), $cmdname; + usage(E_USAGE); +} + +if ($do_tree && $do_sharing_only) { + printf STDERR + gettext("%s: option -T can not be used with -s\n"), + $cmdname; + usage(E_USAGE); +} + +if ($verbose && !($script_mode || $do_cpulist || $do_sharing_only)) { + $do_tree = 1; + $do_physical = 1; +} + +# +# Get PG information +# +my $p = Sun::Solaris::Pg->new(-tags => $do_physical, + -retry => RETRY_COUNT, + '-delay' => RETRY_DELAY); + +if (!$p) { + printf STDERR + gettext("%s: can not obtain Processor Group information: $!\n"), + $cmdname; + exit(E_ERROR); +} + +# +# Convert -[Rr] string1,string2,... into list (string1, string2, ...) +# +@sharing_filter = map { split /,/ } @sharing_filter; +@sharing_filter_neg = map { split /,/ } @sharing_filter_neg; + +# +# Get list of all PGs in the system +# +my @all_pgs = $p->all_depth_first(); + +if (scalar(@all_pgs) == 0) { + printf STDERR + gettext("%s: this system does not have any Processor groups\n"), + $cmdname; + exit(E_ERROR); +} + +# +# @pgs is the list of PGs we are going to work with after all the option +# processing +# +my @pgs = @all_pgs; + +# +# get list of all CPUs in the system by looking at the root PG cpus +# +my @all_cpus = $p->cpus($p->root()); + +# +# If there are arguments in the command line, treat them as either PG IDs or as +# CPUs that should be converted to PG IDs. +# Arguments can be specified as x-y x,y,z and use special keyword 'all' +# +if (scalar @ARGV) { + # + # Convert 'all' in arguments to all CPUs or all PGs + # + my @args; + my @all = $do_cpus ? @all_cpus : @all_pgs; + @args = map { $_ eq 'all' ? @all : $_ } @ARGV; + + # Expand any x-y,z ranges + @args = $p->expand(@args); + + if ($do_cpus) { + # @bad_cpus is a list of invalid CPU IDs + my @bad_cpus = $p->set_subtract(\@all_cpus, \@args); + if (scalar @bad_cpus) { + printf STDERR + gettext("%s: Invalid processor IDs %s\n"), + $cmdname, $p->id_collapse(@bad_cpus); + $rc = E_ERROR; + } + # + # List of PGs is the list of any PGs that contain specified CPUs + # + @pgs = grep { + my @cpus = $p->cpus($_); + scalar($p->intersect(\@cpus, \@args)); + } @all_pgs; + } else { + # @pgs is a list of valid CPUs in the arguments + @pgs = $p->intersect(\@all_pgs, \@args); + # @bad_pgs is a list of invalid PG IDs + my @bad_pgs = $p->set_subtract(\@all_pgs, \@args); + if (scalar @bad_pgs) { + printf STDERR + gettext("%s: Invalid PG IDs %s\n"), + $cmdname, $p->id_collapse(@bad_pgs); + $rc = E_ERROR; + } + } +} + +# +# Now we have list of PGs to work with. Now apply filtering. First list only +# those matching -R +# +@pgs = grep { list_match($p->sh_name($_), @sharing_filter) } @pgs if + scalar @sharing_filter; + +# Remove any that doesn't match -r +@pgs = grep { !list_match($p->sh_name($_), @sharing_filter_neg) } @pgs if + scalar @sharing_filter_neg; + +# Do we have any PGs left? +if (scalar(@pgs) == 0) { + printf STDERR + gettext("%s: no processor groups matching command line arguments %s\n"), + $cmdname, "@ARGV"; + exit(E_ERROR); +} + +# +# Global list of PGs that should be excluded from the output - it is only used +# when tree mode is specified. +# +my @exclude_pgs; +if ($do_tree) { + @exclude_pgs = grep { + list_match($p->sh_name($_), @sharing_filter_neg) + } @all_pgs; + + # + # In tree mode add PGs that are in the lineage of given PGs + # + @pgs = pg_lineage($p, @pgs) +} + +# +# -I is specified, print list of all PGs +# +if ($script_mode) { + if (scalar(@pgs)) { + @pgs = sort { $a <=> $b } @pgs; + print "@pgs\n"; + } else { + print "none\n"; + } + exit($rc); +} + +# +# -C is specified, print list of all CPUs belonging to PGs +# +if ($do_cpulist) { + my @cpu_list = $p->uniqsort(map { $p->cpus($_) } @pgs); + print "@cpu_list\n"; + exit($rc); +} + +# Mapping of relationships to list of PGs +my %pgs_by_relationship; + +# Maximum length of all sharing names +my $max_sharename_len = length('RELATIONSHIP'); + +# Maximum length of PG ID +my $max_pg_len = length(max(@pgs)) + 1; + +# +# For calculating proper offsets we need to know minimum and maximum level for +# all PGs +# +my @levels = map { $p->level($_) } @pgs; +my $maxlevel = max(@levels); +my $minlevel = min(@levels); + +# Calculate maximum string length that should be used to represent PGs +foreach my $pg (@pgs) { + my $name = $p->sh_name ($pg) || "unknown"; + my $level = $p->level($pg) || 0; + + if ($do_physical) { + my $tags = $p->tags($pg); + $name = "$name [$tags]" if $tags; + } + + my $length = length($name) + $level - $minlevel; + $max_sharename_len = $length if $length > $max_sharename_len; +} + +if ($do_sharing_only) { + # + # -s - only print sharing relationships + + # Get list of sharing relationships + my @relationships = $p->sharing_relationships(@pgs); + + if ($verbose) { + printf "%-${max_sharename_len}s %s\n", + 'RELATIONSHIP', 'PGs'; + foreach my $rel (@relationships) { + my @pg_rel = grep { $p->sh_name($_) eq $rel } + @pgs; + my $pg_rel = $p->id_collapse (@pg_rel); + $pgs_by_relationship{$rel} = \@pg_rel; + } + } + + foreach my $rel (@relationships) { + printf "%-${max_sharename_len}s", $rel; + if ($verbose) { + my @pgs = @{$pgs_by_relationship{$rel}}; + my $pgs = $p->id_collapse (@pgs); + print ' ', $pgs; + } + print "\n"; + } + + # we are done + exit($rc); +} + +# +# Print PGs either in list form or tree form +# +if (!$do_tree) { + my $header; + + $header = sprintf "%-${max_pg_len}s %-${max_sharename_len}s" . + " %s\n", + 'PG', 'RELATIONSHIP', 'CPUs'; + + print $header; + map { pg_print ($p, $_) } @pgs; +} else { + # + # Construct a tree from PG hierarchy and prune any PGs that are + # specified with -R option + # + my $pg_tree = pg_make_tree($p); + map { pg_remove_from_tree($pg_tree, $_) } @exclude_pgs; + + # Find top-level PGs + my @top_level = grep { + $pg_tree->{$_} && !defined($pg_tree->{$_}->{parent}) + } @pgs; + + # Print each top-level node as ASCII tree + foreach my $pg (@top_level) { + my $children = $pg_tree->{$pg}->{children}; + my @children = $children ? @{$children} : (); + @children = $p->intersect(\@children, \@pgs); + pg_print_tree($p, $pg_tree, $pg, '', '', scalar @children); + } +} + +# We are done! +exit($rc); + +###################################################################### +# Internal functions +# + +# +# pg_print(cookie, pg) +# print PG information in list mode +# +sub pg_print +{ + my $p = shift; + my $pg = shift; + my $sharing = $p->sh_name($pg); + if ($do_physical) { + my $tags = $p->tags($pg); + $sharing = "$sharing [$tags]" if $tags; + } + my $level = $p->level($pg) - $minlevel; + $sharing = (' ' x (LEVEL_OFFSET * $level)) . $sharing; + my $cpus = $p->cpus($pg); + printf "%-${max_pg_len}d %-${max_sharename_len}s", $pg, $sharing; + print " $cpus"; + print "\n"; +} + +# +# pg_showcpus(cookie, pg) +# Print CPUs in the current PG +# +sub pg_showcpus +{ + my $p = shift; + my $pg = shift; + + my @cpus = $p->cpus($pg); + my $ncpus = scalar @cpus; + return 0 unless $ncpus; + my $cpu_string = $p->cpus($pg); + return (($ncpus == 1) ? + "CPU: $cpu_string": + "CPUs: $cpu_string"); +} + +# +# pg_print_node(cookie, pg) +# print PG as ASCII tree node +# +sub pg_print_node +{ + my $p = shift; + my $pg = shift; + + my $sharing = $p->sh_name($pg); + if ($do_physical) { + my $tags = $p->tags($pg); + $sharing = "$sharing [$tags]" if $tags; + } + + print "$pg ($sharing)"; + my $cpus = pg_showcpus($p, $pg); + print " $cpus"; + print "\n"; +} + +# +# pg_print_tree(cookie, tree, pg, prefix, childprefix, npeers) +# print ASCII tree of PGs in the tree +# prefix should be used for the current node, childprefix for children nodes +# npeers is the number of peers of the current node +# +sub pg_print_tree +{ + my $p = shift; + my $pg_tree = shift; + my $pg = shift; + return unless defined ($pg); # done! + my $prefix = shift; + my $childprefix = shift; + my $npeers = shift; + + # Get list of my children + my $children = $pg_tree->{$pg}->{children}; + my @children = $children ? @{$children} : (); + @children = $p->intersect(\@children, \@pgs); + my $nchildren = scalar @children; + + my $printprefix = "$childprefix"; + my $printpostfix = $npeers ? "| " : " "; + + my $bar = $npeers ? "|" : "`"; + + print $childprefix ? $childprefix : ""; + print $prefix ? "$bar" . "-- " : ""; + pg_print_node ($p, $pg); + + my $new_prefix = $npeers ? $prefix : " "; + + # Print the subtree with a new offset, starting from each child + map { + pg_print_tree($p, $pg_tree, $_, "| ", + "$childprefix$new_prefix", --$nchildren) + } @children; +} + +# +# list_match(arg, list) +# Return arg if argument matches any of the elements on the list +# +sub list_match +{ + my $arg = shift; + + return first { $arg =~ m/$_/i } @_; +} + +# +# Make a version of PG parent-children relationships from cookie +# +sub pg_make_tree +{ + my $p = shift; + my $pg_tree = (); + + foreach my $pg ($p->all()) { + my @children = $p->children($pg); + $pg_tree->{$pg}->{parent} = $p->parent($pg); + $pg_tree->{$pg}->{children} = \@children; + } + + return ($pg_tree); +} + +# +# pg_remove_from_tree(tree, pg) +# Prune PG from the tree +# +sub pg_remove_from_tree +{ + my $pg_tree = shift; + my $pg = shift; + my $node = $pg_tree->{$pg}; + return unless $node; + + my @children = @{$node->{children}}; + my $parent = $node->{parent}; + my $parent_node; + + # + # Children have a new parent + # + map { $pg_tree->{$_}->{parent} = $parent } @children; + + # + # All children move to the parent (if there is one) + # + if (defined($parent) && ($parent_node = $pg_tree->{$parent})) { + # + # Merge children from parent and @children list + # + my @parent_children = @{$parent_node->{children}}; + # + # Remove myself from parent children + # + @parent_children = grep { $_ != $pg } @parent_children; + @parent_children = $p->nsort(@parent_children, @children); + $parent_node->{children} = \@parent_children; + } + + # Remove current node + delete $pg_tree->{$pg}; +} + +# +# For a given list of PGs return the full lineage +# +sub pg_lineage +{ + my $p = shift; + return unless scalar @_; + + my @parents = grep { defined($_) } map { $p->parent ($_) } @_; + + return ($p->uniq(@_, @parents, pg_lineage ($p, @parents))); +} + +# +# Print usage information and exit with the return code specified +# +sub usage +{ + my $rc = shift; + printf STDERR + gettext("Usage:\t%s [-T] [-p] [-v] [-r string] [-R string] [pg ... | -c processor_id ...]\n\n"), + $cmdname; + printf STDERR + gettext("\t%s -s [-v] [-r string] [-R string] [pg ... | -c processor_id ...]\n\n"), $cmdname; + printf STDERR gettext("\t%s -C | -I [-r string] [-R string] [pg ... | -c processor_id ...]\n\n"), + $cmdname; + printf STDERR gettext("\t%s -h\n\n"), $cmdname; + + exit($rc); +} + +__END__ diff --git a/usr/src/cmd/pgstat/Makefile b/usr/src/cmd/pgstat/Makefile new file mode 100644 index 0000000000..9b66b76e37 --- /dev/null +++ b/usr/src/cmd/pgstat/Makefile @@ -0,0 +1,45 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +PROG= pgstat + +include ../Makefile.cmd + +TARGET= all + +.KEEP_STATE: + +all: $(PROG) + +install: all .WAIT $(ROOTPROG) + +clean: + +$(ROOTBINPROG): $(PROG) + $(INS.file) + +lint: + +include ../Makefile.targ diff --git a/usr/src/cmd/pgstat/pgstat.pl b/usr/src/cmd/pgstat/pgstat.pl new file mode 100644 index 0000000000..faa7099c7d --- /dev/null +++ b/usr/src/cmd/pgstat/pgstat.pl @@ -0,0 +1,1018 @@ +#! /usr/perl5/bin/perl +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# + +# +# pgstat - tool for displaying Processor Group statistics +# + +use warnings; +use strict; +use File::Basename; +use List::Util qw(first max min); +use Errno; +use POSIX qw(locale_h strftime); +use Getopt::Long qw(:config no_ignore_case bundling auto_version); +use Sun::Solaris::Utils qw(textdomain gettext); +use Sun::Solaris::Pg; + +# +# Constants section +# +# It is possible that wnen trying to parse PG kstats, PG generation changes +# which will cause PG new method to fail with errno set to EAGAIN In this case +# we retry open up to RETRY_COUNT times pausing RETRY_DELAY seconds between each +# retry. +# +# When printing PGs we print them as a little tree with each PG shifted by +# LEVEL_OFFSET from each parent. For example: +# +# PG RELATIONSHIP CPUs +# 0 System 0-7 +# 3 Socket 0 2 4 6 +# 2 Cache 0 2 4 6 +# +# +# DEFAULT_INTERVAL - interval in seconds between snapshot if none is specified +# DEFAULT_COUNT - Number of iterations if none is specified +# HWLOAD_UNKNOWN - Value that we use to represent unknown hardware load +# HWLOAD_UNDEF - Value that we use to represent undefined hardware load +# +use constant { + VERSION => 1.1, + DEFAULT_INTERVAL => 1, + DEFAULT_COUNT => 1, + RETRY_COUNT => 4, + RETRY_DELAY => 0.25, + HWLOAD_UNKNOWN => -1, + HWLOAD_UNDEF => -2, + LEVEL_OFFSET => 1, +}; + +# +# Format for fields, showing percentage headers +# +my $pcnt_fmt = "%6s"; +# +# Format for percentages field +# +my $pcnt = "%5.1f"; + +# +# Return codes +# +# 0 Successful completion. +# +# 1 An error occurred. +# +# 2 Invalid command-line options were specified. +# +use constant { + E_SUCCESS => 0, + E_ERROR => 1, + E_USAGE => 2, +}; + +# +# Valid sort keys for -s and -S options +# +my @sort_keys = qw(pg hwload swload user sys idle depth breadth); + +# Set message locale +setlocale(LC_ALL, ""); +textdomain(TEXT_DOMAIN); + +# Get script name for error messages +our $cmdname = basename($0, ".pl"); + +my @pg_list; # -P pg,... - PG arguments +my @cpu_list; # -c cpu,... - CPU arguments +my @sharing_filter_neg; # -R string,... - Prune PGs +my @sharing_filter; # -r string,... - Matching sharing names +my $do_aggregate; # -A - Show summary in the end +my $do_cpu_utilization; # -C - Show per-CPU utilization +my $do_physical; # -p - Show physical relationships +my $do_timestamp; # -T - Print timestamp +my $do_usage; # -h - Show usage +my $do_version; # -V - Verbose output +my $show_top; # -t - show top N +my $sort_order_a; # -S key - Ascending sort order +my $sort_order_d; # -s key - Descending sort order +my $verbose; # -v - Verbose output; + +$verbose = 0; + +# Parse options from the command line +GetOptions("aggregate|A" => \$do_aggregate, + "cpus|c=s" => \@cpu_list, + "showcpu|C" => \$do_cpu_utilization, + "help|h|?" => \$do_usage, + "pgs|P=s" => \@pg_list, + "physical|p" => \$do_physical, + "relationship|r=s" => \@sharing_filter, + "norelationship|R=s" => \@sharing_filter_neg, + "sort|s=s" => \$sort_order_d, + "Sort|S=s" => \$sort_order_a, + "top|t=i" => \$show_top, + "timestamp|T=s" => \$do_timestamp, + "version|V" => \$do_version, + "verbose+" => \$verbose, + "v+" => \$verbose, +) || usage(E_USAGE); + +# Print usage message when -h is given +usage(E_SUCCESS) if $do_usage; + +if ($do_version) { + printf gettext("%s version %s\n"), $cmdname, VERSION; + exit(E_SUCCESS); +} + +# +# Verify options +# +# -T should have either u or d argument +if (defined($do_timestamp) && !($do_timestamp eq 'u' || $do_timestamp eq 'd')) { + printf STDERR gettext("%s: Invalid -T %s argument\n"), + $cmdname, $do_timestamp; + usage(E_USAGE); +} + +if ($sort_order_a && $sort_order_d) { + printf STDERR gettext("%s: -S and -s flags can not be used together\n"), + $cmdname; + usage(E_USAGE); +} + +if (defined ($show_top) && $show_top <= 0) { + printf STDERR gettext("%s: -t should specify positive integer\n"), + $cmdname; + usage(E_USAGE); +} + +# +# Figure out requested sorting of the output +# By default 'depth-first' is used +# +my $sort_key; +my $sort_reverse; + +if (!($sort_order_a || $sort_order_d)) { + $sort_key = 'depth'; + $sort_reverse = 1; +} else { + $sort_key = $sort_order_d || $sort_order_a; + $sort_reverse = defined($sort_order_d); +} + +# +# Make sure sort key is valid +# +if (!list_match($sort_key, \@sort_keys, 1)) { + printf STDERR gettext("%s: invalid sort key %s\n"), + $cmdname, $sort_key; + usage(E_USAGE); +} + +# +# Convert -[Rr] string1,string2,... into list (string1, string2, ...) +# +@sharing_filter = map { split /,/ } @sharing_filter; +@sharing_filter_neg = map { split /,/ } @sharing_filter_neg; + +# +# We use two PG snapshot to compare utilization between them. One snapshot is +# kept behind another in time. +# +my $p = Sun::Solaris::Pg->new(-cpudata => $do_cpu_utilization, + -swload => 1, + -tags => $do_physical, + -retry => RETRY_COUNT, + -delay => RETRY_DELAY); + +if (!$p) { + printf STDERR + gettext("%s: can not obtain Processor Group information: $!\n"), + $cmdname; + exit(E_ERROR); +} + +my $p_initial = $p; +my $p_dup = Sun::Solaris::Pg->new(-cpudata => $do_cpu_utilization, + -swload => 1, + -tags => $do_physical, + -retry => RETRY_COUNT, + -delay => RETRY_DELAY); + +if (!$p_dup) { + printf STDERR + gettext("%s: can not obtain Processor Group information: $!\n"), + $cmdname; + exit(E_ERROR); +} + +# +# Get interval and count +# +my $count = DEFAULT_COUNT; +my $interval = DEFAULT_INTERVAL; + +if (scalar @ARGV > 0) { + $interval = shift @ARGV; + if (scalar @ARGV > 0) { + $count = $ARGV[0]; + } else { + $count = 0; + } +} + +if (! ($interval=~ m/^\d+\.?\d*$/)) { + printf STDERR + gettext("%s: Invalid interval %s - should be numeric\n"), + $cmdname, $interval; + usage(E_USAGE); +} + +if ($count && ! ($count=~ m/^\d+$/)) { + printf STDERR + gettext("%s: Invalid count %s - should be numeric\n"), + $cmdname, $count; + usage(E_USAGE); +} + +my $infinite = 1 unless $count; + +# +# Get list of all PGs +# +my @all_pgs = $p->all_depth_first(); + +# +# get list of all CPUs in the system by looking at the root PG cpus +# +my @all_cpus = $p->cpus($p->root()); + +# PGs to work with +my @pgs = @all_pgs; + +my $rc = E_SUCCESS; + +# +# Convert CPU and PG lists into proper Perl lists, converting things like +# 1-3,5 into (1, 2, 3, 5). Also convert 'all' into the list of all CPUs or PGs +# +@cpu_list = + map { $_ eq 'all' ? @all_cpus : $_ } # all -> (cpu1, cpu2, ...) + map { split /,/ } @cpu_list; # x,y -> (x, y) + +@cpu_list = $p->expand(@cpu_list); # 1-3 -> 1 2 3 + +# Same drill for PGs +@pg_list = + map { $_ eq 'all' ? @all_pgs : $_ } + map { split /,/ } @pg_list; + +@pg_list = $p->expand(@pg_list); + +# +# Convert CPU list to list of PGs +# +if (scalar @cpu_list) { + + # + # Warn about any invalid CPU IDs in the arguments + # @bad_cpus is a list of invalid CPU IDs + # + my @bad_cpus = $p->set_subtract(\@all_cpus, \@cpu_list); + if (scalar @bad_cpus) { + printf STDERR + gettext("%s: Invalid processor IDs %s\n"), + $cmdname, $p->id_collapse(@bad_cpus); + $rc = E_ERROR; + } + + # + # Find all PGs which have at least some CPUs from @cpu_list + # + my @pgs_from_cpus = grep { + my @cpus = $p->cpus($_); + scalar($p->intersect(\@cpus, \@cpu_list)); + } @all_pgs; + + # Combine PGs from @pg_list (if any) with PGs we found + @pg_list = (@pg_list, @pgs_from_cpus); +} + +# +# If there are any PGs specified by the user, complain about invalid ones +# +@pgs = get_pg_list($p, \@pg_list, \@sharing_filter, \@sharing_filter_neg); + +if (scalar @pg_list > 0) { + # + # Warn about any invalid PG + # @bad_pgs is a list of invalid CPUs in the arguments + # + my @bad_pgs = $p->set_subtract(\@all_pgs, \@pg_list); + if (scalar @bad_pgs) { + printf STDERR + gettext("%s: warning: invalid PG IDs %s\n"), + $cmdname, $p->id_collapse(@bad_pgs); + } +} + +# Do we have any PGs left? +if (scalar(@pgs) == 0) { + printf STDERR + gettext("%s: No processor groups matching command line arguments\n"), + $cmdname; + exit(E_USAGE); +} + +# +# Set $do_levels if we should provide output identation by level It doesn't make +# sense to provide identation if PGs are sorted not in topology order. +# +my $do_levels = ($sort_key eq 'breadth' || $sort_key eq 'depth'); + +# +# %name_of_pg hash keeps sharing name, possibly with physical tags appended to +# it for each PG. +# +my %name_of_pg; + +# +# For calculating proper offsets we need to know minimum and maximum level for +# all PGs +# +my $max_sharename_len = length('RELATIONSHIP'); + +my $maxlevel; +my $minlevel; + +if ($do_levels) { + my @levels = map { $p->level($_) } @pgs; # Levels for each PG + $maxlevel = max(@levels); + $minlevel = min(@levels); +} + +# +# Walk over all PGs and find out the string length that we need to represent +# sharing name + physical tags + indentation level. +# +foreach my $pg (@pgs) { + my $name = $p->sh_name ($pg) || "unknown"; + my $level = $p->level($pg) || 0 if $do_levels; + + if ($do_physical) { + my $tags = $p->tags($pg); + $name = "$name [$tags]" if $tags; + $name_of_pg{$pg} = $name; + } + + $name_of_pg{$pg} = $name; + my $length = length($name); + $length += $level - $minlevel if $do_levels; + $max_sharename_len = $length if $length > $max_sharename_len; +} + +# Maximum length of PG ID field +my $max_pg_len = length(max(@pgs)) + 1; +$max_pg_len = length('PG') if ($max_pg_len) < length('PG'); + +# +# +# %pgs hash contains various statistics per PG that is used for sorting. +my %pgs; + +# Total number of main loop iterations we actually do +my $total_iterations = 0; + +# +# For summary, keep track of minimum and maximum data per PG +# +my $history; + +# +# Provide summary output when aggregation is requested and user hits ^C +# +$SIG{'INT'} = \&print_totals if $do_aggregate; + +###################################################################### +# Main loop +########### + +while ($infinite || $count--) { + # + # Print timestamp if -T is specified + # + if ($do_timestamp) { + if ($do_timestamp eq 'u') { + print time(), "\n"; + } else { + my $date_str = strftime "%A, %B %e, %Y %r %Z", + localtime; + print "$date_str\n"; + } + } + + # + # Wait for the requested interval + # + select(undef, undef, undef, $interval); + + # + # Print headers + # There are two different output formats - one regular and one verbose + # + if (!$verbose) { + printf "%-${max_pg_len}s %-${max_sharename_len}s ". + "$pcnt_fmt $pcnt_fmt %-s\n", + 'PG', 'RELATIONSHIP', 'HW', 'SW', 'CPUS'; + } else { + printf "%-${max_pg_len}s %-${max_sharename_len}s" . + " $pcnt_fmt %4s %4s $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt %s\n", + 'PG','RELATIONSHIP', + 'HW', 'UTIL', 'CAP', + 'SW', 'USR', 'SYS', 'IDLE', 'CPUS'; + } + + # + # Update the data in one of the snapshots + # + $p_dup->update(); + + # + # Do not show offlined CPUs + # + my @online_cpus = $p->online_cpus(); + + # + # Check whether both snapshots belong to the same generation + # + if ($p->generation() != $p_dup->generation()) { + printf gettext("Configuration changed!\n"); + # Swap $p and $p_dup; + $p = $p_dup; + $p_dup = Sun::Solaris::Pg->new( + -cpudata => $do_cpu_utilization, + -swload => 1, + -tags => $do_physical, + -retry => RETRY_COUNT, + -delay => RETRY_DELAY); + if (!$p_dup) { + printf STDERR gettext( + "%s: can not obtain Processor Group information: $!\n"), + $cmdname; + exit(E_ERROR); + } + # + # Recreate @pg_list since it may have changed + # + @pgs = get_pg_list($p, \@pg_list, + \@sharing_filter, \@sharing_filter_neg); + + next; + } + + %pgs = (); + + # + # Go over each PG and gets its utilization data + # + foreach my $pg (@pgs) { + my ($hwload, $utilization, $capacity, $accuracy) = + get_load($p, $p_dup, $pg); + my @cpus = $p->cpus ($pg); + my ($user, $sys, $idle, $swload) = + $p->sw_utilization($p_dup, $pg); + + # Adjust idle and swload based on rounding + ($swload, $idle) = get_swload($user, $sys); + + $pgs{$pg}->{pg} = $pg; + $pgs{$pg}->{hwload} = $hwload; + $pgs{$pg}->{swload} = $swload; + $pgs{$pg}->{user} = $user; + $pgs{$pg}->{sys} = $sys; + $pgs{$pg}->{idle} = $idle; + $pgs{$pg}->{utilization} = $utilization; + $pgs{$pg}->{capacity} = $capacity; + + # + # Record history + # + $history->{$pg}->{hwload} += $hwload if $hwload && $hwload >= 0; + $history->{$pg}->{swload} += $swload if $swload; + $history->{$pg}->{user} += $user if $user; + $history->{$pg}->{sys} += $sys if $sys; + $history->{$pg}->{idle} += $idle if $idle; + $history->{$pg}->{maxhwload} = $hwload if + !defined($history->{$pg}->{maxhwload}) || + $hwload > $history->{$pg}->{maxhwload}; + $history->{$pg}->{minhwload} = $hwload if + !defined($history->{$pg}->{minhwload}) || + $hwload < $history->{$pg}->{minhwload}; + $history->{$pg}->{maxswload} = $swload if + !defined($history->{$pg}->{maxswload}) || + $swload > $history->{$pg}->{maxswload}; + $history->{$pg}->{minswload} = $swload if + !defined($history->{$pg}->{minswload}) || + $swload < $history->{$pg}->{minswload}; + } + + # + # Sort the output + # + my @sorted_pgs; + my $npgs = scalar @pgs; + @sorted_pgs = pg_sort_by_key(\%pgs, $sort_key, $sort_reverse, @pgs); + + # + # Should only top N be displayed? + # + if ($show_top) { + $npgs = $show_top if $show_top < $npgs; + @sorted_pgs = @sorted_pgs[0..$npgs - 1]; + } + + # + # Now print everything + # + foreach my $pg (@sorted_pgs) { + my $shname = $name_of_pg{$pg}; + my $level; + + if ($do_levels) { + $level = $p->level($pg) - $minlevel; + $shname = (' ' x (LEVEL_OFFSET * $level)) . $shname; + } + + my $hwload = $pgs{$pg}->{hwload} || 0; + my $swload = $pgs{$pg}->{swload}; + + my @cpus = $p->cpus($pg); + @cpus = $p->intersect(\@cpus, \@online_cpus); + + my $cpus = $p->id_collapse(@cpus); + my $user = $pgs{$pg}->{user}; + my $sys = $pgs{$pg}->{sys}; + my $idle = $pgs{$pg}->{idle}; + my $utilization = $pgs{$pg}->{utilization}; + my $capacity = $pgs{$pg}->{capacity}; + + if (!$verbose) { + printf "%${max_pg_len}d %-${max_sharename_len}s " . + "%s %s %s\n", + $pg, $shname, + load2str($hwload), + load2str($swload), + $cpus; + } else { + printf + "%${max_pg_len}d %-${max_sharename_len}s " . + "%4s %4s %4s %4s %4s %4s %4s %s\n", + $pg, $shname, + load2str($hwload), + number_to_scaled_string($utilization), + number_to_scaled_string($capacity), + load2str($swload), + load2str($user), + load2str($sys), + load2str($idle), + $cpus; + } + + # + # If per-CPU utilization is requested, print it after each + # corresponding PG + # + if ($do_cpu_utilization) { + my $w = ${max_sharename_len} - length ('CPU'); + foreach my $cpu (sort {$a <=> $b } @cpus) { + my ($cpu_utilization, + $accuracy, $hw_utilization, + $swload) = + $p->cpu_utilization($p_dup, $pg, $cpu); + next unless defined $cpu_utilization; + my $cpuname = "CPU$cpu"; + if ($do_levels) { + $cpuname = + (' ' x (LEVEL_OFFSET * $level)) . + $cpuname; + + } + + printf "%-${max_pg_len}s " . + "%-${max_sharename_len}s ", + ' ', $cpuname; + if ($verbose) { + printf "%s %4s %4s\n", + load2str($cpu_utilization), + number_to_scaled_string($hw_utilization), + number_to_scaled_string($capacity); + } else { + printf "%s %s\n", + load2str($cpu_utilization), + load2str($swload); + } + } + } + } + + # + # Swap $p and $p_dup + # + ($p, $p_dup) = ($p_dup, $p); + + $total_iterations++; +} + +print_totals() if $do_aggregate; + + +#################################### +# End of main loop +#################################### + + +# +# Support Subroutines +# + +# +# Print aggregated information in the end +# +sub print_totals +{ + exit ($rc) unless $total_iterations > 1; + + printf gettext("\n%s SUMMARY: UTILIZATION OVER %d SECONDS\n\n"), + ' ' x 10, + $total_iterations * $interval; + + my @sorted_pgs; + my $npgs = scalar @pgs; + + %pgs = (); + + # + # Collect data per PG + # + foreach my $pg (@pgs) { + $pgs{$pg}->{pg} = $pg; + + my ($hwload, $utilization, $capacity, $accuracy) = + get_load($p_initial, $p_dup, $pg); + + my @cpus = $p->cpus ($pg); + my ($user, $sys, $idle, $swload) = + $p_dup->sw_utilization($p_initial, $pg); + + # Adjust idle and swload based on rounding + ($swload, $idle) = get_swload($user, $sys); + + $pgs{$pg}->{pg} = $pg; + $pgs{$pg}->{swload} = $swload; + $pgs{$pg}->{user} = $user; + $pgs{$pg}->{sys} = $sys; + $pgs{$pg}->{idle} = $idle; + $pgs{$pg}->{hwload} = $hwload; + $pgs{$pg}->{utilization} = number_to_scaled_string($utilization); + $pgs{$pg}->{capacity} = number_to_scaled_string($capacity); + $pgs{$pg}->{minhwload} = $history->{$pg}->{minhwload}; + $pgs{$pg}->{maxhwload} = $history->{$pg}->{maxhwload}; + $pgs{$pg}->{minswload} = $history->{$pg}->{minswload} || 0; + $pgs{$pg}->{maxswload} = $history->{$pg}->{maxswload} || 0; + } + + # + # Sort PGs according to the sorting options + # + @sorted_pgs = pg_sort_by_key(\%pgs, $sort_key, $sort_reverse, @pgs); + + # + # Trim to top N if needed + # + if ($show_top) { + $npgs = $show_top if $show_top < $npgs; + @sorted_pgs = @sorted_pgs[0..$npgs - 1]; + } + + # + # Print headers + # + my $d = ' ' . '-' x 4; + if ($verbose) { + printf "%${max_pg_len}s %-${max_sharename_len}s %s " . + " ------HARDWARE------ ------SOFTWARE------\n", + ' ', ' ', ' ' x 8; + + printf "%-${max_pg_len}s %-${max_sharename_len}s", + 'PG', 'RELATIONSHIP'; + + printf " %4s %4s", 'UTIL', ' CAP'; + printf " $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt %s\n", + 'MIN', 'AVG', 'MAX', 'MIN', 'AVG', 'MAX', 'CPUS'; + } else { + printf "%${max_pg_len}s %-${max_sharename_len}s " . + "------HARDWARE------" . + " ------SOFTWARE------\n", ' ', ' '; + + printf "%-${max_pg_len}s %-${max_sharename_len}s", + 'PG', 'RELATIONSHIP'; + + printf " $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt $pcnt_fmt %s\n", + 'MIN', 'AVG', 'MAX', 'MIN', 'AVG', 'MAX', 'CPUS'; + } + + # + # Print information per PG + # + foreach my $pg (@sorted_pgs) { + my $cpus = $p->cpus($pg); + + my $shname = $name_of_pg{$pg}; + if ($sort_key eq 'breadth' || $sort_key eq 'depth') { + my $level = $p->level($pg) - $minlevel; + $shname = (' ' x (LEVEL_OFFSET * $level)) . $shname; + } + + printf "%${max_pg_len}d %-${max_sharename_len}s ", + $pg, $shname; + + if ($verbose) { + printf "%4s %4s ", + number_to_scaled_string($pgs{$pg}->{utilization}), + number_to_scaled_string($pgs{$pg}->{capacity}); + } + + if (!defined($pgs{$pg}->{hwload}) || + $pgs{$pg}->{hwload} == HWLOAD_UNDEF) { + printf "$pcnt_fmt $pcnt_fmt $pcnt_fmt ", + '-', '-', '-'; + } else { + printf "%s %s %s ", + load2str($pgs{$pg}->{minhwload}), + load2str($pgs{$pg}->{hwload}), + load2str($pgs{$pg}->{maxhwload}); + } + printf "%s %s %s", + load2str($pgs{$pg}->{minswload}), + load2str($pgs{$pg}->{swload}), + load2str($pgs{$pg}->{maxswload}); + + printf " %s\n", $cpus; + } + + exit ($rc); +} + +# +# pg_sort_by_key(pgs, key, inverse) +# Sort pgs according to the key specified +# +# Arguments: +# pgs hash indexed by PG ID +# sort keyword +# inverse - inverse sort result if this is T +# +sub pg_sort_by_key +{ + my $pgs = shift; + my $key = shift; + my $inverse = shift; + my @sorted; + + if ($key eq 'depth' || $key eq 'breadth') { + my $root = $p->root; + my @pgs = $key eq 'depth' ? + $p->all_depth_first() : + $p->all_breadth_first(); + @sorted = reverse(grep { exists($pgs{$_}) } @pgs); + } else { + @sorted = sort { $pgs{$a}->{$key} <=> $pgs{$b}->{$key} } @_; + } + + return ($inverse ? reverse(@sorted) : @sorted); +} + +# +# Convert numeric load to formatted string +# +sub load2str +{ + my $load = shift; + + return (sprintf "$pcnt_fmt", '-') if + !defined($load) || $load == HWLOAD_UNDEF; + return (sprintf "$pcnt_fmt", '?') if $load == HWLOAD_UNKNOWN; + return (sprintf "$pcnt%%", $load); +} + +# +# get_load(snapshot1, snapshot2, pg) +# +# Get various hardware load data for the given PG using two snapshots. +# Arguments: two PG snapshots and PG ID +# +# In scalar context returns the hardware load +# In list context returns a list +# (load, utilization, capacity, accuracy) +# +sub get_load +{ + my $p = shift; + my $p_dup = shift; + my $pg = shift; + + return HWLOAD_UNDEF if !$p->has_utilization($pg); + + my ($capacity, $utilization, $accuracy, $tdelta); + + + $accuracy = 100; + $utilization = 0; + + $utilization = $p->utilization($p_dup, $pg) || 0; + $capacity = $p_dup->capacity($pg); + $accuracy = $p->accuracy($p_dup, $pg) || 0; + $tdelta = $p->tdelta($p_dup, $pg); + my $utilization_per_second = $utilization; + $utilization_per_second /= $tdelta if $tdelta; + + my $load; + + if ($accuracy != 100) { + $load = HWLOAD_UNKNOWN; + } else { + $load = $capacity ? + $utilization_per_second * 100 / $capacity : + HWLOAD_UNKNOWN; + $capacity *= $tdelta if $tdelta; + } + + return (wantarray() ? + ($load, $utilization, $capacity, $accuracy) : + $load); +} + +# +# Make sure that with the rounding used, user + system + swload add up to 100%. +# +# +sub get_swload +{ + my $user = shift; + my $sys = shift; + my $swload; + my $idle; + + $user = sprintf "$pcnt", $user; + $sys = sprintf "$pcnt", $sys; + + $swload = $user + $sys; + $idle = 100 - $swload; + + return ($swload, $idle); +} + +# +# get_pg_list(cookie, pg_list, sharing_filter, sharing_filter_neg) Get list OF +# PGs to look at based on all PGs available, user-specified PGs and +# user-specified filters. +# +sub get_pg_list +{ + my $p = shift; + my $pg_list = shift; + my $sharing_filter = shift; + my $sharing_filter_neg = shift; + + my @all = $p->all(); + my @pg_list = scalar @$pg_list ? @$pg_list : @all; + my @pgs = $p->intersect(\@all_pgs, \@pg_list); + + # + # Now we have list of PGs to work with. Now apply filtering. First list + # only those matching -R + # + @pgs = grep { list_match($p->sh_name($_), \@sharing_filter, 0) } @pgs if + @sharing_filter; + + my @sharing_filter = @$sharing_filter; + my @sharing_filter_neg = @$sharing_filter_neg; + # Remove any that doesn't match -r + @pgs = grep { + !list_match($p->sh_name($_), \@sharing_filter_neg, 0) + } @pgs if + scalar @sharing_filter_neg; + + return (@pgs); +} + +# +# usage(rc) +# +# Print short usage message and exit with the given return code. +# If verbose is T, print a bit more information +# +sub usage +{ + my $rc = shift || E_SUCCESS; + + printf STDERR + gettext("Usage:\t%s [-A] [-C] [-p] [-s key | -S key] " . + "[-t number] [-T u | d]\n"), $cmdname; + print STDERR + gettext("\t\t[-r string] [-R string] [-P pg ...] [-c processor_id... ]\n"); + print STDERR + gettext("\t\t[interval [count]]\n\n"); + + exit ($rc); +} + +# +# list_match(val, list_ref, strict) +# Return T if argument matches any of the elements on the list, undef otherwise. +# +sub list_match +{ + my $arg = shift; + my $list = shift; + my $strict = shift; + + return first { $arg eq $_ } @$list if $strict; + return first { $arg =~ m/$_/i } @$list; +} + +# +# Convert a number to a string representation +# The number is scaled down until it is small enough to be in a good +# human readable format i.e. in the range 0 thru 1000. +# If it's smaller than 10 there's room enough to provide one decimal place. +# +sub number_to_scaled_string +{ + my $number = shift; + + return '-' unless defined ($number); + + # Remove any trailing spaces + $number =~ s/ //g; + + return $number unless $number =~ /^[.\d]+$/; + + my $scale = 1000; + + return sprintf("%4d", $number) if $number < $scale; + + my @measurement = ('K', 'M', 'B', 'T'); + my $uom = shift(@measurement); + my $result; + + my $save = $number; + + # Get size in K. + $number /= $scale; + + while (($number >= $scale) && $uom ne 'B') { + $uom = shift(@measurement); + $save = $number; + $number /= $scale; + } + + # check if we should output a decimal place after the point + if ($save && (($save / $scale) < 10)) { + $result = sprintf("%3.1f$uom", $save / $scale); + } else { + $result = sprintf("%3d$uom", $number); + } + + return ("$result"); +} + + +__END__ diff --git a/usr/src/lib/libsecdb/exec_attr.txt b/usr/src/lib/libsecdb/exec_attr.txt index 2b6e991979..afa14f719e 100644 --- a/usr/src/lib/libsecdb/exec_attr.txt +++ b/usr/src/lib/libsecdb/exec_attr.txt @@ -161,6 +161,9 @@ Maintenance and Repair:suser:cmd:::/usr/sbin/reboot:uid=0 Maintenance and Repair:suser:cmd:::/usr/sbin/syslogd:euid=0 Maintenance and Repair:suser:cmd:::/sbin/bootadm:euid=0 Maintenance and Repair:solaris:cmd:::/usr/sbin/ucodeadm:privs=all +Maintenance and Repair:solaris:cmd:::/usr/sbin/cpustat:privs=basic,cpc_cpu +Maintenance and Repair:solaris:cmd:::/usr/bin/pgstat:privs=basic,cpc_cpu +Maintenance and Repair:solaris:cmd:::/usr/bin/kstat:privs=basic,cpc_cpu Media Backup:suser:cmd:::/usr/bin/mt:euid=0 Media Backup:suser:cmd:::/usr/lib/fs/ufs/ufsdump:euid=0;gid=sys Media Backup:suser:cmd:::/usr/sbin/tar:euid=0 diff --git a/usr/src/pkg/manifests/runtime-perl-510-module-sun-solaris.mf b/usr/src/pkg/manifests/runtime-perl-510-module-sun-solaris.mf index a49e15c435..ae411adc74 100644 --- a/usr/src/pkg/manifests/runtime-perl-510-module-sun-solaris.mf +++ b/usr/src/pkg/manifests/runtime-perl-510-module-sun-solaris.mf @@ -61,6 +61,7 @@ dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Intrs dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Kstat dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Lgrp dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/PerlGcc +dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Pg dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Privilege dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Project dir path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Task @@ -134,6 +135,7 @@ file \ path=usr/perl5/5.10.0/lib/PLAT-solaris-64int/auto/Sun/Solaris/Utils/Utils.so file path=usr/perl5/5.10.0/lib/Sun/Solaris/BSM/_BSMparse.pm file path=usr/perl5/5.10.0/lib/Sun/Solaris/PerlGcc/Config.pm +file path=usr/perl5/5.10.0/lib/Sun/Solaris/Pg.pm license cr_Sun license=cr_Sun license usr/src/cmd/perl/THIRDPARTYLICENSE \ license=usr/src/cmd/perl/THIRDPARTYLICENSE diff --git a/usr/src/pkg/manifests/runtime-perl-584.mf b/usr/src/pkg/manifests/runtime-perl-584.mf index cb917fb8a7..d784e76034 100644 --- a/usr/src/pkg/manifests/runtime-perl-584.mf +++ b/usr/src/pkg/manifests/runtime-perl-584.mf @@ -336,6 +336,7 @@ file path=usr/perl5/5.8.4/lib/Sun/Solaris/Exacct/Object.pm mode=0444 file path=usr/perl5/5.8.4/lib/Sun/Solaris/Intrs.pm mode=0444 file path=usr/perl5/5.8.4/lib/Sun/Solaris/Kstat.pm mode=0444 file path=usr/perl5/5.8.4/lib/Sun/Solaris/Lgrp.pm mode=0444 +file path=usr/perl5/5.8.4/lib/Sun/Solaris/Pg.pm mode=0444 file path=usr/perl5/5.8.4/lib/Sun/Solaris/Privilege.pm mode=0444 file path=usr/perl5/5.8.4/lib/Sun/Solaris/Project.pm mode=0444 file path=usr/perl5/5.8.4/lib/Sun/Solaris/Task.pm mode=0444 diff --git a/usr/src/pkg/manifests/system-extended-system-utilities.mf b/usr/src/pkg/manifests/system-extended-system-utilities.mf index dca0a84c9b..3bda6123fb 100644 --- a/usr/src/pkg/manifests/system-extended-system-utilities.mf +++ b/usr/src/pkg/manifests/system-extended-system-utilities.mf @@ -117,6 +117,8 @@ file path=usr/bin/newform mode=0555 file path=usr/bin/news mode=0555 file path=usr/bin/nl mode=0555 file path=usr/bin/pack mode=0555 +file path=usr/bin/pginfo mode=0555 +file path=usr/bin/pgstat mode=0555 file path=usr/bin/sdiff mode=0555 file path=usr/bin/spell mode=0555 file path=usr/bin/split mode=0555 diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c index a5f1a52e34..3aa3b67e29 100644 --- a/usr/src/uts/common/disp/cmt.c +++ b/usr/src/uts/common/disp/cmt.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/systm.h> @@ -256,7 +255,7 @@ pg_cmt_policy(pghw_type_t hw) case PGHW_CHIP: return (CMT_BALANCE); case PGHW_CACHE: - return (CMT_AFFINITY); + return (CMT_AFFINITY | CMT_BALANCE); case PGHW_POW_ACTIVE: case PGHW_POW_IDLE: return (CMT_BALANCE); @@ -1470,6 +1469,11 @@ pg_cmt_prune(pg_cmt_t *pg_bad, pg_cmt_t **lineage, int *sz, cpu_pg_t *pgdata) ASSERT(MUTEX_HELD(&cpu_lock)); + /* + * Inform pghw layer that this PG is pruned. + */ + pghw_cmt_fini((pghw_t *)pg_bad); + hw = ((pghw_t *)pg_bad)->pghw_hw; if (hw == PGHW_POW_ACTIVE) { diff --git a/usr/src/uts/common/os/cap_util.c b/usr/src/uts/common/os/cap_util.c index 16ff7f45fd..8eca9bf0a6 100644 --- a/usr/src/uts/common/os/cap_util.c +++ b/usr/src/uts/common/os/cap_util.c @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -111,12 +110,14 @@ #include <sys/cmn_err.h> #include <sys/cpuvar.h> #include <sys/ddi.h> +#include <sys/systm.h> #include <sys/disp.h> #include <sys/sdt.h> #include <sys/sunddi.h> #include <sys/thread.h> #include <sys/pghw.h> #include <sys/cmt.h> +#include <sys/policy.h> #include <sys/x_call.h> #include <sys/cap_util.h> @@ -226,6 +227,8 @@ static kcpc_request_list_t *cu_cpc_reqs = NULL; * * cu_cpu_id CPU ID for this kstat * + * cu_pg_id PG ID for this kstat + * * cu_generation Generation value that increases whenever any CPU goes * offline or online. Two kstat snapshots for the same * CPU may only be compared if they have the same @@ -245,25 +248,29 @@ static kcpc_request_list_t *cu_cpc_reqs = NULL; * cu_cpu_rate Utilization rate, expressed in operations per second. * * cu_cpu_rate_max Maximum observed value of utilization rate. + * + * cu_cpu_relationship Name of sharing relationship for the PG in this kstat */ struct cu_cpu_kstat { kstat_named_t cu_cpu_id; - kstat_named_t cu_generation; kstat_named_t cu_pg_id; + kstat_named_t cu_generation; kstat_named_t cu_cpu_util; kstat_named_t cu_cpu_time_running; kstat_named_t cu_cpu_time_stopped; kstat_named_t cu_cpu_rate; kstat_named_t cu_cpu_rate_max; + kstat_named_t cu_cpu_relationship; } cu_cpu_kstat = { - { "id", KSTAT_DATA_UINT32 }, + { "cpu_id", KSTAT_DATA_UINT32 }, + { "pg_id", KSTAT_DATA_INT32 }, { "generation", KSTAT_DATA_UINT32 }, - { "pg_id", KSTAT_DATA_LONG }, { "hw_util", KSTAT_DATA_UINT64 }, { "hw_util_time_running", KSTAT_DATA_UINT64 }, { "hw_util_time_stopped", KSTAT_DATA_UINT64 }, { "hw_util_rate", KSTAT_DATA_UINT64 }, { "hw_util_rate_max", KSTAT_DATA_UINT64 }, + { "relationship", KSTAT_DATA_STRING }, }; /* @@ -1289,8 +1296,9 @@ cu_cpu_fini(cpu_t *cp) static void cu_cpu_kstat_create(pghw_t *pg, cu_cntr_info_t *cntr_info) { - char *class, *sh_name; kstat_t *ks; + char *sharing = pghw_type_string(pg->pghw_hw); + char name[KSTAT_STRLEN + 1]; /* * Just return when no counter info or CPU @@ -1299,14 +1307,14 @@ cu_cpu_kstat_create(pghw_t *pg, cu_cntr_info_t *cntr_info) return; /* - * Get the class name from the leaf PG that this CPU belongs to. - * If there are no PGs, just use the default class "cpu". + * Canonify PG name to conform to kstat name rules */ - class = pg ? pghw_type_string(pg->pghw_hw) : "cpu"; - sh_name = pg ? pghw_type_shortstring(pg->pghw_hw) : "cpu"; + (void) strncpy(name, pghw_type_string(pg->pghw_hw), KSTAT_STRLEN + 1); + strident_canon(name, TASKQ_NAMELEN + 1); - if ((ks = kstat_create_zone("pg_cpu", cntr_info->ci_cpu->cpu_id, - sh_name, class, KSTAT_TYPE_NAMED, + if ((ks = kstat_create_zone("pg_hw_perf_cpu", + cntr_info->ci_cpu->cpu_id, + name, "processor_group", KSTAT_TYPE_NAMED, sizeof (cu_cpu_kstat) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID)) == NULL) return; @@ -1314,6 +1322,7 @@ cu_cpu_kstat_create(pghw_t *pg, cu_cntr_info_t *cntr_info) ks->ks_lock = &pg_cpu_kstat_lock; ks->ks_data = &cu_cpu_kstat; ks->ks_update = cu_cpu_kstat_update; + ks->ks_data_size += strlen(sharing) + 1; ks->ks_private = cntr_info; cntr_info->ci_kstat = ks; @@ -1336,29 +1345,50 @@ cu_cpu_kstat_update(kstat_t *ksp, int rw) if (rw == KSTAT_WRITE) return (EACCES); + cp = cntr_info->ci_cpu; + pg = cntr_info->ci_pg; + kstat->cu_cpu_id.value.ui32 = cp->cpu_id; + kstat->cu_pg_id.value.i32 = ((pg_t *)pg)->pg_id; + + /* + * The caller should have priv_cpc_cpu privilege to get utilization + * data. Callers who do not have the privilege will see zeroes as the + * values. + */ + if (secpolicy_cpc_cpu(crgetcred()) != 0) { + kstat->cu_generation.value.ui32 = cp->cpu_generation; + kstat_named_setstr(&kstat->cu_cpu_relationship, + pghw_type_string(pg->pghw_hw)); + + kstat->cu_cpu_util.value.ui64 = 0; + kstat->cu_cpu_rate.value.ui64 = 0; + kstat->cu_cpu_rate_max.value.ui64 = 0; + kstat->cu_cpu_time_running.value.ui64 = 0; + kstat->cu_cpu_time_stopped.value.ui64 = 0; + + return (0); + } + kpreempt_disable(); /* * Update capacity and utilization statistics needed for CPU's PG (CPU) * kstats */ - cp = cntr_info->ci_cpu; + (void) cu_cpu_update(cp, B_TRUE); - pg = cntr_info->ci_pg; stats = cntr_info->ci_stats; - kstat->cu_cpu_id.value.ui32 = cp->cpu_id; kstat->cu_generation.value.ui32 = cp->cpu_generation; - if (pg == NULL) - kstat->cu_pg_id.value.l = -1; - else - kstat->cu_pg_id.value.l = pg->pghw_pg.pg_id; + kstat_named_setstr(&kstat->cu_cpu_relationship, + pghw_type_string(pg->pghw_hw)); kstat->cu_cpu_util.value.ui64 = stats->cs_value_total; kstat->cu_cpu_rate.value.ui64 = stats->cs_rate; kstat->cu_cpu_rate_max.value.ui64 = stats->cs_rate_max; kstat->cu_cpu_time_running.value.ui64 = stats->cs_time_running; kstat->cu_cpu_time_stopped.value.ui64 = stats->cs_time_stopped; + /* * Counters are stopped now, so the cs_time_stopped was last * updated at cs_time_start time. Add the time passed since then diff --git a/usr/src/uts/common/os/pghw.c b/usr/src/uts/common/os/pghw.c index 534cb2c540..7b4b46f6ab 100644 --- a/usr/src/uts/common/os/pghw.c +++ b/usr/src/uts/common/os/pghw.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include <sys/systm.h> @@ -30,6 +29,7 @@ #include <sys/cpuvar.h> #include <sys/kmem.h> #include <sys/cmn_err.h> +#include <sys/policy.h> #include <sys/group.h> #include <sys/pg.h> #include <sys/pghw.h> @@ -117,7 +117,7 @@ struct pghw_kstat { kstat_named_t pg_hw; kstat_named_t pg_policy; } pghw_kstat = { - { "id", KSTAT_DATA_UINT32 }, + { "id", KSTAT_DATA_INT32 }, { "pg_class", KSTAT_DATA_STRING }, { "ncpus", KSTAT_DATA_UINT32 }, { "instance_id", KSTAT_DATA_UINT32 }, @@ -135,13 +135,15 @@ kmutex_t pghw_kstat_lock; * * kstat fields: * - * pgid PG ID for PG described by this kstat + * pg_id PG ID for PG described by this kstat + * + * pg_parent Parent PG ID. The value -1 means "no parent". * * pg_ncpus Number of CPUs within this PG * * pg_cpus String describing CPUs within this PG * - * pg_sharing Name of sharing relationship for this PG + * pg_relationship Name of sharing relationship for this PG * * pg_generation Generation value that increases whenever any CPU leaves * or joins PG. Two kstat snapshots for the same @@ -164,6 +166,7 @@ kmutex_t pghw_kstat_lock; */ struct pghw_cu_kstat { kstat_named_t pg_id; + kstat_named_t pg_parent_id; kstat_named_t pg_ncpus; kstat_named_t pg_generation; kstat_named_t pg_hw_util; @@ -172,9 +175,10 @@ struct pghw_cu_kstat { kstat_named_t pg_hw_util_rate; kstat_named_t pg_hw_util_rate_max; kstat_named_t pg_cpus; - kstat_named_t pg_sharing; + kstat_named_t pg_relationship; } pghw_cu_kstat = { - { "id", KSTAT_DATA_UINT32 }, + { "pg_id", KSTAT_DATA_INT32 }, + { "parent_pg_id", KSTAT_DATA_INT32 }, { "ncpus", KSTAT_DATA_UINT32 }, { "generation", KSTAT_DATA_UINT32 }, { "hw_util", KSTAT_DATA_UINT64 }, @@ -183,7 +187,7 @@ struct pghw_cu_kstat { { "hw_util_rate", KSTAT_DATA_UINT64 }, { "hw_util_rate_max", KSTAT_DATA_UINT64 }, { "cpus", KSTAT_DATA_STRING }, - { "sharing_relation", KSTAT_DATA_STRING }, + { "relationship", KSTAT_DATA_STRING }, }; /* @@ -213,6 +217,7 @@ static void pghw_set_remove(group_t *, pghw_t *); static void pghw_cpulist_alloc(pghw_t *); static int cpu2id(void *); +static pgid_t pghw_parent_id(pghw_t *); /* * Initialize the physical portion of a hardware PG @@ -261,6 +266,8 @@ pghw_fini(pghw_t *pg) { group_t *hwset; + pghw_cmt_fini(pg); + hwset = pghw_set_lookup(pg->pghw_hw); ASSERT(hwset != NULL); @@ -271,6 +278,14 @@ pghw_fini(pghw_t *pg) if (pg->pghw_kstat != NULL) kstat_delete(pg->pghw_kstat); +} + +/* + * PG is removed from CMT hierarchy + */ +void +pghw_cmt_fini(pghw_t *pg) +{ /* * Destroy string representation of CPUs */ @@ -280,8 +295,13 @@ pghw_fini(pghw_t *pg) pg->pghw_cpulist = NULL; } - if (pg->pghw_cu_kstat != NULL) + /* + * Destroy CU kstats + */ + if (pg->pghw_cu_kstat != NULL) { kstat_delete(pg->pghw_cu_kstat); + pg->pghw_cu_kstat = NULL; + } } /* @@ -467,34 +487,6 @@ pghw_type_string(pghw_type_t hw) } /* - * Return a short string name given a pg_hw sharing type - */ -char * -pghw_type_shortstring(pghw_type_t hw) -{ - switch (hw) { - case PGHW_IPIPE: - return ("instr_pipeline"); - case PGHW_CACHE: - return ("Cache"); - case PGHW_FPU: - return ("FPU"); - case PGHW_MPIPE: - return ("memory_pipeline"); - case PGHW_CHIP: - return ("Socket"); - case PGHW_MEMORY: - return ("Memory"); - case PGHW_POW_ACTIVE: - return ("CPU_PM_Active"); - case PGHW_POW_IDLE: - return ("CPU_PM_Idle"); - default: - return ("unknown"); - } -} - -/* * Create / Update routines for PG hw kstats * * It is the intention of these kstats to provide some level @@ -504,10 +496,17 @@ pghw_type_shortstring(pghw_type_t hw) void pghw_kstat_create(pghw_t *pg) { - char *class = pghw_type_string(pg->pghw_hw); + char *sharing = pghw_type_string(pg->pghw_hw); + char name[KSTAT_STRLEN + 1]; /* - * Create a physical pg kstat + * Canonify PG name to conform to kstat name rules + */ + (void) strncpy(name, pghw_type_string(pg->pghw_hw), KSTAT_STRLEN + 1); + strident_canon(name, KSTAT_STRLEN + 1); + + /* + * Create a hardware performance kstat */ if ((pg->pghw_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, "pg", "pg", @@ -531,8 +530,8 @@ pghw_kstat_create(pghw_t *pg) /* * Create a physical pg kstat */ - if ((pg->pghw_cu_kstat = kstat_create("pg", ((pg_t *)pg)->pg_id, - "hardware", class, + if ((pg->pghw_cu_kstat = kstat_create("pg_hw_perf", ((pg_t *)pg)->pg_id, + name, "processor_group", KSTAT_TYPE_NAMED, sizeof (pghw_cu_kstat) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) != NULL) { @@ -540,7 +539,7 @@ pghw_kstat_create(pghw_t *pg) pg->pghw_cu_kstat->ks_data = &pghw_cu_kstat; pg->pghw_cu_kstat->ks_update = pghw_cu_kstat_update; pg->pghw_cu_kstat->ks_private = pg; - pg->pghw_cu_kstat->ks_data_size += strlen(class) + 1; + pg->pghw_cu_kstat->ks_data_size += strlen(sharing) + 1; /* Allow space for CPU strings */ pg->pghw_cu_kstat->ks_data_size += PGHW_KSTAT_STR_LEN_MAX; pg->pghw_cu_kstat->ks_data_size += pg_cpulist_maxlen; @@ -572,11 +571,21 @@ pghw_cu_kstat_update(kstat_t *ksp, int rw) struct pghw_cu_kstat *pgsp = &pghw_cu_kstat; pghw_t *pg = ksp->ks_private; pghw_util_t *hw_util = &pg->pghw_stats; + boolean_t has_cpc_privilege; if (rw == KSTAT_WRITE) return (EACCES); - pgsp->pg_id.value.ui32 = ((pg_t *)pg)->pg_id; + /* + * Check whether the caller has priv_cpc_cpu privilege. If he doesn't, + * he will not get hardware utilization data. + */ + + has_cpc_privilege = (secpolicy_cpc_cpu(crgetcred()) == 0); + + pgsp->pg_id.value.i32 = ((pg_t *)pg)->pg_id; + pgsp->pg_parent_id.value.i32 = (int)pghw_parent_id(pg); + pgsp->pg_ncpus.value.ui32 = GROUP_SIZE(&((pg_t *)pg)->pg_cpus); /* @@ -613,22 +622,37 @@ pghw_cu_kstat_update(kstat_t *ksp, int rw) (void) group2intlist(&(((pg_t *)pg)->pg_cpus), pg->pghw_cpulist, pg->pghw_cpulist_len, cpu2id); } - cu_pg_update(pg); + + if (has_cpc_privilege) + cu_pg_update(pg); + mutex_exit(&cpu_lock); } pgsp->pg_generation.value.ui32 = pg->pghw_kstat_gen; - pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util; - pgsp->pg_hw_util_time_running.value.ui64 = hw_util->pghw_time_running; - pgsp->pg_hw_util_time_stopped.value.ui64 = hw_util->pghw_time_stopped; - pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate; - pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max; if (pg->pghw_cpulist != NULL) kstat_named_setstr(&pgsp->pg_cpus, pg->pghw_cpulist); else kstat_named_setstr(&pgsp->pg_cpus, ""); - kstat_named_setstr(&pgsp->pg_sharing, pghw_type_string(pg->pghw_hw)); + kstat_named_setstr(&pgsp->pg_relationship, + pghw_type_string(pg->pghw_hw)); + + if (has_cpc_privilege) { + pgsp->pg_hw_util.value.ui64 = hw_util->pghw_util; + pgsp->pg_hw_util_time_running.value.ui64 = + hw_util->pghw_time_running; + pgsp->pg_hw_util_time_stopped.value.ui64 = + hw_util->pghw_time_stopped; + pgsp->pg_hw_util_rate.value.ui64 = hw_util->pghw_rate; + pgsp->pg_hw_util_rate_max.value.ui64 = hw_util->pghw_rate_max; + } else { + pgsp->pg_hw_util.value.ui64 = 0; + pgsp->pg_hw_util_time_running.value.ui64 = 0; + pgsp->pg_hw_util_time_stopped.value.ui64 = 0; + pgsp->pg_hw_util_rate.value.ui64 = 0; + pgsp->pg_hw_util_rate_max.value.ui64 = 0; + } return (0); } @@ -701,3 +725,24 @@ cpu2id(void *v) return (cp->cpu_id); } + +/* + * Return parent ID or -1 if there is no parent. + * All hardware PGs are currently also CMT PGs, but for safety we check the + * class matches cmt before we upcast the pghw pointer to pg_cmt_t. + */ +static pgid_t +pghw_parent_id(pghw_t *pghw) +{ + pg_t *pg = (pg_t *)pghw; + pgid_t parent_id = -1; + + if (pg != NULL && strcmp(pg->pg_class->pgc_name, "cmt") == 0) { + pg_cmt_t *cmt = (pg_cmt_t *)pg; + pg_t *parent = (pg_t *)cmt->cmt_parent; + if (parent != NULL) + parent_id = parent->pg_id; + } + + return (parent_id); +} diff --git a/usr/src/uts/common/sys/pg.h b/usr/src/uts/common/sys/pg.h index 0a61530982..f2bd3df514 100644 --- a/usr/src/uts/common/sys/pg.h +++ b/usr/src/uts/common/sys/pg.h @@ -18,9 +18,9 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _PG_H @@ -43,7 +43,7 @@ extern "C" { #include <sys/types.h> #include <sys/kstat.h> -typedef uint_t pgid_t; /* processor group id */ +typedef int pgid_t; /* processor group id */ typedef uint_t pg_cid_t; /* processor group class id */ struct pg; diff --git a/usr/src/uts/common/sys/pghw.h b/usr/src/uts/common/sys/pghw.h index f0550dba7e..f8567c6f09 100644 --- a/usr/src/uts/common/sys/pghw.h +++ b/usr/src/uts/common/sys/pghw.h @@ -20,8 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _PGHW_H @@ -153,6 +152,7 @@ extern void pghw_init(pghw_t *, cpu_t *, pghw_type_t); extern void pghw_fini(pghw_t *); extern void pghw_cpu_add(pghw_t *, cpu_t *); extern pghw_t *pghw_place_cpu(cpu_t *, pghw_type_t); +extern void pghw_cmt_fini(pghw_t *); /* * Physical ID cache creation / destruction @@ -177,7 +177,6 @@ extern pghw_type_t pg_plat_hw_rank(pghw_type_t, pghw_type_t); * String representation of the hardware type */ extern char *pghw_type_string(pghw_type_t); -extern char *pghw_type_shortstring(pghw_type_t); /* * What comprises a "core" may vary across processor implementations, |