summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJerry Jelinek <jerry.jelinek@joyent.com>2012-01-16 19:14:44 +0000
committerJerry Jelinek <jerry.jelinek@joyent.com>2012-01-16 19:14:44 +0000
commitd39078e5cb9b528444b623c311246974e752e476 (patch)
tree9ed464aab5b2b0a62d4877f4c0f06bc96a9b97eb
parent887d2a84c612cea61b6ad544f54cf790cfb9de3e (diff)
downloadillumos-joyent-d39078e5cb9b528444b623c311246974e752e476.tar.gz
OS-507 need better fss observability
-rw-r--r--manifest2
-rw-r--r--usr/src/cmd/stat/Makefile5
-rw-r--r--usr/src/cmd/stat/zschedstat/Makefile50
-rw-r--r--usr/src/cmd/stat/zschedstat/zschedstat.c335
-rw-r--r--usr/src/man/man1m/Makefile2
-rw-r--r--usr/src/man/man1m/zschedstat.1m202
-rw-r--r--usr/src/uts/common/disp/fss.c60
-rw-r--r--usr/src/uts/common/os/kstat_fr.c6
-rw-r--r--usr/src/uts/common/os/zone.c13
-rw-r--r--usr/src/uts/common/sys/fss.h4
-rw-r--r--usr/src/uts/common/sys/zone.h21
11 files changed, 693 insertions, 7 deletions
diff --git a/manifest b/manifest
index 7c4a07b1c5..d9b0d647af 100644
--- a/manifest
+++ b/manifest
@@ -2351,6 +2351,7 @@ f usr/bin/zcat 0555 root bin
f usr/bin/ziostat 0555 root bin
s usr/bin/zonename=../../sbin/zonename
f usr/bin/zonestat 0555 root bin
+f usr/bin/zschedstat 0555 root bin
d usr/ccs 0755 root bin
d usr/ccs/bin 0755 root bin
d usr/ccs/bin/amd64 0755 root bin
@@ -11983,6 +11984,7 @@ f usr/share/man/man1m/zic.1m 0444 root bin
f usr/share/man/man1m/zoneadm.1m 0444 root bin
f usr/share/man/man1m/zonecfg.1m 0444 root bin
f usr/share/man/man1m/zpool.1m 0444 root bin
+f usr/share/man/man1m/zschedstat.1m 0444 root bin
f usr/share/man/man1m/zstreamdump.1m 0444 root bin
d usr/share/man/man2 0755 root bin
f usr/share/man/man2/Intro.2 0444 root bin
diff --git a/usr/src/cmd/stat/Makefile b/usr/src/cmd/stat/Makefile
index faaa19f42c..01b96d14d2 100644
--- a/usr/src/cmd/stat/Makefile
+++ b/usr/src/cmd/stat/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2011 Joyent, Inc. All rights reserved.
+# Copyright 2011, 2012, Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# cmd/stat/Makefile
@@ -33,7 +33,8 @@ SUBDIRS= arcstat \
mpstat \
vfsstat \
vmstat \
- ziostat
+ ziostat \
+ zschedstat
all := TARGET = all
install := TARGET = install
diff --git a/usr/src/cmd/stat/zschedstat/Makefile b/usr/src/cmd/stat/zschedstat/Makefile
new file mode 100644
index 0000000000..b8654d0ba4
--- /dev/null
+++ b/usr/src/cmd/stat/zschedstat/Makefile
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+
+PROG= zschedstat
+OBJS = zschedstat.o
+SRCS =$(OBJS:%.o=%.c) $(COMMON_SRCS)
+
+LDLIBS += -lkstat
+
+lint := LINTFLAGS = -muxs
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint: lint_SRCS
+
+check:
+ $(CSTYLE) -pP $(SRCS:%=%)
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/stat/zschedstat/zschedstat.c b/usr/src/cmd/stat/zschedstat/zschedstat.c
new file mode 100644
index 0000000000..ba89e2403f
--- /dev/null
+++ b/usr/src/cmd/stat/zschedstat/zschedstat.c
@@ -0,0 +1,335 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ */
+
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <kstat.h>
+#include <errno.h>
+#include <sys/zone.h>
+
+/*
+ * One zone's sample.  The first group of fields holds the values read from
+ * the kstats on the current cycle; the prv_* fields hold the cumulative
+ * counters from the previous cycle so print_data() can report the change
+ * over the interval.
+ */
+typedef struct {
+	boolean_t valid;		/* a kstat was read for this zone */
+	uint64_t rqueue;		/* zone_misc "run_queue" */
+	uint64_t rticks;		/* zone_misc "run_ticks" */
+	uint32_t fss_share_pct;		/* "fss_share_percent", printed / 10 */
+	uint64_t fss_pri_hi;		/* zone_misc "fss_pri_hi" */
+	uint64_t fss_pri_avg;		/* zone_misc "fss_pri_avg" */
+	double avrun1;			/* "avenrun_1min" / FSCALE */
+	uint64_t ns_usr;		/* zone_misc "nsec_user" */
+	uint64_t ns_sys;		/* zone_misc "nsec_sys" */
+	uint64_t ns_wt;			/* zone_misc "nsec_waitrq" */
+	uint64_t cpu_cap;		/* zone_caps "value" */
+	uint64_t cpu_baseline;		/* zone_caps "baseline"; not printed */
+	uint64_t cpu_cap_usage;		/* zone_caps "usage" */
+	uint64_t above_base_sec;	/* zone_caps "above_base_sec" */
+	uint64_t delay_cnt;		/* zone_vfs "delay_cnt" */
+	uint64_t delay_time;		/* zone_vfs "delay_time" */
+	/* Values from the previous cycle so we can diff */
+	uint64_t prv_rticks;
+	uint64_t prv_ns_usr;
+	uint64_t prv_ns_sys;
+	uint64_t prv_ns_wt;
+	uint64_t prv_above_base_sec;
+	uint64_t prv_delay_cnt;
+	uint64_t prv_delay_time;
+} zinfo_t;
+
+/*
+ * MAX_ZONEID is only 10000, so it is a lot faster to go direct to the entry
+ * we want, even though valid entries in this array will be sparse.
+ * The array is indexed by the kstat instance number of the zone's kstats.
+ */
+
+static zinfo_t zinfo[MAX_ZONEID];
+/*
+ * Length of a clock tick in nanoseconds.  Stays 0 until read_kstats() has
+ * picked it up from the unix "system_misc" kstat; it is only looked up while
+ * it is still 0.
+ */
+static uint32_t nsec_per_tick = 0;
+
+/*
+ * Write the command synopsis to stderr and terminate with exit status 1.
+ * This function does not return.
+ */
+static void
+usage()
+{
+	(void) fputs("zschedstat [-r] [interval [count]]\n", stderr);
+	exit(1);
+}
+
+/*
+ * Return the 64-bit value of the statistic called name in ksp, or 0 when
+ * the lookup fails (for example because the running kernel does not export
+ * that statistic).  The original code dereferenced the lookup result
+ * unconditionally.
+ */
+static uint64_t
+named_ui64(kstat_t *ksp, char *name)
+{
+	kstat_named_t *kp;
+
+	kp = (kstat_named_t *)kstat_data_lookup(ksp, name);
+	return (kp == NULL ? 0 : kp->value.ui64);
+}
+
+/*
+ * Same as named_ui64(), for statistics read through the 32-bit member.
+ */
+static uint32_t
+named_ui32(kstat_t *ksp, char *name)
+{
+	kstat_named_t *kp;
+
+	kp = (kstat_named_t *)kstat_data_lookup(ksp, name);
+	return (kp == NULL ? 0 : kp->value.ui32);
+}
+
+/*
+ * Copy the scheduling statistics from a zone_misc kstat into zinfo[zid].
+ * The caller must already have read ksp and must pass a zid that is a valid
+ * index into zinfo[].  Statistics that cannot be found are recorded as 0.
+ */
+static void
+get_zone_misc(int zid, kstat_t *ksp)
+{
+	zinfo_t *zp = &zinfo[zid];
+
+	zp->rqueue = named_ui64(ksp, "run_queue");
+	zp->rticks = named_ui64(ksp, "run_ticks");
+	zp->fss_share_pct = named_ui32(ksp, "fss_share_percent");
+	zp->fss_pri_hi = named_ui64(ksp, "fss_pri_hi");
+	zp->fss_pri_avg = named_ui64(ksp, "fss_pri_avg");
+	/* The load average is exported as a fixed-point value. */
+	zp->avrun1 = (double)named_ui32(ksp, "avenrun_1min") / FSCALE;
+	zp->ns_usr = named_ui64(ksp, "nsec_user");
+	zp->ns_sys = named_ui64(ksp, "nsec_sys");
+	zp->ns_wt = named_ui64(ksp, "nsec_waitrq");
+}
+
+/*
+ * Copy the CPU cap statistics from a cpucaps_zone kstat into zinfo[zid].
+ * Same caller obligations as get_zone_misc().
+ */
+static void
+get_zone_caps(int zid, kstat_t *ksp)
+{
+	zinfo_t *zp = &zinfo[zid];
+
+	zp->cpu_cap = named_ui64(ksp, "value");
+	zp->cpu_baseline = named_ui64(ksp, "baseline");
+	zp->cpu_cap_usage = named_ui64(ksp, "usage");
+	zp->above_base_sec = named_ui64(ksp, "above_base_sec");
+}
+
+/*
+ * Copy the I/O throttle delay statistics from a zone_vfs kstat into
+ * zinfo[zid].  Same caller obligations as get_zone_misc().
+ */
+static void
+get_zone_vfs(int zid, kstat_t *ksp)
+{
+	zinfo_t *zp = &zinfo[zid];
+
+	zp->delay_cnt = named_ui64(ksp, "delay_cnt");
+	zp->delay_time = named_ui64(ksp, "delay_time");
+}
+
+/*
+ * Read the data for ksp; on failure print an error and exit with status 1.
+ */
+static void
+read_or_die(kstat_ctl_t *kc, kstat_t *ksp)
+{
+	if (kstat_read(kc, ksp, NULL) == -1) {
+		(void) fprintf(stderr, "read failed\n");
+		exit(1);
+	}
+}
+
+/*
+ * Take one sample.  Walks the whole kstat chain and, for every zone_misc,
+ * cpucaps_zone and zone_vfs kstat, stores the statistics in the zinfo[]
+ * entry selected by the kstat's instance number and marks that entry valid.
+ * The tick length is also picked up from unix "system_misc" the first time
+ * it is seen.  Exits with status 1 if the kstat chain cannot be opened or a
+ * kstat cannot be read.
+ */
+static void
+read_kstats()
+{
+	kstat_ctl_t *kc;
+	kstat_t *ksp;
+
+	if ((kc = kstat_open()) == NULL) {
+		(void) fprintf(stderr, "open failed\n");
+		exit(1);
+	}
+
+	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+		void (*getter)(int, kstat_t *) = NULL;
+
+		if (strcmp("zones", ksp->ks_module) == 0 &&
+		    strcmp("zone_misc", ksp->ks_class) == 0) {
+			getter = get_zone_misc;
+		} else if (strcmp("caps", ksp->ks_module) == 0 &&
+		    strcmp("zone_caps", ksp->ks_class) == 0 &&
+		    strncmp("cpucaps_zone", ksp->ks_name, 12) == 0) {
+			getter = get_zone_caps;
+		} else if (strcmp("zone_vfs", ksp->ks_module) == 0) {
+			getter = get_zone_vfs;
+		}
+
+		if (getter != NULL) {
+			int zid = ksp->ks_instance;
+
+			/*
+			 * The instance number is used as an index into
+			 * zinfo[]; never write outside the array.
+			 */
+			if (zid < 0 || zid >= MAX_ZONEID)
+				continue;
+
+			read_or_die(kc, ksp);
+			zinfo[zid].valid = B_TRUE;
+			getter(zid, ksp);
+
+		} else if (nsec_per_tick == 0 &&
+		    strcmp("unix", ksp->ks_module) == 0 &&
+		    strcmp("system_misc", ksp->ks_name) == 0) {
+			kstat_named_t *kp;
+
+			read_or_die(kc, ksp);
+
+			/*
+			 * Leave nsec_per_tick at 0 when the statistic is not
+			 * there; the lookup is then retried on the next
+			 * sample.
+			 */
+			kp = (kstat_named_t *)kstat_data_lookup(ksp,
+			    "nsec_per_tick");
+			if (kp != NULL)
+				nsec_per_tick = kp->value.ui32;
+		}
+	}
+
+	(void) kstat_close(kc);
+}
+
+/*
+ * Return the difference between two cumulative nanosecond counters,
+ * expressed in seconds.
+ */
+static float
+fmt_nsec(uint64_t curr, uint64_t prv)
+{
+	uint64_t delta = curr - prv;
+	float secs;
+
+	secs = (float)delta / (long)NANOSEC;
+	return (secs);
+}
+
+/*
+ * Return the difference between two cumulative microsecond counters,
+ * expressed in milliseconds.
+ */
+static float
+fmt_usec(uint64_t curr, uint64_t prv)
+{
+	uint64_t delta = curr - prv;
+	float msecs;
+
+	msecs = (float)delta / (long)MILLISEC;
+	return (msecs);
+}
+
+/*
+ * Return the difference between two cumulative tick counters, expressed in
+ * seconds.  Ticks are scaled by the global nsec_per_tick, so the result is
+ * 0 for as long as that is still 0.
+ */
+static float
+fmt_ticks(uint64_t curr, uint64_t prv)
+{
+	uint64_t delta_ns = (curr - prv) * nsec_per_tick;
+	float secs;
+
+	secs = (float)delta_ns / (long)NANOSEC;
+	return (secs);
+}
+
+/*
+ * Print one line for every zone marked valid by the last read_kstats(),
+ * then clear the valid flag and save the cumulative counters in the prv_*
+ * fields for the next cycle.  When parse is set the fields are
+ * comma-separated and no heading is printed; otherwise a column heading
+ * precedes the data.
+ */
+static void
+print_data(boolean_t parse)
+{
+	int zid;
+	char *fmt;
+
+	if (!parse) {
+		fmt = "%4d %2lld %6.2f %5.1f %2lld %2lld %5lld %5lld %2lld "
+		    "%5.2f %4lld %6.2f %6.2f %6.2f %6.2f\n";
+
+		(void) printf("%4s %2s %6s %5s %2s %2s %5s %5s %2s "
+		    "%5s %4s %6s %6s %6s %6s\n",
+		    "zid", "rq", "rsec", "sh%", "ph", "pa", "cap", "usage",
+		    "bs", "1mla", "dcnt", "dms", "user", "sys", "wtrq");
+	} else {
+		fmt = "%d,%lld,%.2f,%.1f,%lld,%lld,%lld,%lld,%lld,"
+		    "%.2f,%lld,%.2f,%.2f,%.2f,%.2f\n";
+	}
+
+	for (zid = 0; zid < MAX_ZONEID; zid++) {
+		zinfo_t *zp = &zinfo[zid];
+		float run_secs, share_pct, delay_ms;
+		float usr_secs, sys_secs, wait_secs;
+		uint64_t burst_secs, delays;
+
+		if (zp->valid == B_FALSE)
+			continue;
+
+		/* Per-interval values: current sample minus previous one. */
+		run_secs = fmt_ticks(zp->rticks, zp->prv_rticks);
+		share_pct = (float)zp->fss_share_pct / (float)10;
+		burst_secs = zp->above_base_sec - zp->prv_above_base_sec;
+		delays = zp->delay_cnt - zp->prv_delay_cnt;
+		delay_ms = fmt_usec(zp->delay_time, zp->prv_delay_time);
+		usr_secs = fmt_nsec(zp->ns_usr, zp->prv_ns_usr);
+		sys_secs = fmt_nsec(zp->ns_sys, zp->prv_ns_sys);
+		wait_secs = fmt_nsec(zp->ns_wt, zp->prv_ns_wt);
+
+		/*LINTED E_SEC_PRINTF_VAR_FMT*/
+		(void) printf(fmt, zid, zp->rqueue, run_secs, share_pct,
+		    zp->fss_pri_hi, zp->fss_pri_avg, zp->cpu_cap,
+		    zp->cpu_cap_usage, burst_secs, zp->avrun1, delays,
+		    delay_ms, usr_secs, sys_secs, wait_secs);
+
+		/* Roll the cumulative counters over for the next cycle. */
+		zp->prv_rticks = zp->rticks;
+		zp->prv_ns_usr = zp->ns_usr;
+		zp->prv_ns_sys = zp->ns_sys;
+		zp->prv_ns_wt = zp->ns_wt;
+		zp->prv_above_base_sec = zp->above_base_sec;
+		zp->prv_delay_cnt = zp->delay_cnt;
+		zp->prv_delay_time = zp->delay_time;
+		zp->valid = B_FALSE;
+	}
+}
+
+/*
+ * Convert a numeric operand to an int.  Anything that is not a complete
+ * decimal number in the range [min, INT_MAX] is treated as a usage error,
+ * so this only returns for acceptable input.
+ */
+static int
+parse_operand(const char *str, int min)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(str, &end, 10);
+	if (errno != 0 || end == str || *end != '\0' ||
+	    val < min || val > INT_MAX)
+		usage();
+
+	return ((int)val);
+}
+
+/*
+ * zschedstat [-r] [interval [count]]
+ *
+ * Print a report every interval seconds (default 5), forever or until count
+ * reports have been printed.  -r selects comma-separated output.  The
+ * interval must be a number >= 0 and the count a number >= 1; previously
+ * these went through atoi(), so a count of 0 or less never terminated and a
+ * negative interval turned into an enormous sleep.
+ */
+int
+main(int argc, char **argv)
+{
+	int interval = 5;
+	int count = 0;
+	int forever = 1;
+	int arg;
+	extern int optind;
+	boolean_t do_parse = B_FALSE;
+
+	while ((arg = getopt(argc, argv, "r")) != EOF) {
+		switch (arg) {
+		case 'r':
+			do_parse = B_TRUE;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (argc > optind) {
+		interval = parse_operand(argv[optind], 0);
+		optind++;
+
+		if (argc > optind) {
+			count = parse_operand(argv[optind], 1);
+			forever = 0;
+			optind++;
+		}
+	}
+	if (argc > optind)
+		usage();
+
+	for (;;) {
+		read_kstats();
+		print_data(do_parse);
+		/*
+		 * Push each report out before sleeping; otherwise output
+		 * sent to a pipe or file sits in the stdio buffer.
+		 */
+		(void) fflush(stdout);
+		if (forever == 0 && --count <= 0)
+			break;
+		(void) sleep(interval);
+	}
+
+	return (0);
+}
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index abe1aeaede..0ca2ef6ff9 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -12,6 +12,7 @@
#
# Copyright 2011, Richard Lowe
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
include ../../Makefile.master
@@ -597,6 +598,7 @@ COMMON_MANFILES = 6to4relay.1m \
zoneadmd.1m \
zonecfg.1m \
zpool.1m \
+ zschedstat.1m \
zstreamdump.1m
i386_MANFILES = lms.1m \
diff --git a/usr/src/man/man1m/zschedstat.1m b/usr/src/man/man1m/zschedstat.1m
new file mode 100644
index 0000000000..61ea8353bf
--- /dev/null
+++ b/usr/src/man/man1m/zschedstat.1m
@@ -0,0 +1,202 @@
+'\" te
+.\" Copyright (c) 2012, Joyent, Inc. All Rights reserved
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
+.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
+.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH ZSCHEDSTAT 1M "Jan 16, 2012"
+.SH NAME
+zschedstat \- report per-zone CPU scheduling statistics
+.SH SYNOPSIS
+.LP
+.nf
+\fB/usr/bin/zschedstat\fR [\fB-r\fR] [\fIinterval\fR [\fIcount\fR]]
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBzschedstat\fR utility iteratively reports per-zone CPU-scheduling
+activity. The first iteration of output is for all time since boot; each
+subsequent iteration is for the prior interval only.
+.sp
+.LP
+The output of the \fBzschedstat\fR utility shows the following information.
+.sp
+.ne 2
+.na
+\fB\fBzid\fR\fR
+.ad
+.RS 10n
+zone ID
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBrq\fR\fR
+.ad
+.RS 10n
+The number of threads FSS saw in the run queue in the last second.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBrsec\fR\fR
+.ad
+.RS 10n
+The total number of seconds that FSS counted processes running for this
+zone during the sampling interval.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBsh%\fR\fR
+.ad
+.RS 10n
+The percent of the active shares FSS calculated for this zone. This column
+may not sum exactly to 100% due to rounding.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBph\fR\fR
+.ad
+.RS 10n
+The highest priority FSS calculated for a process during the last second
+(range 0-59). This will be 0 if FSS saw no runnable processes for the zone
+in the last second.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBpa\fR\fR
+.ad
+.RS 10n
+The average priority FSS calculated for all runnable processes during the last
+second that there were runnable processes for this zone (range 0-59).
+This data might be several seconds old if there were no runnable processes
+in the interval.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBcap\fR\fR
+.ad
+.RS 10n
+The current cpu-cap for the zone (in percent of a CPU).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBusage\fR\fR
+.ad
+.RS 10n
+The cpu-cap calculated usage for the zone in the interval (in percent of a CPU).
+The usage will be 0 if there is no cpu-cap.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBbs\fR\fR
+.ad
+.RS 10n
+The number of seconds during the interval that the zone was bursting.
+This will be 0 if there is no cpu-cap.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB1mla\fR\fR
+.ad
+.RS 10n
+The one minute load average for the zone.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBdcnt\fR\fR
+.ad
+.RS 10n
+The number of times that the ZFS I/O throttle delayed a process in the zone.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBdms\fR\fR
+.ad
+.RS 10n
+The total time, in milliseconds, of ZFS I/O throttle delay for processes in the
+zone.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBuser\fR\fR
+.ad
+.RS 10n
+The total number of seconds processes were running in user-level code.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBsys\fR\fR
+.ad
+.RS 10n
+The total number of seconds processes were running in the kernel.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBwtrq\fR\fR
+.ad
+.RS 10n
+The total number of seconds processes were waiting in the run queue to run.
+.RE
+
+.SH OPTIONS
+.sp
+.LP
+The following options are supported:
+.sp
+.ne 2
+.na
+\fB\fB-r\fR\fR
+.ad
+.RS 12n
+Display data in a comma-separated format.
+.RE
+
+.SH OPERANDS
+.sp
+.LP
+The following operands are supported:
+.sp
+.ne 2
+.na
+\fB\fIcount\fR\fR
+.ad
+.RS 12n
+Display only \fIcount\fR reports.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fIinterval\fR\fR
+.ad
+.RS 12n
+Report once each \fIinterval\fR seconds.
+.RE
diff --git a/usr/src/uts/common/disp/fss.c b/usr/src/uts/common/disp/fss.c
index 62301d65d8..af8826780c 100644
--- a/usr/src/uts/common/disp/fss.c
+++ b/usr/src/uts/common/disp/fss.c
@@ -814,6 +814,7 @@ fss_decay_usage()
fsszone_t *fsszone;
fsspri_t maxfsspri;
int psetid;
+ struct zone *zp;
mutex_enter(&fsspsets_lock);
/*
@@ -824,6 +825,8 @@ fss_decay_usage()
fsspset = &fsspsets[psetid];
mutex_enter(&fsspset->fssps_lock);
+ fsspset->fssps_gen++;
+
if (fsspset->fssps_cpupart == NULL ||
(fssproj = fsspset->fssps_list) == NULL) {
mutex_exit(&fsspset->fssps_lock);
@@ -843,6 +846,21 @@ fss_decay_usage()
fsspset->fssps_maxfsspri = maxfsspri;
do {
+ fsszone = fssproj->fssp_fsszone;
+ zp = fsszone->fssz_zone;
+
+ /*
+ * Reset zone's FSS kstats if they are from a
+ * previous cycle.
+ */
+ if (fsspset->fssps_gen != zp->zone_fss_gen) {
+ zp->zone_fss_gen = fsspset->fssps_gen;
+ zp->zone_fss_pri_hi = 0;
+ zp->zone_runq_cntr = 0;
+ zp->zone_fss_shr_pct = 0;
+ zp->zone_proc_cnt = 0;
+ }
+
/*
* Decay usage for each project running on
* this cpu partition.
@@ -850,9 +868,18 @@ fss_decay_usage()
fssproj->fssp_usage =
(fssproj->fssp_usage * FSS_DECAY_USG) /
FSS_DECAY_BASE + fssproj->fssp_ticks;
+
fssproj->fssp_ticks = 0;
- fsszone = fssproj->fssp_fsszone;
+ zp->zone_run_ticks += fssproj->fssp_zone_ticks;
+ /*
+ * This is the count for this one second cycle only,
+ * and not cumulative.
+ */
+ zp->zone_runq_cntr += fssproj->fssp_runnable;
+
+ fssproj->fssp_zone_ticks = 0;
+
/*
* Readjust the project's number of shares if it has
* changed since we checked it last time.
@@ -871,7 +898,7 @@ fss_decay_usage()
* Readjust the zone's number of shares if it
* has changed since we checked it last time.
*/
- zone_ext_shares = fsszone->fssz_zone->zone_shares;
+ zone_ext_shares = zp->zone_shares;
if (fsszone->fssz_rshares != zone_ext_shares) {
if (fsszone->fssz_runnable != 0) {
fsspset->fssps_shares -=
@@ -883,6 +910,12 @@ fss_decay_usage()
}
zone_int_shares = fsszone->fssz_shares;
pset_shares = fsspset->fssps_shares;
+
+ if (zp->zone_runq_cntr > 0 && pset_shares > 0)
+ /* in tenths of a pct */
+ zp->zone_fss_shr_pct =
+ (zone_ext_shares * 1000) / pset_shares;
+
/*
* Calculate fssp_shusage value to be used
* for fsspri increments for the next second.
@@ -1050,6 +1083,8 @@ fss_update_list(int i)
fssproc_t *fssproc;
fssproj_t *fssproj;
fsspri_t fsspri;
+ struct zone *zp;
+ pri_t fss_umdpri;
kthread_t *t;
int updated = 0;
@@ -1073,6 +1108,7 @@ fss_update_list(int i)
fssproj = FSSPROC2FSSPROJ(fssproc);
if (fssproj == NULL)
goto next;
+
if (fssproj->fssp_shares != 0) {
/*
* Decay fsspri value.
@@ -1096,11 +1132,28 @@ fss_update_list(int i)
fss_newpri(fssproc);
updated = 1;
+ fss_umdpri = fssproc->fss_umdpri;
+
+ /*
+ * Summarize a zone's process priorities for runnable
+ * procs.
+ */
+ zp = fssproj->fssp_fsszone->fssz_zone;
+
+ if (fss_umdpri > zp->zone_fss_pri_hi)
+ zp->zone_fss_pri_hi = fss_umdpri;
+
+ if (zp->zone_proc_cnt++ == 0)
+ zp->zone_fss_pri_avg = fss_umdpri;
+ else
+ zp->zone_fss_pri_avg =
+ (zp->zone_fss_pri_avg + fss_umdpri) / 2;
+
/*
* Only dequeue the thread if it needs to be moved; otherwise
* it should just round-robin here.
*/
- if (t->t_pri != fssproc->fss_umdpri)
+ if (t->t_pri != fss_umdpri)
fss_change_priority(t, fssproc);
next:
thread_unlock(t);
@@ -2180,6 +2233,7 @@ fss_tick(kthread_t *t)
fsspset_t *fsspset = FSSPROJ2FSSPSET(fssproj);
disp_lock_enter_high(&fsspset->fssps_displock);
fssproj->fssp_ticks += fss_nice_tick[fssproc->fss_nice];
+ fssproj->fssp_zone_ticks++;
fssproc->fss_ticks++;
disp_lock_exit_high(&fsspset->fssps_displock);
}
diff --git a/usr/src/uts/common/os/kstat_fr.c b/usr/src/uts/common/os/kstat_fr.c
index 93376a9edf..25afef3259 100644
--- a/usr/src/uts/common/os/kstat_fr.c
+++ b/usr/src/uts/common/os/kstat_fr.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2012, Joyent, Inc. All rights reserved.
*/
/*
@@ -161,6 +161,7 @@ struct {
kstat_named_t avenrun_5min;
kstat_named_t avenrun_15min;
kstat_named_t boot_time;
+ kstat_named_t nsec_per_tick;
} system_misc_kstat = {
{ "ncpus", KSTAT_DATA_UINT32 },
{ "lbolt", KSTAT_DATA_UINT32 },
@@ -172,6 +173,7 @@ struct {
{ "avenrun_5min", KSTAT_DATA_UINT32 },
{ "avenrun_15min", KSTAT_DATA_UINT32 },
{ "boot_time", KSTAT_DATA_UINT32 },
+ { "nsec_per_tick", KSTAT_DATA_UINT32 },
};
struct {
@@ -855,6 +857,8 @@ system_misc_kstat_update(kstat_t *ksp, int rw)
system_misc_kstat.avenrun_15min.value.ui32 = (uint32_t)loadavgp[2];
system_misc_kstat.boot_time.value.ui32 = (uint32_t)
zone_boot_time;
+ system_misc_kstat.nsec_per_tick.value.ui32 = (uint32_t)
+ nsec_per_tick;
return (0);
}
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 3ea0d0fe95..79f61ddcb9 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2222,6 +2222,12 @@ zone_misc_kstat_update(kstat_t *ksp, int rw)
zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];
zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2];
+	zmp->zm_run_ticks.value.ui64 = zone->zone_run_ticks;
+	zmp->zm_run_wait.value.ui64 = zone->zone_runq_cntr;
+	/*
+	 * fss_share_percent is created as KSTAT_DATA_UINT32, so it has to be
+	 * stored through the ui32 member; a ui64 store reads back as 0 through
+	 * ui32 on big-endian machines.
+	 */
+	zmp->zm_fss_shr_pct.value.ui32 = zone->zone_fss_shr_pct;
+	zmp->zm_fss_pri_hi.value.ui64 = zone->zone_fss_pri_hi;
+	zmp->zm_fss_pri_avg.value.ui64 = zone->zone_fss_pri_avg;
+
return (0);
}
@@ -2255,6 +2261,13 @@ zone_misc_kstat_create(zone_t *zone)
kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min",
KSTAT_DATA_UINT32);
+ kstat_named_init(&zmp->zm_run_ticks, "run_ticks", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_run_wait, "run_queue", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_fss_shr_pct, "fss_share_percent",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&zmp->zm_fss_pri_hi, "fss_pri_hi", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_fss_pri_avg, "fss_pri_avg",
+ KSTAT_DATA_UINT64);
ksp->ks_update = zone_misc_kstat_update;
ksp->ks_private = zone;
diff --git a/usr/src/uts/common/sys/fss.h b/usr/src/uts/common/sys/fss.h
index 583586fd75..cdb47beb7f 100644
--- a/usr/src/uts/common/sys/fss.h
+++ b/usr/src/uts/common/sys/fss.h
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_FSS_H
@@ -86,6 +87,7 @@ typedef struct fsspset {
/* on the list */
struct fssproj *fssps_list; /* list of project parts */
struct fsszone *fssps_zones; /* list of fsszone_t's in pset */
+ uint32_t fssps_gen; /* generation for zone's kstats */
} fsspset_t;
/*
@@ -103,6 +105,8 @@ typedef struct fssproj {
/* protected by fssps_displock */
uint32_t fssp_ticks; /* total of all ticks */
/* protected by fssps_displock */
+ uint32_t fssp_zone_ticks; /* unscaled total of all ticks */
+ /* protected by fssps_displock */
fssusage_t fssp_usage; /* this project's decayed usage */
fssusage_t fssp_shusage; /* normalized usage */
struct fssproj *fssp_next; /* next project on this pset */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 08677a2f65..a2b7217fd4 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright (c) 2011, 2012, Joyent Inc. All rights reserved.
*/
#ifndef _SYS_ZONE_H
@@ -441,6 +441,11 @@ typedef struct {
kstat_named_t zm_avenrun1;
kstat_named_t zm_avenrun5;
kstat_named_t zm_avenrun15;
+ kstat_named_t zm_run_ticks;
+ kstat_named_t zm_run_wait;
+ kstat_named_t zm_fss_shr_pct;
+ kstat_named_t zm_fss_pri_hi;
+ kstat_named_t zm_fss_pri_avg;
} zone_misc_kstat_t;
typedef struct zone {
@@ -671,6 +676,20 @@ typedef struct zone {
struct loadavg_s zone_loadavg; /* loadavg for this zone */
uint64_t zone_hp_avenrun[3]; /* high-precision avenrun */
int zone_avenrun[3]; /* FSCALED avg. run queue len */
+
+ /*
+ * FSS stats updated once per second by fss_decay_usage.
+ * zone_runq_cntr is an instantaneous accumulation of the number of
+ * processes in the run queue per project and is not computed over the
+ * one second interval.
+ */
+ uint32_t zone_fss_gen; /* FSS generation cntr */
+ uint32_t zone_proc_cnt; /* FSS process cntr */
+ uint64_t zone_run_ticks; /* tot # of ticks running */
+ uint64_t zone_runq_cntr; /* tot # of procs in runq */
+ uint32_t zone_fss_shr_pct; /* fss active shr % in intvl */
+ uint64_t zone_fss_pri_hi; /* fss high pri this interval */
+ uint64_t zone_fss_pri_avg; /* fss avg pri this interval */
} zone_t;
/*