summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jerry Jelinek <jerry.jelinek@joyent.com> 2011-06-07 13:09:49 -0700
committer: Jerry Jelinek <jerry.jelinek@joyent.com> 2011-06-07 13:09:49 -0700
commit: 4556ff31e9d24e3ee674f0c43f562f5dec112e2c (patch)
tree: f7b7ba8dfbc1471582b26633aca92dfa68cb666f
parent: 147d6cd9d5e4be94ebc983c3a343063500b1da48 (diff)
download: illumos-joyent-4556ff31e9d24e3ee674f0c43f562f5dec112e2c.tar.gz
OS-424 want kstats for per-zone cpu usage
-rw-r--r-- usr/src/uts/common/os/msacct.c 36
-rw-r--r-- usr/src/uts/common/os/zone.c 63
-rw-r--r-- usr/src/uts/common/sys/zone.h 25
3 files changed, 121 insertions, 3 deletions
diff --git a/usr/src/uts/common/os/msacct.c b/usr/src/uts/common/os/msacct.c
index df975eb7ee..c10dce81ca 100644
--- a/usr/src/uts/common/os/msacct.c
+++ b/usr/src/uts/common/os/msacct.c
@@ -33,6 +33,7 @@
#include <sys/debug.h>
#include <sys/msacct.h>
#include <sys/time.h>
+#include <sys/zone.h>
/*
* Mega-theory block comment:
@@ -390,6 +391,7 @@ void
syscall_mstate(int fromms, int toms)
{
kthread_t *t = curthread;
+ zone_t *z = ttozone(t);
struct mstate *ms;
hrtime_t *mstimep;
hrtime_t curtime;
@@ -413,6 +415,10 @@ syscall_mstate(int fromms, int toms)
newtime = curtime - ms->ms_state_start;
}
*mstimep += newtime;
+ if (fromms == LMS_USER)
+ atomic_add_64(&z->zone_utime, newtime);
+ else if (fromms == LMS_SYSTEM)
+ atomic_add_64(&z->zone_stime, newtime);
t->t_mstate = toms;
ms->ms_state_start = curtime;
ms->ms_prev = fromms;
@@ -602,7 +608,10 @@ new_mstate(kthread_t *t, int new_state)
hrtime_t curtime;
hrtime_t newtime;
hrtime_t oldtime;
+ hrtime_t ztime;
+ hrtime_t origstart;
klwp_t *lwp;
+ zone_t *z;
ASSERT(new_state != LMS_WAIT_CPU);
ASSERT((unsigned)new_state < NMSTATES);
@@ -625,6 +634,7 @@ new_mstate(kthread_t *t, int new_state)
ms = &lwp->lwp_mstate;
state = t->t_mstate;
+ origstart = ms->ms_state_start;
do {
switch (state) {
case LMS_TFAULT:
@@ -637,7 +647,7 @@ new_mstate(kthread_t *t, int new_state)
mstimep = &ms->ms_acct[state];
break;
}
- newtime = curtime - ms->ms_state_start;
+ ztime = newtime = curtime - ms->ms_state_start;
if (newtime < 0) {
curtime = gethrtime_unscaled();
oldtime = *mstimep - 1; /* force CAS to fail */
@@ -648,6 +658,20 @@ new_mstate(kthread_t *t, int new_state)
t->t_mstate = new_state;
ms->ms_state_start = curtime;
} while (cas64((uint64_t *)mstimep, oldtime, newtime) != oldtime);
+
+ /*
+ * When the system boots the initial startup thread will have a
+ * ms_state_start of 0 which would add a huge system time to the global
+ * zone. We want to skip aggregating that initial bit of work.
+ */
+ if (origstart != 0) {
+ z = ttozone(t);
+ if (state == LMS_USER)
+ atomic_add_64(&z->zone_utime, ztime);
+ else if (state == LMS_SYSTEM)
+ atomic_add_64(&z->zone_stime, ztime);
+ }
+
/*
* Remember the previous running microstate.
*/
@@ -686,6 +710,8 @@ restore_mstate(kthread_t *t)
hrtime_t waitrq;
hrtime_t newtime;
hrtime_t oldtime;
+ hrtime_t waittime;
+ zone_t *z;
/*
* Don't call restore mstate of threads without lwps. (Kernel threads)
@@ -756,11 +782,15 @@ restore_mstate(kthread_t *t)
oldtime = *mstimep;
newtime += oldtime;
} while (cas64((uint64_t *)mstimep, oldtime, newtime) != oldtime);
+
/*
* Update the WAIT_CPU timer and per-cpu waitrq total.
*/
- ms->ms_acct[LMS_WAIT_CPU] += (curtime - waitrq);
- CPU->cpu_waitrq += (curtime - waitrq);
+ z = ttozone(t);
+ waittime = curtime - waitrq;
+ ms->ms_acct[LMS_WAIT_CPU] += waittime;
+ atomic_add_64(&z->zone_wtime, waittime);
+ CPU->cpu_waitrq += waittime;
ms->ms_state_start = curtime;
}
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 5950f6ffb0..4b209e58cc 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2117,6 +2117,59 @@ zone_mcap_kstat_create(zone_t *zone)
return (ksp);
}
+static int /* ks_update callback for the per-zone "zone_misc" kstat: refreshes the exported CPU-time values */
+zone_misc_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private; /* ks_private is set to the zone by zone_misc_kstat_create() */
+ zone_misc_kstat_t *zmp = ksp->ks_data; /* named-kstat buffer that receives the values */
+ hrtime_t tmp; /* scratch copy, so scalehrtime() modifies this and not the zone's counter */
+
+ if (rw == KSTAT_WRITE) /* these statistics are read-only; reject write requests */
+ return (EACCES);
+
+ tmp = zone->zone_utime; /* accumulated LMS_USER time, stored unscaled in the zone */
+ scalehrtime(&tmp); /* scale in place; the values are exported under nsec_* names */
+ zmp->zm_utime.value.ui64 = tmp;
+ tmp = zone->zone_stime; /* accumulated LMS_SYSTEM time, same unscaled representation */
+ scalehrtime(&tmp);
+ zmp->zm_stime.value.ui64 = tmp;
+ tmp = zone->zone_wtime; /* accumulated LMS_WAIT_CPU (run-queue wait) time */
+ scalehrtime(&tmp);
+ zmp->zm_wtime.value.ui64 = tmp;
+
+ return (0); /* values refreshed successfully */
+}
+
+static kstat_t * /* creates and installs the "zone_misc" named kstat for a zone; returns NULL if creation fails */
+zone_misc_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp; /* the kstat being built; also the return value */
+ zone_misc_kstat_t *zmp; /* data buffer backing the kstat */
+
+ if ((ksp = kstat_create_zone("zones", zone->zone_id,
+ zone->zone_name, "zone_misc", KSTAT_TYPE_NAMED,
+ sizeof (zone_misc_kstat_t) / sizeof (kstat_named_t), /* one named entry per struct member */
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL) /* ks_data is assigned by this function below */
+ return (NULL); /* caller (zone_kstat_create) handles the NULL case */
+
+ if (zone->zone_id != GLOBAL_ZONEID) /* NOTE(review): presumably also exposes the kstat to the global zone - confirm */
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+ zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_misc_kstat_t), KM_SLEEP); /* zero-filled data buffer */
+ ksp->ks_lock = &zone->zone_misc_lock; /* NOTE(review): initialization of zone_misc_lock is not visible in this hunk - confirm */
+ zone->zone_misc_stats = zmp; /* zone keeps a pointer to the same buffer */
+
+ kstat_named_init(&zmp->zm_utime, "nsec_user", KSTAT_DATA_UINT64); /* user time */
+ kstat_named_init(&zmp->zm_stime, "nsec_sys", KSTAT_DATA_UINT64); /* system time */
+ kstat_named_init(&zmp->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64); /* run-queue wait time */
+
+ ksp->ks_update = zone_misc_kstat_update; /* values are filled in by this callback */
+ ksp->ks_private = zone; /* lets the update callback find the zone */
+
+ kstat_install(ksp); /* done only after data, lock, and callback are set up */
+ return (ksp);
+}
+
static void
zone_kstat_create(zone_t *zone)
{
@@ -2143,6 +2196,11 @@ zone_kstat_create(zone_t *zone)
zone->zone_mcap_stats = kmem_zalloc(
sizeof (zone_mcap_kstat_t), KM_SLEEP);
}
+
+ if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) {
+ zone->zone_misc_stats = kmem_zalloc(
+ sizeof (zone_misc_kstat_t), KM_SLEEP);
+ }
}
static void
@@ -2176,6 +2234,8 @@ zone_kstat_delete(zone_t *zone)
sizeof (zone_zfs_kstat_t));
zone_kstat_delete_common(&zone->zone_mcap_ksp,
sizeof (zone_mcap_kstat_t));
+ zone_kstat_delete_common(&zone->zone_misc_ksp,
+ sizeof (zone_misc_kstat_t));
}
/*
@@ -2237,6 +2297,9 @@ zone_zsd_init(void)
zone0.zone_physmem_kstat = NULL;
zone0.zone_nprocs_kstat = NULL;
zone0.zone_zfs_io_pri = 1;
+ zone0.zone_stime = 0;
+ zone0.zone_utime = 0;
+ zone0.zone_wtime = 0;
list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
offsetof(zone_ref_t, zref_linkage));
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 71fe145cff..0580293b3c 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -429,6 +429,12 @@ typedef struct {
kstat_named_t zm_anon_alloc_fail;
} zone_mcap_kstat_t;
+typedef struct { /* named values exported by the per-zone "zone_misc" kstat */
+ kstat_named_t zm_utime; /* user CPU time; exported as "nsec_user" */
+ kstat_named_t zm_stime; /* system CPU time; exported as "nsec_sys" */
+ kstat_named_t zm_wtime; /* time waiting on a run queue; exported as "nsec_waitrq" */
+} zone_misc_kstat_t;
+
typedef struct zone {
/*
* zone_name is never modified once set.
@@ -634,6 +640,25 @@ typedef struct zone {
uint64_t zone_execpgin; /* exec pages paged in */
uint64_t zone_fspgin; /* fs pages paged in */
uint64_t zone_anon_alloc_fail; /* cnt of anon alloc fails */
+
+ /*
+ * Misc. kstats and counters for zone cpu-usage aggregation.
+ * The zone_Xtime values are the sum of the micro-state accounting
+ * values for all threads that are running or have run in the zone.
+ * This is tracked in msacct.c as threads change state.
+ * The zone_stime is the sum of the LMS_SYSTEM times.
+ * The zone_utime is the sum of the LMS_USER times.
+ * The zone_wtime is the sum of the LMS_WAIT_CPU times.
+ * As with per-thread micro-state accounting values, these values are
+ * not scaled to nanosecs. The scaling is done by the
+ * zone_misc_kstat_update function when kstats are requested.
+ */
+ kmutex_t zone_misc_lock; /* protects misc statistics */
+ kstat_t *zone_misc_ksp;
+ zone_misc_kstat_t *zone_misc_stats;
+ uint64_t zone_stime; /* total system time */
+ uint64_t zone_utime; /* total user time */
+ uint64_t zone_wtime; /* total time waiting in runq */
} zone_t;
/*