diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2011-06-07 13:09:49 -0700 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2011-06-07 13:09:49 -0700 |
commit | 4556ff31e9d24e3ee674f0c43f562f5dec112e2c (patch) | |
tree | f7b7ba8dfbc1471582b26633aca92dfa68cb666f | |
parent | 147d6cd9d5e4be94ebc983c3a343063500b1da48 (diff) | |
download | illumos-joyent-4556ff31e9d24e3ee674f0c43f562f5dec112e2c.tar.gz |
OS-424 want kstats for per-zone cpu usage
-rw-r--r-- | usr/src/uts/common/os/msacct.c | 36 | ||||
-rw-r--r-- | usr/src/uts/common/os/zone.c | 63 | ||||
-rw-r--r-- | usr/src/uts/common/sys/zone.h | 25 |
3 files changed, 121 insertions, 3 deletions
diff --git a/usr/src/uts/common/os/msacct.c b/usr/src/uts/common/os/msacct.c
index df975eb7ee..c10dce81ca 100644
--- a/usr/src/uts/common/os/msacct.c
+++ b/usr/src/uts/common/os/msacct.c
@@ -33,6 +33,7 @@
 #include <sys/debug.h>
 #include <sys/msacct.h>
 #include <sys/time.h>
+#include <sys/zone.h>
 
 /*
  * Mega-theory block comment:
@@ -390,6 +391,7 @@ void
 syscall_mstate(int fromms, int toms)
 {
 	kthread_t *t = curthread;
+	zone_t *z = ttozone(t);
 	struct mstate *ms;
 	hrtime_t *mstimep;
 	hrtime_t curtime;
@@ -413,6 +415,10 @@ syscall_mstate(int fromms, int toms)
 		newtime = curtime - ms->ms_state_start;
 	}
 	*mstimep += newtime;
+	if (fromms == LMS_USER)
+		atomic_add_64(&z->zone_utime, newtime);
+	else if (fromms == LMS_SYSTEM)
+		atomic_add_64(&z->zone_stime, newtime);
 	t->t_mstate = toms;
 	ms->ms_state_start = curtime;
 	ms->ms_prev = fromms;
@@ -602,7 +608,10 @@ new_mstate(kthread_t *t, int new_state)
 	hrtime_t curtime;
 	hrtime_t newtime;
 	hrtime_t oldtime;
+	hrtime_t ztime;
+	hrtime_t origstart;
 	klwp_t *lwp;
+	zone_t *z;
 
 	ASSERT(new_state != LMS_WAIT_CPU);
 	ASSERT((unsigned)new_state < NMSTATES);
@@ -625,6 +634,7 @@ new_mstate(kthread_t *t, int new_state)
 
 	ms = &lwp->lwp_mstate;
 	state = t->t_mstate;
+	origstart = ms->ms_state_start;
 	do {
 		switch (state) {
 		case LMS_TFAULT:
@@ -637,7 +647,7 @@ new_mstate(kthread_t *t, int new_state)
 			mstimep = &ms->ms_acct[state];
 			break;
 		}
-		newtime = curtime - ms->ms_state_start;
+		ztime = newtime = curtime - ms->ms_state_start;
 		if (newtime < 0) {
 			curtime = gethrtime_unscaled();
 			oldtime = *mstimep - 1; /* force CAS to fail */
@@ -648,6 +658,20 @@ new_mstate(kthread_t *t, int new_state)
 		t->t_mstate = new_state;
 		ms->ms_state_start = curtime;
 	} while (cas64((uint64_t *)mstimep, oldtime, newtime) != oldtime);
+
+	/*
+	 * When the system boots the initial startup thread will have a
+	 * ms_state_start of 0 which would add a huge system time to the global
+	 * zone. We want to skip aggregating that initial bit of work.
+	 */
+	if (origstart != 0) {
+		z = ttozone(t);
+		if (state == LMS_USER)
+			atomic_add_64(&z->zone_utime, ztime);
+		else if (state == LMS_SYSTEM)
+			atomic_add_64(&z->zone_stime, ztime);
+	}
+
 	/*
 	 * Remember the previous running microstate.
 	 */
@@ -686,6 +710,8 @@ restore_mstate(kthread_t *t)
 	hrtime_t waitrq;
 	hrtime_t newtime;
 	hrtime_t oldtime;
+	hrtime_t waittime;
+	zone_t *z;
 
 	/*
 	 * Don't call restore mstate of threads without lwps. (Kernel threads)
@@ -756,11 +782,15 @@ restore_mstate(kthread_t *t)
 		oldtime = *mstimep;
 		newtime += oldtime;
 	} while (cas64((uint64_t *)mstimep, oldtime, newtime) != oldtime);
+
 	/*
 	 * Update the WAIT_CPU timer and per-cpu waitrq total.
 	 */
-	ms->ms_acct[LMS_WAIT_CPU] += (curtime - waitrq);
-	CPU->cpu_waitrq += (curtime - waitrq);
+	z = ttozone(t);
+	waittime = curtime - waitrq;
+	ms->ms_acct[LMS_WAIT_CPU] += waittime;
+	atomic_add_64(&z->zone_wtime, waittime);
+	CPU->cpu_waitrq += waittime;
 	ms->ms_state_start = curtime;
 }
 
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 5950f6ffb0..4b209e58cc 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2117,6 +2117,59 @@ zone_mcap_kstat_create(zone_t *zone)
 	return (ksp);
 }
 
+static int
+zone_misc_kstat_update(kstat_t *ksp, int rw)
+{
+	zone_t *zone = ksp->ks_private;
+	zone_misc_kstat_t *zmp = ksp->ks_data;
+	hrtime_t tmp;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	tmp = zone->zone_utime;
+	scalehrtime(&tmp);
+	zmp->zm_utime.value.ui64 = tmp;
+	tmp = zone->zone_stime;
+	scalehrtime(&tmp);
+	zmp->zm_stime.value.ui64 = tmp;
+	tmp = zone->zone_wtime;
+	scalehrtime(&tmp);
+	zmp->zm_wtime.value.ui64 = tmp;
+
+	return (0);
+}
+
+static kstat_t *
+zone_misc_kstat_create(zone_t *zone)
+{
+	kstat_t *ksp;
+	zone_misc_kstat_t *zmp;
+
+	if ((ksp = kstat_create_zone("zones", zone->zone_id,
+	    zone->zone_name, "zone_misc", KSTAT_TYPE_NAMED,
+	    sizeof (zone_misc_kstat_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+		return (NULL);
+
+	if (zone->zone_id != GLOBAL_ZONEID)
+		kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+	zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_misc_kstat_t), KM_SLEEP);
+	ksp->ks_lock = &zone->zone_misc_lock;
+	zone->zone_misc_stats = zmp;
+
+	kstat_named_init(&zmp->zm_utime, "nsec_user", KSTAT_DATA_UINT64);
+	kstat_named_init(&zmp->zm_stime, "nsec_sys", KSTAT_DATA_UINT64);
+	kstat_named_init(&zmp->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64);
+
+	ksp->ks_update = zone_misc_kstat_update;
+	ksp->ks_private = zone;
+
+	kstat_install(ksp);
+	return (ksp);
+}
+
 static void
 zone_kstat_create(zone_t *zone)
 {
@@ -2143,6 +2196,11 @@ zone_kstat_create(zone_t *zone)
 		zone->zone_mcap_stats = kmem_zalloc(
 		    sizeof (zone_mcap_kstat_t), KM_SLEEP);
 	}
+
+	if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) {
+		zone->zone_misc_stats = kmem_zalloc(
+		    sizeof (zone_misc_kstat_t), KM_SLEEP);
+	}
 }
 
 static void
@@ -2176,6 +2234,8 @@ zone_kstat_delete(zone_t *zone)
 	    sizeof (zone_zfs_kstat_t));
 	zone_kstat_delete_common(&zone->zone_mcap_ksp,
 	    sizeof (zone_mcap_kstat_t));
+	zone_kstat_delete_common(&zone->zone_misc_ksp,
+	    sizeof (zone_misc_kstat_t));
 }
 
 /*
@@ -2237,6 +2297,9 @@ zone_zsd_init(void)
 	zone0.zone_physmem_kstat = NULL;
 	zone0.zone_nprocs_kstat = NULL;
 	zone0.zone_zfs_io_pri = 1;
+	zone0.zone_stime = 0;
+	zone0.zone_utime = 0;
+	zone0.zone_wtime = 0;
 
 	list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
 	    offsetof(zone_ref_t, zref_linkage));
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 71fe145cff..0580293b3c 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -429,6 +429,12 @@ typedef struct {
 	kstat_named_t zm_anon_alloc_fail;
 } zone_mcap_kstat_t;
 
+typedef struct {
+	kstat_named_t zm_utime;
+	kstat_named_t zm_stime;
+	kstat_named_t zm_wtime;
+} zone_misc_kstat_t;
+
 typedef struct zone {
 	/*
 	 * zone_name is never modified once set.
@@ -634,6 +640,25 @@ typedef struct zone {
 	uint64_t zone_execpgin; /* exec pages paged in */
 	uint64_t zone_fspgin; /* fs pages paged in */
 	uint64_t zone_anon_alloc_fail; /* cnt of anon alloc fails */
+
+	/*
+	 * Misc. kstats and counters for zone cpu-usage aggregation.
+	 * The zone_Xtime values are the sum of the micro-state accounting
+	 * values for all threads that are running or have run in the zone.
+	 * This is tracked in msacct.c as threads change state.
+	 * The zone_stime is the sum of the LMS_SYSTEM times.
+	 * The zone_utime is the sum of the LMS_USER times.
+	 * The zone_wtime is the sum of the LMS_WAIT_CPU times.
+	 * As with per-thread micro-state accounting values, these values are
+	 * not scaled to nanosecs. The scaling is done by the
+	 * zone_misc_kstat_update function when kstats are requested.
+	 */
+	kmutex_t zone_misc_lock; /* protects misc statistics */
+	kstat_t *zone_misc_ksp;
+	zone_misc_kstat_t *zone_misc_stats;
+	uint64_t zone_stime; /* total system time */
+	uint64_t zone_utime; /* total user time */
+	uint64_t zone_wtime; /* total time waiting in runq */
 } zone_t;
 
 /*