summary | refs | log | tree | commit | diff
path: root/usr/src
diff options
context:
space:
mode:
author: Bill Pijewski <wdp@joyent.com> 2011-03-24 21:20:59 -0700
committer: Bill Pijewski <wdp@joyent.com> 2011-03-24 21:20:59 -0700
commit: 278c9a7a00cbf7a53131a08ae5e86c28de7d2c8e (patch)
tree: 231ce8ea3443d129669f448b27f791f389b67e65 /usr/src
parent: 13152cb42116738a6f884f3c3b757186a33ab14b (diff)
download: illumos-joyent-278c9a7a00cbf7a53131a08ae5e86c28de7d2c8e.tar.gz
OS-338 Kstat counters to show "slow" VFS operations
Diffstat (limited to 'usr/src')
-rw-r--r-- usr/src/cmd/stat/vfsstat/vfsstat.pl 13
-rw-r--r-- usr/src/uts/common/fs/vnode.c 64
-rw-r--r-- usr/src/uts/common/os/zone.c 100
-rw-r--r-- usr/src/uts/common/sys/zone.h 17
4 files changed, 158 insertions, 36 deletions
diff --git a/usr/src/cmd/stat/vfsstat/vfsstat.pl b/usr/src/cmd/stat/vfsstat/vfsstat.pl
index 2fd135274c..ff02ef1d70 100644
--- a/usr/src/cmd/stat/vfsstat/vfsstat.pl
+++ b/usr/src/cmd/stat/vfsstat/vfsstat.pl
@@ -93,6 +93,7 @@ my $DATA_FMT = $USE_COMMA ?
my $BYTES_PREFIX = $USE_MB ? "M" : "k";
my $BYTES_DIVISOR = $USE_MB ? 1024 * 1024 : 1024;
my $INTERVAL_SUFFIX = $USE_INTERVAL ? "i" : "s";
+my $NANOSEC = 1000000000;
my @fields = ( 'reads', 'writes', 'nread', 'nwritten', 'rtime', 'wtime',
'rlentime', 'wlentime', 'snaptime' );
@@ -179,16 +180,20 @@ sub print_stats {
my $w_tps = ($data->{'writes'} - $old->{'writes'}) / $etime;
# Calculate average length of active queue
- my $r_actv = ($data->{'rlentime'} - $old->{'rlentime'}) / $etime;
- my $w_actv = ($data->{'wlentime'} - $old->{'wlentime'}) / $etime;
+ my $r_actv = (($data->{'rlentime'} - $old->{'rlentime'}) / $NANOSEC) /
+ $etime;
+ my $w_actv = (($data->{'wlentime'} - $old->{'wlentime'}) / $NANOSEC) /
+ $etime;
# Calculate average service time
my $read_t = $r_tps > 0 ? $r_actv * (1000 / $r_tps) : 0.0;
my $writ_t = $w_tps > 0 ? $w_actv * (1000 / $w_tps) : 0.0;
# Calculate the % time the VFS layer is active
- my $r_b_pct = (($data->{'rtime'} - $old->{'rtime'}) / $etime) * 100;
- my $w_b_pct = (($data->{'wtime'} - $old->{'wtime'}) / $etime) * 100;
+ my $r_b_pct = ((($data->{'rtime'} - $old->{'rtime'}) / $NANOSEC) /
+ $etime) * 100;
+ my $w_b_pct = ((($data->{'wtime'} - $old->{'wtime'}) / $NANOSEC) /
+ $etime) * 100;
if (! $HIDE_ZEROES || $reads != 0.0 || $writes != 0.0 ||
$nread != 0.0 || $nwritten != 0.0) {
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 3c31a3f241..8567a1a10e 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -200,6 +200,10 @@ static void (**vsd_destructor)(void *);
cr = crgetmapped(cr); \
}
+#define VOP_LATENCY_10MS 10000000 /* 10 ms in ns; compared against gethrtime() deltas */
+#define VOP_LATENCY_100MS 100000000 /* 100 ms in ns */
+#define VOP_LATENCY_1S 1000000000 /* 1 s in ns */
+
/*
* Convert stat(2) formats to vnode types and vice versa. (Knows about
* numerical order of S_IFMT and vnode types.)
@@ -3221,14 +3225,19 @@ fop_read(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start, lat;
ssize_t len;
+ int err;
if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ start = gethrtime();
+
mutex_enter(&zonep->zone_vfs_lock);
- kstat_runq_enter(zonep->zone_vfs_kiop);
+ kstat_runq_enter(&zonep->zone_vfs_rwstats);
mutex_exit(&zonep->zone_vfs_lock);
}
@@ -3241,10 +3250,25 @@ fop_read(
if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
mutex_enter(&zonep->zone_vfs_lock);
- zonep->zone_vfs_kiop->reads++;
- zonep->zone_vfs_kiop->nread += len;
- kstat_runq_exit(zonep->zone_vfs_kiop);
+ zonep->zone_vfs_rwstats.reads++;
+ zonep->zone_vfs_rwstats.nread += len;
+ kstat_runq_exit(&zonep->zone_vfs_rwstats);
mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ }
+ }
}
return (err);
@@ -3258,10 +3282,13 @@ fop_write(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start, lat;
ssize_t len;
+ int err;
/*
* For the purposes of VFS kstat consumers, the "waitq" calculation is
@@ -3269,8 +3296,10 @@ fop_write(
* actual wait queue for VFS operations.
*/
if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ start = gethrtime();
+
mutex_enter(&zonep->zone_vfs_lock);
- kstat_waitq_enter(zonep->zone_vfs_kiop);
+ kstat_waitq_enter(&zonep->zone_vfs_rwstats);
mutex_exit(&zonep->zone_vfs_lock);
}
@@ -3283,10 +3312,25 @@ fop_write(
if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
mutex_enter(&zonep->zone_vfs_lock);
- zonep->zone_vfs_kiop->writes++;
- zonep->zone_vfs_kiop->nwritten += len;
- kstat_waitq_exit(zonep->zone_vfs_kiop);
+ zonep->zone_vfs_rwstats.writes++;
+ zonep->zone_vfs_rwstats.nwritten += len;
+ kstat_waitq_exit(&zonep->zone_vfs_rwstats);
mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ }
+ }
}
return (err);
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index c6d9e1ee83..ca02229671 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -1861,6 +1861,80 @@ zone_rctl_kstat_create_common(zone_t *zone, char *name,
return (ksp);
}
+static int
+zone_vfs_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_vfs_kstat_t *zvp = ksp->ks_data;
+ kstat_io_t *kiop = &zone->zone_vfs_rwstats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ /*
+ * This is the ks_update callback that zone_vfs_kstat_create() installs
+ * on the per-zone "zone_vfs" named kstat; ks_private is the owning
+ * zone and ks_data is its zone_vfs_kstat_t. Write requests have
+ * already been refused with EACCES above; otherwise the named values
+ * are refreshed below and 0 is returned.
+ *
+ * Extract the VFS statistics from the kstat_io_t structure used by
+ * kstat_runq_enter() and related functions. Since the slow ops
+ * counters (zv_10ms_ops, zv_100ms_ops, zv_1s_ops) are updated directly
+ * by the VFS layer, there's no need to copy those statistics here.
+ *
+ * Note that kstat_runq_enter() and the related functions use
+ * gethrtime_unscaled(), so the four time fields are first copied as-is
+ * and then converted in place with scalehrtime() below.
+ */
+ zvp->zv_nread.value.ui64 = kiop->nread;
+ zvp->zv_reads.value.ui64 = kiop->reads;
+ zvp->zv_rtime.value.ui64 = kiop->rtime;
+ zvp->zv_rlentime.value.ui64 = kiop->rlentime;
+ zvp->zv_nwritten.value.ui64 = kiop->nwritten;
+ zvp->zv_writes.value.ui64 = kiop->writes;
+ zvp->zv_wtime.value.ui64 = kiop->wtime;
+ zvp->zv_wlentime.value.ui64 = kiop->wlentime;
+
+ scalehrtime((hrtime_t *)&zvp->zv_rtime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_rlentime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_wtime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_wlentime.value.ui64);
+
+ return (0);
+}
+/*
+ * Create and install the "zone_vfs" named kstat for the given zone.
+ * Returns the installed kstat, or NULL if kstat_create_zone() fails. On
+ * failure zone_vfs_stats is not touched here; the caller,
+ * zone_kstat_create(), allocates a stand-in buffer in that case.
+ */
+static kstat_t *
+zone_vfs_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp;
+ zone_vfs_kstat_t *zvp;
+
+ if ((ksp = kstat_create_zone("zone_vfs", zone->zone_id,
+ zone->zone_name, "zone_vfs", KSTAT_TYPE_NAMED,
+ sizeof (zone_vfs_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+ return (NULL);
+ /*
+ * For a non-global zone, also add the kstat to the global zone
+ * (presumably so it can be read from there; see kstat_zone_add()).
+ */
+ if (zone->zone_id != GLOBAL_ZONEID)
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+ /*
+ * The kstat is created with KSTAT_FLAG_VIRTUAL, so its data buffer is
+ * allocated here. The same buffer is published as zone_vfs_stats, which
+ * is how fop_read()/fop_write() reach the slow-op counters, and the
+ * kstat uses zone_vfs_lock, the mutex those routines hold around their
+ * kstat_io_t accounting.
+ */
+ zvp = ksp->ks_data = kmem_zalloc(sizeof (zone_vfs_kstat_t), KM_SLEEP);
+ ksp->ks_lock = &zone->zone_vfs_lock;
+ zone->zone_vfs_stats = zvp;
+
+ kstat_named_init(&zvp->zv_nread, "nread", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_reads, "reads", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rtime, "rtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rlentime, "rlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_nwritten, "nwritten", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_writes, "writes", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wtime, "wtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wlentime, "wlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_10ms_ops, "10ms_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_100ms_ops, "100ms_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_1s_ops, "1s_ops", KSTAT_DATA_UINT64);
+ /*
+ * Register zone_vfs_kstat_update() as the update callback; it locates
+ * the zone through ks_private.
+ */
+ ksp->ks_update = zone_vfs_kstat_update;
+ ksp->ks_private = zone;
+
+ kstat_install(ksp);
+ return (ksp);
+}
+
static kstat_t *
zone_zfs_kstat_create(zone_t *zone)
{
@@ -1916,20 +1990,9 @@ zone_kstat_create(zone_t *zone)
sizeof (kstat_io_t), KM_SLEEP);
}
- zone->zone_vfs_ksp = kstat_create_zone("zone_vfs", zone->zone_id,
- zone->zone_name, "zone_vfs", KSTAT_TYPE_IO, 1,
- KSTAT_FLAG_PERSISTENT, zone->zone_id);
-
- if (zone->zone_vfs_ksp != NULL) {
- if (zone->zone_id != GLOBAL_ZONEID)
- kstat_zone_add(zone->zone_vfs_ksp, GLOBAL_ZONEID);
-
- zone->zone_vfs_ksp->ks_lock = &zone->zone_vfs_lock;
- kstat_install(zone->zone_vfs_ksp);
- zone->zone_vfs_kiop = zone->zone_vfs_ksp->ks_data;
- } else {
- zone->zone_vfs_kiop = kmem_zalloc(
- sizeof (kstat_io_t), KM_SLEEP);
+ if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) {
+ zone->zone_vfs_stats = kmem_zalloc(
+ sizeof (zone_vfs_kstat_t), KM_SLEEP);
}
if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) {
@@ -1968,13 +2031,8 @@ zone_kstat_delete(zone_t *zone)
kmem_free(zone->zone_io_kiop, sizeof (kstat_io_t));
}
- if (zone->zone_vfs_ksp != NULL) {
- kstat_delete(zone->zone_vfs_ksp);
- zone->zone_vfs_ksp = NULL;
- } else {
- kmem_free(zone->zone_vfs_kiop, sizeof (kstat_io_t));
- }
-
+ zone_kstat_delete_common(&zone->zone_vfs_ksp,
+ sizeof (zone_vfs_kstat_t));
zone_kstat_delete_common(&zone->zone_zfs_ksp,
sizeof (zone_zfs_kstat_t));
}
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 8a82a6c1ad..4ab70fe624 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -388,6 +388,20 @@ typedef struct {
} sys_zio_cntr_t;
typedef struct { /* ks_data layout of the per-zone "zone_vfs" named kstat */
+ kstat_named_t zv_nread; /* copy of kstat_io_t nread */
+ kstat_named_t zv_reads; /* copy of kstat_io_t reads */
+ kstat_named_t zv_rtime; /* kstat_io_t rtime, passed through scalehrtime() */
+ kstat_named_t zv_rlentime; /* kstat_io_t rlentime, passed through scalehrtime() */
+ kstat_named_t zv_nwritten; /* copy of kstat_io_t nwritten */
+ kstat_named_t zv_writes; /* copy of kstat_io_t writes */
+ kstat_named_t zv_wtime; /* kstat_io_t wtime, passed through scalehrtime() */
+ kstat_named_t zv_wlentime; /* kstat_io_t wlentime, passed through scalehrtime() */
+ kstat_named_t zv_10ms_ops; /* fop_read/fop_write calls taking >= 10 ms (includes slower ones) */
+ kstat_named_t zv_100ms_ops; /* fop_read/fop_write calls taking >= 100 ms (includes slower ones) */
+ kstat_named_t zv_1s_ops; /* fop_read/fop_write calls taking >= 1 s */
+} zone_vfs_kstat_t;
+
+typedef struct { /* presumably the ks_data layout used by zone_zfs_kstat_create() -- confirm */
kstat_named_t zz_throttle_cnt;
kstat_named_t zz_throttle_time;
} zone_zfs_kstat_t;
@@ -560,7 +574,8 @@ typedef struct zone {
*/
kmutex_t zone_vfs_lock; /* protects VFS statistics */
kstat_t *zone_vfs_ksp;
- kstat_io_t *zone_vfs_kiop;
+ kstat_io_t zone_vfs_rwstats;
+ zone_vfs_kstat_t *zone_vfs_stats;
/*
* kstats for ZFS observability.