diff options
| field | value | date |
|---|---|---|
| author | Bill Pijewski <wdp@joyent.com> | 2011-03-24 21:20:59 -0700 |
| committer | Bill Pijewski <wdp@joyent.com> | 2011-03-24 21:20:59 -0700 |
| commit | 278c9a7a00cbf7a53131a08ae5e86c28de7d2c8e (patch) | |
| tree | 231ce8ea3443d129669f448b27f791f389b67e65 /usr/src | |
| parent | 13152cb42116738a6f884f3c3b757186a33ab14b (diff) | |
| download | illumos-joyent-278c9a7a00cbf7a53131a08ae5e86c28de7d2c8e.tar.gz | |
OS-338 Kstat counters to show "slow" VFS operations
Diffstat (limited to 'usr/src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/cmd/stat/vfsstat/vfsstat.pl | 13 |
| -rw-r--r-- | usr/src/uts/common/fs/vnode.c | 64 |
| -rw-r--r-- | usr/src/uts/common/os/zone.c | 100 |
| -rw-r--r-- | usr/src/uts/common/sys/zone.h | 17 |
4 files changed, 158 insertions, 36 deletions
```diff
diff --git a/usr/src/cmd/stat/vfsstat/vfsstat.pl b/usr/src/cmd/stat/vfsstat/vfsstat.pl
index 2fd135274c..ff02ef1d70 100644
--- a/usr/src/cmd/stat/vfsstat/vfsstat.pl
+++ b/usr/src/cmd/stat/vfsstat/vfsstat.pl
@@ -93,6 +93,7 @@ my $DATA_FMT = $USE_COMMA ?
 my $BYTES_PREFIX = $USE_MB ? "M" : "k";
 my $BYTES_DIVISOR = $USE_MB ? 1024 * 1024 : 1024;
 my $INTERVAL_SUFFIX = $USE_INTERVAL ? "i" : "s";
+my $NANOSEC = 1000000000;
 
 my @fields = ( 'reads', 'writes', 'nread', 'nwritten', 'rtime', 'wtime',
     'rlentime', 'wlentime', 'snaptime' );
@@ -179,16 +180,20 @@ sub print_stats {
 	my $w_tps = ($data->{'writes'} - $old->{'writes'}) / $etime;
 
 	# Calculate average length of active queue
-	my $r_actv = ($data->{'rlentime'} - $old->{'rlentime'}) / $etime;
-	my $w_actv = ($data->{'wlentime'} - $old->{'wlentime'}) / $etime;
+	my $r_actv = (($data->{'rlentime'} - $old->{'rlentime'}) / $NANOSEC) /
+	    $etime;
+	my $w_actv = (($data->{'wlentime'} - $old->{'wlentime'}) / $NANOSEC) /
+	    $etime;
 
 	# Calculate average service time
 	my $read_t = $r_tps > 0 ? $r_actv * (1000 / $r_tps) : 0.0;
 	my $writ_t = $w_tps > 0 ? $w_actv * (1000 / $w_tps) : 0.0;
 
 	# Calculate the % time the VFS layer is active
-	my $r_b_pct = (($data->{'rtime'} - $old->{'rtime'}) / $etime) * 100;
-	my $w_b_pct = (($data->{'wtime'} - $old->{'wtime'}) / $etime) * 100;
+	my $r_b_pct = ((($data->{'rtime'} - $old->{'rtime'}) / $NANOSEC) /
+	    $etime) * 100;
+	my $w_b_pct = ((($data->{'wtime'} - $old->{'wtime'}) / $NANOSEC) /
+	    $etime) * 100;
 
 	if (! $HIDE_ZEROES || $reads != 0.0 || $writes != 0.0 ||
 	    $nread != 0.0 || $nwritten != 0.0) {
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 3c31a3f241..8567a1a10e 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -200,6 +200,10 @@ static void (**vsd_destructor)(void *);
 		cr = crgetmapped(cr); \
 	}
 
+#define	VOP_LATENCY_10MS	10000000
+#define	VOP_LATENCY_100MS	100000000
+#define	VOP_LATENCY_1S		1000000000
+
 /*
  * Convert stat(2) formats to vnode types and vice versa. (Knows about
  * numerical order of S_IFMT and vnode types.)
@@ -3221,14 +3225,19 @@ fop_read(
 	cred_t *cr,
 	caller_context_t *ct)
 {
-	int err;
 	ssize_t resid_start = uiop->uio_resid;
 	zone_t *zonep = curzone;
+	zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+	hrtime_t start, lat;
 	ssize_t len;
+	int err;
 
 	if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+		start = gethrtime();
+
 		mutex_enter(&zonep->zone_vfs_lock);
-		kstat_runq_enter(zonep->zone_vfs_kiop);
+		kstat_runq_enter(&zonep->zone_vfs_rwstats);
 		mutex_exit(&zonep->zone_vfs_lock);
 	}
 
@@ -3241,10 +3250,25 @@ fop_read(
 
 	if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
 		mutex_enter(&zonep->zone_vfs_lock);
-		zonep->zone_vfs_kiop->reads++;
-		zonep->zone_vfs_kiop->nread += len;
-		kstat_runq_exit(zonep->zone_vfs_kiop);
+		zonep->zone_vfs_rwstats.reads++;
+		zonep->zone_vfs_rwstats.nread += len;
+		kstat_runq_exit(&zonep->zone_vfs_rwstats);
 		mutex_exit(&zonep->zone_vfs_lock);
+
+		lat = gethrtime() - start;
+
+		if (lat >= VOP_LATENCY_10MS) {
+			if (lat < VOP_LATENCY_100MS)
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+			else if (lat < VOP_LATENCY_1S) {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+			} else {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+			}
+		}
 	}
 
 	return (err);
@@ -3258,10 +3282,13 @@ fop_write(
 	cred_t *cr,
 	caller_context_t *ct)
 {
-	int err;
 	ssize_t resid_start = uiop->uio_resid;
 	zone_t *zonep = curzone;
+	zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+	hrtime_t start, lat;
 	ssize_t len;
+	int err;
 
 	/*
 	 * For the purposes of VFS kstat consumers, the "waitq" calculation is
@@ -3269,8 +3296,10 @@ fop_write(
 	 * actual wait queue for VFS operations.
 	 */
 	if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+		start = gethrtime();
+
 		mutex_enter(&zonep->zone_vfs_lock);
-		kstat_waitq_enter(zonep->zone_vfs_kiop);
+		kstat_waitq_enter(&zonep->zone_vfs_rwstats);
 		mutex_exit(&zonep->zone_vfs_lock);
 	}
 
@@ -3283,10 +3312,25 @@ fop_write(
 
 	if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
 		mutex_enter(&zonep->zone_vfs_lock);
-		zonep->zone_vfs_kiop->writes++;
-		zonep->zone_vfs_kiop->nwritten += len;
-		kstat_waitq_exit(zonep->zone_vfs_kiop);
+		zonep->zone_vfs_rwstats.writes++;
+		zonep->zone_vfs_rwstats.nwritten += len;
+		kstat_waitq_exit(&zonep->zone_vfs_rwstats);
 		mutex_exit(&zonep->zone_vfs_lock);
+
+		lat = gethrtime() - start;
+
+		if (lat >= VOP_LATENCY_10MS) {
+			if (lat < VOP_LATENCY_100MS)
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+			else if (lat < VOP_LATENCY_1S) {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+			} else {
+				atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+				atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+			}
+		}
 	}
 
 	return (err);
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index c6d9e1ee83..ca02229671 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -1861,6 +1861,80 @@ zone_rctl_kstat_create_common(zone_t *zone, char *name,
 	return (ksp);
 }
 
+static int
+zone_vfs_kstat_update(kstat_t *ksp, int rw)
+{
+	zone_t *zone = ksp->ks_private;
+	zone_vfs_kstat_t *zvp = ksp->ks_data;
+	kstat_io_t *kiop = &zone->zone_vfs_rwstats;
+
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+
+	/*
+	 * Extract the VFS statistics from the kstat_io_t structure used by
+	 * kstat_runq_enter() and related functions. Since the slow ops
+	 * counters are updated directly by the VFS layer, there's no need to
+	 * copy those statistics here.
+	 *
+	 * Note that kstat_runq_enter() and the related functions use
+	 * gethrtime_unscaled(), so scale the time here.
+	 */
+	zvp->zv_nread.value.ui64 = kiop->nread;
+	zvp->zv_reads.value.ui64 = kiop->reads;
+	zvp->zv_rtime.value.ui64 = kiop->rtime;
+	zvp->zv_rlentime.value.ui64 = kiop->rlentime;
+	zvp->zv_nwritten.value.ui64 = kiop->nwritten;
+	zvp->zv_writes.value.ui64 = kiop->writes;
+	zvp->zv_wtime.value.ui64 = kiop->wtime;
+	zvp->zv_wlentime.value.ui64 = kiop->wlentime;
+
+	scalehrtime((hrtime_t *)&zvp->zv_rtime.value.ui64);
+	scalehrtime((hrtime_t *)&zvp->zv_rlentime.value.ui64);
+	scalehrtime((hrtime_t *)&zvp->zv_wtime.value.ui64);
+	scalehrtime((hrtime_t *)&zvp->zv_wlentime.value.ui64);
+
+	return (0);
+}
+
+static kstat_t *
+zone_vfs_kstat_create(zone_t *zone)
+{
+	kstat_t *ksp;
+	zone_vfs_kstat_t *zvp;
+
+	if ((ksp = kstat_create_zone("zone_vfs", zone->zone_id,
+	    zone->zone_name, "zone_vfs", KSTAT_TYPE_NAMED,
+	    sizeof (zone_vfs_kstat_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+		return (NULL);
+
+	if (zone->zone_id != GLOBAL_ZONEID)
+		kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+	zvp = ksp->ks_data = kmem_zalloc(sizeof (zone_vfs_kstat_t), KM_SLEEP);
+	ksp->ks_lock = &zone->zone_vfs_lock;
+	zone->zone_vfs_stats = zvp;
+
+	kstat_named_init(&zvp->zv_nread, "nread", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_reads, "reads", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_rtime, "rtime", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_rlentime, "rlentime", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_nwritten, "nwritten", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_writes, "writes", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_wtime, "wtime", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_wlentime, "wlentime", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_10ms_ops, "10ms_ops", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_100ms_ops, "100ms_ops", KSTAT_DATA_UINT64);
+	kstat_named_init(&zvp->zv_1s_ops, "1s_ops", KSTAT_DATA_UINT64);
+
+	ksp->ks_update = zone_vfs_kstat_update;
+	ksp->ks_private = zone;
+
+	kstat_install(ksp);
+	return (ksp);
+}
+
 static kstat_t *
 zone_zfs_kstat_create(zone_t *zone)
 {
@@ -1916,20 +1990,9 @@ zone_kstat_create(zone_t *zone)
 		    sizeof (kstat_io_t), KM_SLEEP);
 	}
 
-	zone->zone_vfs_ksp = kstat_create_zone("zone_vfs", zone->zone_id,
-	    zone->zone_name, "zone_vfs", KSTAT_TYPE_IO, 1,
-	    KSTAT_FLAG_PERSISTENT, zone->zone_id);
-
-	if (zone->zone_vfs_ksp != NULL) {
-		if (zone->zone_id != GLOBAL_ZONEID)
-			kstat_zone_add(zone->zone_vfs_ksp, GLOBAL_ZONEID);
-
-		zone->zone_vfs_ksp->ks_lock = &zone->zone_vfs_lock;
-		kstat_install(zone->zone_vfs_ksp);
-		zone->zone_vfs_kiop = zone->zone_vfs_ksp->ks_data;
-	} else {
-		zone->zone_vfs_kiop = kmem_zalloc(
-		    sizeof (kstat_io_t), KM_SLEEP);
+	if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) {
+		zone->zone_vfs_stats = kmem_zalloc(
+		    sizeof (zone_vfs_kstat_t), KM_SLEEP);
 	}
 
 	if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) {
@@ -1968,13 +2031,8 @@ zone_kstat_delete(zone_t *zone)
 		kmem_free(zone->zone_io_kiop, sizeof (kstat_io_t));
 	}
 
-	if (zone->zone_vfs_ksp != NULL) {
-		kstat_delete(zone->zone_vfs_ksp);
-		zone->zone_vfs_ksp = NULL;
-	} else {
-		kmem_free(zone->zone_vfs_kiop, sizeof (kstat_io_t));
-	}
-
+	zone_kstat_delete_common(&zone->zone_vfs_ksp,
+	    sizeof (zone_vfs_kstat_t));
 	zone_kstat_delete_common(&zone->zone_zfs_ksp,
 	    sizeof (zone_zfs_kstat_t));
 }
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 8a82a6c1ad..4ab70fe624 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -388,6 +388,20 @@ typedef struct {
 } sys_zio_cntr_t;
 
 typedef struct {
+	kstat_named_t	zv_nread;
+	kstat_named_t	zv_reads;
+	kstat_named_t	zv_rtime;
+	kstat_named_t	zv_rlentime;
+	kstat_named_t	zv_nwritten;
+	kstat_named_t	zv_writes;
+	kstat_named_t	zv_wtime;
+	kstat_named_t	zv_wlentime;
+	kstat_named_t	zv_10ms_ops;
+	kstat_named_t	zv_100ms_ops;
+	kstat_named_t	zv_1s_ops;
+} zone_vfs_kstat_t;
+
+typedef struct {
 	kstat_named_t	zz_throttle_cnt;
 	kstat_named_t	zz_throttle_time;
 } zone_zfs_kstat_t;
@@ -560,7 +574,8 @@ typedef struct zone {
 	 */
 	kmutex_t	zone_vfs_lock;		/* protects VFS statistics */
 	kstat_t		*zone_vfs_ksp;
-	kstat_io_t	*zone_vfs_kiop;
+	kstat_io_t	zone_vfs_rwstats;
+	zone_vfs_kstat_t *zone_vfs_stats;
 
 	/*
 	 * kstats for ZFS observability.
```
