diff options
author | Bill Pijewski <wdp@joyent.com> | 2010-12-21 10:28:26 -0800 |
---|---|---|
committer | Bill Pijewski <wdp@joyent.com> | 2010-12-21 10:28:26 -0800 |
commit | d4d1e080debc944cf51a057953dd796148700c55 (patch) | |
tree | 817cb3114b62a34fe125d86ec4e26df45b03255c /usr/src | |
parent | 85afc9464cce4c9bd48cfc9e184edc0dc31f21bd (diff) | |
download | illumos-joyent-d4d1e080debc944cf51a057953dd796148700c55.tar.gz |
OS-67 Want kstats for per-zone logical and physical IO bytes and operations
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu.c | 6 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_tx.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zfs_zone.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_zone.c | 43 | ||||
-rw-r--r-- | usr/src/uts/common/os/zone.c | 80 | ||||
-rw-r--r-- | usr/src/uts/common/sys/zone.h | 34 |
6 files changed, 114 insertions, 53 deletions
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index ec4d00585e..089c331200 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -959,7 +959,11 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) int numbufs, i, err; xuio_t *xuio = NULL; - zfs_zone_io_throttle(ZFS_ZONE_IOP_READ); + /* + * XXX There's a bug here in that I think the reader zone could be + * throttled even if all the reads are coming from the ARC. + */ + zfs_zone_io_throttle(ZFS_ZONE_IOP_READ, size); /* * NB: we could do this block-at-a-time, but it's nice diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c index 3492fd5b12..75aa3cb9cb 100644 --- a/usr/src/uts/common/fs/zfs/dmu_tx.c +++ b/usr/src/uts/common/fs/zfs/dmu_tx.c @@ -220,7 +220,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) if (len == 0) return; - zfs_zone_io_throttle(ZFS_ZONE_IOP_LOGICAL_WRITE); + zfs_zone_io_throttle(ZFS_ZONE_IOP_LOGICAL_WRITE, len); min_bs = SPA_MINBLOCKSHIFT; max_bs = SPA_MAXBLOCKSHIFT; diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_zone.h b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h index b9f32ef710..5c8ae4bbe9 100644 --- a/usr/src/uts/common/fs/zfs/sys/zfs_zone.h +++ b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h @@ -42,7 +42,7 @@ typedef enum { ZFS_ZONE_IOP_LOGICAL_WRITE, } zfs_zone_iop_type_t; -extern void zfs_zone_io_throttle(zfs_zone_iop_type_t); +extern void zfs_zone_io_throttle(zfs_zone_iop_type_t, uint64_t); extern void zfs_zone_zio_init(zio_t *); extern void zfs_zone_zio_start(zio_t *); diff --git a/usr/src/uts/common/fs/zfs/zfs_zone.c b/usr/src/uts/common/fs/zfs/zfs_zone.c index d8501c8a8e..2086eb2d3a 100644 --- a/usr/src/uts/common/fs/zfs/zfs_zone.c +++ b/usr/src/uts/common/fs/zfs/zfs_zone.c @@ -33,7 +33,7 @@ */ void -zfs_zone_io_throttle(zfs_zone_iop_type_t type) +zfs_zone_io_throttle(zfs_zone_iop_type_t type, uint64_t size) { } @@ -561,8 +561,9 @@ zfs_zone_zio_init(zio_t *zp) } /* - * Called from dmu_tx_count_write when a write op goes into a transaction - * group (TXG). Increment our counter for logical zone write ops. + * Track IO operations per zone. Called from dmu_tx_count_write for write ops + * and dmu_read_uio for read ops. For each operation, increment that zone's + * counter based on the type of operation. * * There are three basic ways that we can see write ops: * 1) An application does write syscalls. Those ops go into a TXG which @@ -591,30 +592,39 @@ zfs_zone_zio_init(zio_t *zp) * Without this, it can look like a non-global zone never writes (case 1). * Depending on when the TXG is flushed, the counts may be in the same sample * bucket or in a different one. + * + * Tracking read operations is simpler due to their synchronous semantics. The + * zfs_read function -- called as a result of a read(2) syscall -- will always + * retrieve the data to be read through dmu_read_uio. */ void -zfs_zone_io_throttle(zfs_zone_iop_type_t type) +zfs_zone_io_throttle(zfs_zone_iop_type_t type, uint64_t size) { hrtime_t now; uint16_t wait; zone_t *zonep = curzone; - if (!zfs_zone_delay_enable) - return; - now = GET_USEC_TIME; /* - * Only bump the kstat for logical writes here. The kstats tracking - * reads and physical writes are bumped in zfs_zone_zio_done. + * Only bump the counters for logical operations here. The counters for + * tracking physical IO operations are handled in zfs_zone_zio_done. */ if (type == ZFS_ZONE_IOP_LOGICAL_WRITE) { mutex_enter(&zonep->zone_stg_io_lock); - zonep->zone_iops_lwrite++; add_iop(zonep, now, type, 0); mutex_exit(&zonep->zone_stg_io_lock); + + atomic_add_64(&zonep->zone_io_logwrite_ops, 1); + atomic_add_64(&zonep->zone_io_logwrite_bytes, size); + } else { + atomic_add_64(&zonep->zone_io_logread_ops, 1); + atomic_add_64(&zonep->zone_io_logread_bytes, size); } + if (!zfs_zone_delay_enable) + return; + /* * XXX There's a potential race here in that more than one thread may * update the zone delays concurrently. The worst outcome is corruption @@ -680,17 +690,18 @@ zfs_zone_zio_done(zio_t *zp) diff = now - zp->io_start; mutex_enter(&zonep->zone_stg_io_lock); + add_iop(zonep, now, zp->io_type == ZIO_TYPE_READ ? + ZFS_ZONE_IOP_READ : ZFS_ZONE_IOP_WRITE, diff); + mutex_exit(&zonep->zone_stg_io_lock); if (zp->io_type == ZIO_TYPE_READ) { - zonep->zone_iops_read++; - add_iop(zonep, now, ZFS_ZONE_IOP_READ, diff); + atomic_add_64(&zonep->zone_io_phyread_ops, 1); + atomic_add_64(&zonep->zone_io_phyread_bytes, zp->io_size); } else { - zonep->zone_iops_write++; - add_iop(zonep, now, ZFS_ZONE_IOP_WRITE, diff); + atomic_add_64(&zonep->zone_io_phywrite_ops, 1); + atomic_add_64(&zonep->zone_io_phywrite_bytes, zp->io_size); } - mutex_exit(&zonep->zone_stg_io_lock); - zone_rele(zonep); /* diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index a551c4e4bb..6f2730da74 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -1834,25 +1834,31 @@ zone_swapresv_kstat_update(kstat_t *ksp, int rw) } static int -zone_perf_kstat_update(kstat_t *ksp, int rw) +zone_io_kstat_update(kstat_t *ksp, int rw) { zone_t *zone = ksp->ks_private; - zone_perf_kstat_t *zk = ksp->ks_data; + zone_io_kstat_t *zk = ksp->ks_data; if (rw == KSTAT_WRITE) return (EACCES); - zk->zk_read_iops.value.ui64 = zone->zone_iops_read; - zk->zk_write_iops.value.ui64 = zone->zone_iops_write; - zk->zk_lwrite_iops.value.ui64 = zone->zone_iops_lwrite; + zk->zk_phyread_ops.value.ui64 = zone->zone_io_phyread_ops; + zk->zk_logread_ops.value.ui64 = zone->zone_io_logread_ops; + zk->zk_phywrite_ops.value.ui64 = zone->zone_io_phywrite_ops; + zk->zk_logwrite_ops.value.ui64 = zone->zone_io_logwrite_ops; + zk->zk_phyread_bytes.value.ui64 = zone->zone_io_phyread_bytes; + zk->zk_logread_bytes.value.ui64 = zone->zone_io_logread_bytes; + zk->zk_phywrite_bytes.value.ui64 = zone->zone_io_phywrite_bytes; + zk->zk_logwrite_bytes.value.ui64 = zone->zone_io_logwrite_bytes; + return (0); } static kstat_t * -zone_perf_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int)) +zone_io_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int)) { kstat_t *ksp; - zone_perf_kstat_t *zk; + zone_io_kstat_t *zk; char nm[KSTAT_STRLEN]; (void) snprintf(nm, KSTAT_STRLEN, "zone_%d", zone->zone_id); @@ -1860,7 +1866,7 @@ zone_perf_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int)) /* module, instance, name, class, type, ndata, flags, zoneid */ ksp = kstat_create_zone("zones", zone->zone_id, nm, "zone_stats", KSTAT_TYPE_NAMED, - sizeof (zone_perf_kstat_t) / sizeof (kstat_named_t), + sizeof (zone_io_kstat_t) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL, zone->zone_id); if (ksp == NULL) @@ -1869,14 +1875,30 @@ zone_perf_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int)) if (zone->zone_id != GLOBAL_ZONEID) kstat_zone_add(ksp, GLOBAL_ZONEID); - zk = ksp->ks_data = kmem_alloc(sizeof (zone_perf_kstat_t), KM_SLEEP); + zk = ksp->ks_data = kmem_alloc(sizeof (zone_io_kstat_t), KM_SLEEP); + ksp->ks_data_size += strlen(zone->zone_name) + 1; kstat_named_init(&zk->zk_zonename, "zonename", KSTAT_DATA_STRING); kstat_named_setstr(&zk->zk_zonename, zone->zone_name); - kstat_named_init(&zk->zk_read_iops, "read_iops", KSTAT_DATA_UINT64); - kstat_named_init(&zk->zk_write_iops, "write_iops", KSTAT_DATA_UINT64); - kstat_named_init(&zk->zk_lwrite_iops, "logical_write_iops", + + kstat_named_init(&zk->zk_phyread_ops, "io_physical_read_ops", + KSTAT_DATA_UINT64); + kstat_named_init(&zk->zk_logread_ops, "io_logical_read_ops", + KSTAT_DATA_UINT64); + kstat_named_init(&zk->zk_phywrite_ops, "io_physical_write_ops", + KSTAT_DATA_UINT64); + kstat_named_init(&zk->zk_logwrite_ops, "io_logical_write_ops", KSTAT_DATA_UINT64); + + kstat_named_init(&zk->zk_phyread_bytes, "io_physical_read_bytes", + KSTAT_DATA_UINT64); + kstat_named_init(&zk->zk_logread_bytes, "io_logical_read_bytes", + KSTAT_DATA_UINT64); + kstat_named_init(&zk->zk_phywrite_bytes, "io_physical_write_bytes", + KSTAT_DATA_UINT64); + kstat_named_init(&zk->zk_logwrite_bytes, "io_logical_write_bytes", + KSTAT_DATA_UINT64); + ksp->ks_update = updatefunc; ksp->ks_private = zone; kstat_install(ksp); @@ -1919,19 +1941,19 @@ zone_kstat_create(zone_t *zone) zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone, "nprocs", zone_nprocs_kstat_update); - zone->zone_perf_kstat = zone_perf_kstat_create(zone, - zone_perf_kstat_update); + zone->zone_io_kstat = zone_io_kstat_create(zone, + zone_io_kstat_update); } static void -zone_perf_kstat_delete(kstat_t **pkstat) +zone_io_kstat_delete(kstat_t **pkstat) { void *data; if (*pkstat != NULL) { data = (*pkstat)->ks_data; kstat_delete(*pkstat); - kmem_free(data, sizeof (zone_perf_kstat_t)); + kmem_free(data, sizeof (zone_io_kstat_t)); *pkstat = NULL; } } @@ -1956,7 +1978,7 @@ zone_kstat_delete(zone_t *zone) zone_kstat_delete_common(&zone->zone_swapresv_kstat); zone_kstat_delete_common(&zone->zone_nprocs_kstat); - zone_perf_kstat_delete(&zone->zone_perf_kstat); + zone_io_kstat_delete(&zone->zone_io_kstat); } /* @@ -2015,10 +2037,15 @@ zone_zsd_init(void) zone0.zone_swapresv_kstat = NULL; zone0.zone_nprocs_kstat = NULL; zone0.zone_zfs_io_share = 1; - zone0.zone_iops_read = 0; - zone0.zone_iops_write = 0; - zone0.zone_iops_lwrite = 0; - zone0.zone_perf_kstat = NULL; + zone0.zone_io_kstat = NULL; + zone0.zone_io_phyread_ops = 0; + zone0.zone_io_phywrite_ops = 0; + zone0.zone_io_logread_ops = 0; + zone0.zone_io_logwrite_ops = 0; + zone0.zone_io_phyread_bytes = 0; + zone0.zone_io_phywrite_bytes = 0; + zone0.zone_io_logread_bytes = 0; + zone0.zone_io_logwrite_bytes = 0; list_create(&zone0.zone_ref_list, sizeof (zone_ref_t), offsetof(zone_ref_t, zref_linkage)); list_create(&zone0.zone_zsd, sizeof (struct zsd_entry), @@ -4295,9 +4322,14 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_lockedmem_kstat = NULL; zone->zone_swapresv_kstat = NULL; zone->zone_zfs_io_share= 1; - zone->zone_iops_read = 0; - zone->zone_iops_write = 0; - zone->zone_iops_lwrite = 0; + zone->zone_io_phyread_ops = 0; + zone->zone_io_phywrite_ops = 0; + zone->zone_io_logread_ops = 0; + zone->zone_io_logwrite_ops = 0; + zone->zone_io_phyread_bytes = 0; + zone->zone_io_phywrite_bytes = 0; + zone->zone_io_logread_bytes = 0; + zone->zone_io_logwrite_bytes = 0; /* * Zsched initializes the rctls. diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 396ced5a00..ce48aca6ee 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -378,12 +378,17 @@ typedef struct zone_kstat { /* * structure for zone performance kstats */ -typedef struct zone_perf_kstat { +typedef struct zone_io_kstat { kstat_named_t zk_zonename; - kstat_named_t zk_read_iops; - kstat_named_t zk_write_iops; - kstat_named_t zk_lwrite_iops; -} zone_perf_kstat_t; + kstat_named_t zk_phyread_ops; + kstat_named_t zk_logread_ops; + kstat_named_t zk_phywrite_ops; + kstat_named_t zk_logwrite_ops; + kstat_named_t zk_phyread_bytes; + kstat_named_t zk_logread_bytes; + kstat_named_t zk_phywrite_bytes; + kstat_named_t zk_logwrite_bytes; +} zone_io_kstat_t; struct cpucap; @@ -533,20 +538,29 @@ typedef struct zone { list_t zone_dl_list; netstack_t *zone_netstack; struct cpucap *zone_cpucap; /* CPU caps data */ + /* - * Data and kstats used for zfs storage fair-share IO. + * Data and counters used for fair-share disk IO. */ rctl_qty_t zone_zfs_io_share; /* ZFS IO share */ - uint64_t zone_iops_read; /* kstat ZFS read IOPS */ - uint64_t zone_iops_write; /* kstat ZFS write IOPS */ - uint64_t zone_iops_lwrite; /* kstat logical write IOPS */ uint64_t zone_io_util; /* IO utilization metric */ uint16_t zone_io_delay; /* IO delay on writes */ - kstat_t *zone_perf_kstat; kmutex_t zone_stg_io_lock; /* protects IO window data */ sys_zio_cntr_t rd_ops; /* Counters for ZFS reads, */ sys_zio_cntr_t wr_ops; /* writes and logical writes. */ sys_zio_cntr_t lwr_ops; + /* + * kstats and counters for IO ops and bytes. + */ + kstat_t *zone_io_kstat; + uint64_t zone_io_phyread_ops; /* ZFS physical read ops */ + uint64_t zone_io_logread_ops; /* ZFS logical read ops */ + uint64_t zone_io_phywrite_ops; /* ZFS physical write ops */ + uint64_t zone_io_logwrite_ops; /* ZFS logical write ops */ + uint64_t zone_io_phyread_bytes; /* ZFS physical read bytes */ + uint64_t zone_io_logread_bytes; /* ZFS logical read bytes */ + uint64_t zone_io_phywrite_bytes; /* ZFS physical write bytes */ + uint64_t zone_io_logwrite_bytes; /* ZFS logical write bytes */ /* * Solaris Auditing per-zone audit context |