summary refs log tree commit diff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu.c      |  6
-rw-r--r--  usr/src/uts/common/fs/zfs/dmu_tx.c   |  2
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/zfs_zone.h | 2
-rw-r--r--  usr/src/uts/common/fs/zfs/zfs_zone.c | 43
-rw-r--r--  usr/src/uts/common/os/zone.c         | 80
-rw-r--r--  usr/src/uts/common/sys/zone.h        | 34
6 files changed, 114 insertions(+), 53 deletions(-)
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index ec4d00585e..089c331200 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -959,7 +959,11 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
int numbufs, i, err;
xuio_t *xuio = NULL;
- zfs_zone_io_throttle(ZFS_ZONE_IOP_READ);
+ /*
+ * XXX There's a bug here in that I think the reader zone could be
+ * throttled even if all the reads are coming from the ARC.
+ */
+ zfs_zone_io_throttle(ZFS_ZONE_IOP_READ, size);
/*
* NB: we could do this block-at-a-time, but it's nice
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 3492fd5b12..75aa3cb9cb 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -220,7 +220,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
if (len == 0)
return;
- zfs_zone_io_throttle(ZFS_ZONE_IOP_LOGICAL_WRITE);
+ zfs_zone_io_throttle(ZFS_ZONE_IOP_LOGICAL_WRITE, len);
min_bs = SPA_MINBLOCKSHIFT;
max_bs = SPA_MAXBLOCKSHIFT;
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_zone.h b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
index b9f32ef710..5c8ae4bbe9 100644
--- a/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
@@ -42,7 +42,7 @@ typedef enum {
ZFS_ZONE_IOP_LOGICAL_WRITE,
} zfs_zone_iop_type_t;
-extern void zfs_zone_io_throttle(zfs_zone_iop_type_t);
+extern void zfs_zone_io_throttle(zfs_zone_iop_type_t, uint64_t);
extern void zfs_zone_zio_init(zio_t *);
extern void zfs_zone_zio_start(zio_t *);
diff --git a/usr/src/uts/common/fs/zfs/zfs_zone.c b/usr/src/uts/common/fs/zfs/zfs_zone.c
index d8501c8a8e..2086eb2d3a 100644
--- a/usr/src/uts/common/fs/zfs/zfs_zone.c
+++ b/usr/src/uts/common/fs/zfs/zfs_zone.c
@@ -33,7 +33,7 @@
*/
void
-zfs_zone_io_throttle(zfs_zone_iop_type_t type)
+zfs_zone_io_throttle(zfs_zone_iop_type_t type, uint64_t size)
{
}
@@ -561,8 +561,9 @@ zfs_zone_zio_init(zio_t *zp)
}
/*
- * Called from dmu_tx_count_write when a write op goes into a transaction
- * group (TXG). Increment our counter for logical zone write ops.
+ * Track IO operations per zone. Called from dmu_tx_count_write for write ops
+ * and dmu_read_uio for read ops. For each operation, increment that zone's
+ * counter based on the type of operation.
*
* There are three basic ways that we can see write ops:
* 1) An application does write syscalls. Those ops go into a TXG which
@@ -591,30 +592,39 @@ zfs_zone_zio_init(zio_t *zp)
* Without this, it can look like a non-global zone never writes (case 1).
* Depending on when the TXG is flushed, the counts may be in the same sample
* bucket or in a different one.
+ *
+ * Tracking read operations is simpler due to their synchronous semantics. The
+ * zfs_read function -- called as a result of a read(2) syscall -- will always
+ * retrieve the data to be read through dmu_read_uio.
*/
void
-zfs_zone_io_throttle(zfs_zone_iop_type_t type)
+zfs_zone_io_throttle(zfs_zone_iop_type_t type, uint64_t size)
{
hrtime_t now;
uint16_t wait;
zone_t *zonep = curzone;
- if (!zfs_zone_delay_enable)
- return;
-
now = GET_USEC_TIME;
/*
- * Only bump the kstat for logical writes here. The kstats tracking
- * reads and physical writes are bumped in zfs_zone_zio_done.
+ * Only bump the counters for logical operations here. The counters for
+ * tracking physical IO operations are handled in zfs_zone_zio_done.
*/
if (type == ZFS_ZONE_IOP_LOGICAL_WRITE) {
mutex_enter(&zonep->zone_stg_io_lock);
- zonep->zone_iops_lwrite++;
add_iop(zonep, now, type, 0);
mutex_exit(&zonep->zone_stg_io_lock);
+
+ atomic_add_64(&zonep->zone_io_logwrite_ops, 1);
+ atomic_add_64(&zonep->zone_io_logwrite_bytes, size);
+ } else {
+ atomic_add_64(&zonep->zone_io_logread_ops, 1);
+ atomic_add_64(&zonep->zone_io_logread_bytes, size);
}
+ if (!zfs_zone_delay_enable)
+ return;
+
/*
* XXX There's a potential race here in that more than one thread may
* update the zone delays concurrently. The worst outcome is corruption
@@ -680,17 +690,18 @@ zfs_zone_zio_done(zio_t *zp)
diff = now - zp->io_start;
mutex_enter(&zonep->zone_stg_io_lock);
+ add_iop(zonep, now, zp->io_type == ZIO_TYPE_READ ?
+ ZFS_ZONE_IOP_READ : ZFS_ZONE_IOP_WRITE, diff);
+ mutex_exit(&zonep->zone_stg_io_lock);
if (zp->io_type == ZIO_TYPE_READ) {
- zonep->zone_iops_read++;
- add_iop(zonep, now, ZFS_ZONE_IOP_READ, diff);
+ atomic_add_64(&zonep->zone_io_phyread_ops, 1);
+ atomic_add_64(&zonep->zone_io_phyread_bytes, zp->io_size);
} else {
- zonep->zone_iops_write++;
- add_iop(zonep, now, ZFS_ZONE_IOP_WRITE, diff);
+ atomic_add_64(&zonep->zone_io_phywrite_ops, 1);
+ atomic_add_64(&zonep->zone_io_phywrite_bytes, zp->io_size);
}
- mutex_exit(&zonep->zone_stg_io_lock);
-
zone_rele(zonep);
/*
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index a551c4e4bb..6f2730da74 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -1834,25 +1834,31 @@ zone_swapresv_kstat_update(kstat_t *ksp, int rw)
}
static int
-zone_perf_kstat_update(kstat_t *ksp, int rw)
+zone_io_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
- zone_perf_kstat_t *zk = ksp->ks_data;
+ zone_io_kstat_t *zk = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
- zk->zk_read_iops.value.ui64 = zone->zone_iops_read;
- zk->zk_write_iops.value.ui64 = zone->zone_iops_write;
- zk->zk_lwrite_iops.value.ui64 = zone->zone_iops_lwrite;
+ zk->zk_phyread_ops.value.ui64 = zone->zone_io_phyread_ops;
+ zk->zk_logread_ops.value.ui64 = zone->zone_io_logread_ops;
+ zk->zk_phywrite_ops.value.ui64 = zone->zone_io_phywrite_ops;
+ zk->zk_logwrite_ops.value.ui64 = zone->zone_io_logwrite_ops;
+ zk->zk_phyread_bytes.value.ui64 = zone->zone_io_phyread_bytes;
+ zk->zk_logread_bytes.value.ui64 = zone->zone_io_logread_bytes;
+ zk->zk_phywrite_bytes.value.ui64 = zone->zone_io_phywrite_bytes;
+ zk->zk_logwrite_bytes.value.ui64 = zone->zone_io_logwrite_bytes;
+
return (0);
}
static kstat_t *
-zone_perf_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int))
+zone_io_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int))
{
kstat_t *ksp;
- zone_perf_kstat_t *zk;
+ zone_io_kstat_t *zk;
char nm[KSTAT_STRLEN];
(void) snprintf(nm, KSTAT_STRLEN, "zone_%d", zone->zone_id);
@@ -1860,7 +1866,7 @@ zone_perf_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int))
/* module, instance, name, class, type, ndata, flags, zoneid */
ksp = kstat_create_zone("zones", zone->zone_id, nm, "zone_stats",
KSTAT_TYPE_NAMED,
- sizeof (zone_perf_kstat_t) / sizeof (kstat_named_t),
+ sizeof (zone_io_kstat_t) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL, zone->zone_id);
if (ksp == NULL)
@@ -1869,14 +1875,30 @@ zone_perf_kstat_create(zone_t *zone, int (*updatefunc) (kstat_t *, int))
if (zone->zone_id != GLOBAL_ZONEID)
kstat_zone_add(ksp, GLOBAL_ZONEID);
- zk = ksp->ks_data = kmem_alloc(sizeof (zone_perf_kstat_t), KM_SLEEP);
+ zk = ksp->ks_data = kmem_alloc(sizeof (zone_io_kstat_t), KM_SLEEP);
+
ksp->ks_data_size += strlen(zone->zone_name) + 1;
kstat_named_init(&zk->zk_zonename, "zonename", KSTAT_DATA_STRING);
kstat_named_setstr(&zk->zk_zonename, zone->zone_name);
- kstat_named_init(&zk->zk_read_iops, "read_iops", KSTAT_DATA_UINT64);
- kstat_named_init(&zk->zk_write_iops, "write_iops", KSTAT_DATA_UINT64);
- kstat_named_init(&zk->zk_lwrite_iops, "logical_write_iops",
+
+ kstat_named_init(&zk->zk_phyread_ops, "io_physical_read_ops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zk->zk_logread_ops, "io_logical_read_ops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zk->zk_phywrite_ops, "io_physical_write_ops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zk->zk_logwrite_ops, "io_logical_write_ops",
KSTAT_DATA_UINT64);
+
+ kstat_named_init(&zk->zk_phyread_bytes, "io_physical_read_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zk->zk_logread_bytes, "io_logical_read_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zk->zk_phywrite_bytes, "io_physical_write_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zk->zk_logwrite_bytes, "io_logical_write_bytes",
+ KSTAT_DATA_UINT64);
+
ksp->ks_update = updatefunc;
ksp->ks_private = zone;
kstat_install(ksp);
@@ -1919,19 +1941,19 @@ zone_kstat_create(zone_t *zone)
zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone,
"nprocs", zone_nprocs_kstat_update);
- zone->zone_perf_kstat = zone_perf_kstat_create(zone,
- zone_perf_kstat_update);
+ zone->zone_io_kstat = zone_io_kstat_create(zone,
+ zone_io_kstat_update);
}
static void
-zone_perf_kstat_delete(kstat_t **pkstat)
+zone_io_kstat_delete(kstat_t **pkstat)
{
void *data;
if (*pkstat != NULL) {
data = (*pkstat)->ks_data;
kstat_delete(*pkstat);
- kmem_free(data, sizeof (zone_perf_kstat_t));
+ kmem_free(data, sizeof (zone_io_kstat_t));
*pkstat = NULL;
}
}
@@ -1956,7 +1978,7 @@ zone_kstat_delete(zone_t *zone)
zone_kstat_delete_common(&zone->zone_swapresv_kstat);
zone_kstat_delete_common(&zone->zone_nprocs_kstat);
- zone_perf_kstat_delete(&zone->zone_perf_kstat);
+ zone_io_kstat_delete(&zone->zone_io_kstat);
}
/*
@@ -2015,10 +2037,15 @@ zone_zsd_init(void)
zone0.zone_swapresv_kstat = NULL;
zone0.zone_nprocs_kstat = NULL;
zone0.zone_zfs_io_share = 1;
- zone0.zone_iops_read = 0;
- zone0.zone_iops_write = 0;
- zone0.zone_iops_lwrite = 0;
- zone0.zone_perf_kstat = NULL;
+ zone0.zone_io_kstat = NULL;
+ zone0.zone_io_phyread_ops = 0;
+ zone0.zone_io_phywrite_ops = 0;
+ zone0.zone_io_logread_ops = 0;
+ zone0.zone_io_logwrite_ops = 0;
+ zone0.zone_io_phyread_bytes = 0;
+ zone0.zone_io_phywrite_bytes = 0;
+ zone0.zone_io_logread_bytes = 0;
+ zone0.zone_io_logwrite_bytes = 0;
list_create(&zone0.zone_ref_list, sizeof (zone_ref_t),
offsetof(zone_ref_t, zref_linkage));
list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
@@ -4295,9 +4322,14 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_lockedmem_kstat = NULL;
zone->zone_swapresv_kstat = NULL;
zone->zone_zfs_io_share= 1;
- zone->zone_iops_read = 0;
- zone->zone_iops_write = 0;
- zone->zone_iops_lwrite = 0;
+ zone->zone_io_phyread_ops = 0;
+ zone->zone_io_phywrite_ops = 0;
+ zone->zone_io_logread_ops = 0;
+ zone->zone_io_logwrite_ops = 0;
+ zone->zone_io_phyread_bytes = 0;
+ zone->zone_io_phywrite_bytes = 0;
+ zone->zone_io_logread_bytes = 0;
+ zone->zone_io_logwrite_bytes = 0;
/*
* Zsched initializes the rctls.
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 396ced5a00..ce48aca6ee 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -378,12 +378,17 @@ typedef struct zone_kstat {
/*
* structure for zone performance kstats
*/
-typedef struct zone_perf_kstat {
+typedef struct zone_io_kstat {
kstat_named_t zk_zonename;
- kstat_named_t zk_read_iops;
- kstat_named_t zk_write_iops;
- kstat_named_t zk_lwrite_iops;
-} zone_perf_kstat_t;
+ kstat_named_t zk_phyread_ops;
+ kstat_named_t zk_logread_ops;
+ kstat_named_t zk_phywrite_ops;
+ kstat_named_t zk_logwrite_ops;
+ kstat_named_t zk_phyread_bytes;
+ kstat_named_t zk_logread_bytes;
+ kstat_named_t zk_phywrite_bytes;
+ kstat_named_t zk_logwrite_bytes;
+} zone_io_kstat_t;
struct cpucap;
@@ -533,20 +538,29 @@ typedef struct zone {
list_t zone_dl_list;
netstack_t *zone_netstack;
struct cpucap *zone_cpucap; /* CPU caps data */
+
/*
- * Data and kstats used for zfs storage fair-share IO.
+ * Data and counters used for fair-share disk IO.
*/
rctl_qty_t zone_zfs_io_share; /* ZFS IO share */
- uint64_t zone_iops_read; /* kstat ZFS read IOPS */
- uint64_t zone_iops_write; /* kstat ZFS write IOPS */
- uint64_t zone_iops_lwrite; /* kstat logical write IOPS */
uint64_t zone_io_util; /* IO utilization metric */
uint16_t zone_io_delay; /* IO delay on writes */
- kstat_t *zone_perf_kstat;
kmutex_t zone_stg_io_lock; /* protects IO window data */
sys_zio_cntr_t rd_ops; /* Counters for ZFS reads, */
sys_zio_cntr_t wr_ops; /* writes and logical writes. */
sys_zio_cntr_t lwr_ops;
+ /*
+ * kstats and counters for IO ops and bytes.
+ */
+ kstat_t *zone_io_kstat;
+ uint64_t zone_io_phyread_ops; /* ZFS physical read ops */
+ uint64_t zone_io_logread_ops; /* ZFS logical read ops */
+ uint64_t zone_io_phywrite_ops; /* ZFS physical write ops */
+ uint64_t zone_io_logwrite_ops; /* ZFS logical write ops */
+ uint64_t zone_io_phyread_bytes; /* ZFS physical read bytes */
+ uint64_t zone_io_logread_bytes; /* ZFS logical read bytes */
+ uint64_t zone_io_phywrite_bytes; /* ZFS physical write bytes */
+ uint64_t zone_io_logwrite_bytes; /* ZFS logical write bytes */
/*
* Solaris Auditing per-zone audit context