author    ahrens <none@none>  2006-11-03 11:39:28 -0800
committer ahrens <none@none>  2006-11-03 11:39:28 -0800
commit    614409b5be5411058e7e9b6cc93dddaff9fb13f7 (patch)
tree      994d814287dee3e4d808d3f845b3f62e5c99acbc /usr/src
parent    ada9354b28215e27f2a3b25b9c352681c9cbdfa1 (diff)
download  illumos-gate-614409b5be5411058e7e9b6cc93dddaff9fb13f7.tar.gz
6472021 vdev knobs can not be turned
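
The fix replaces the per-vdev knob table (vdev_knob_t, whose values could
only be changed by patching each vdev_t in memory) with one ordinary global
variable per tunable, so the stock kernel-tuning mechanisms finally apply.
A hypothetical example of turning the new knobs (the variable names come
from this patch; the /etc/system and mdb syntax is the standard Solaris
mechanism, not part of the change):

    # Persistent, in /etc/system (applied at boot):
    set zfs:zfs_vdev_max_pending = 10
    set zfs:zfs_vdev_cache_size = 0

    # Live, on a running kernel ("0t" means decimal):
    echo 'zfs_vdev_max_pending/W 0t10' | mdb -kw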
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/cmd/mdb/common/modules/zfs/zfs.c   120
-rw-r--r--  usr/src/cmd/zdb/zdb.c                        8
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/vdev.h        14
-rw-r--r--  usr/src/uts/common/fs/zfs/sys/vdev_impl.h   10
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev.c           117
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev_cache.c      55
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev_file.c       16
-rw-r--r--  usr/src/uts/common/fs/zfs/vdev_queue.c      45
8 files changed, 156 insertions(+), 229 deletions(-)
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index d3132acc61..57ee55f050 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -329,6 +329,74 @@ zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
/* ARGSUSED */
static int
+zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ /*
+ * This table can be approximately generated by running:
+ * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
+ */
+ static const char *params[] = {
+ "arc_reduce_dnlc_percent",
+ "zfs_arc_max",
+ "zfs_arc_min",
+ "arc_kmem_reclaim_shift",
+ "zfs_mdcomp_disable",
+ "zfs_prefetch_disable",
+ "zfetch_max_streams",
+ "zfetch_min_sec_reap",
+ "zfetch_block_cap",
+ "zfetch_array_rd_sz",
+ "zfs_default_bs",
+ "zfs_default_ibs",
+ "metaslab_aliquot",
+ "reference_tracking_enable",
+ "reference_history",
+ "zio_taskq_threads",
+ "spa_max_replication_override",
+ "spa_mode",
+ "zfs_flags",
+ "txg_time",
+ "zfs_vdev_cache_max",
+ "zfs_vdev_cache_size",
+ "zfs_vdev_cache_bshift",
+ "vdev_mirror_shift",
+ "zfs_vdev_max_pending",
+ "zfs_vdev_min_pending",
+ "zfs_scrub_limit",
+ "zfs_vdev_time_shift",
+ "zfs_vdev_ramp_rate",
+ "zfs_vdev_aggregation_limit",
+ "fzap_default_block_shift",
+ "zfs_immediate_write_sz",
+ "zfs_read_chunk_size",
+ "zil_disable",
+ "zfs_nocacheflush",
+ "zio_gang_bang",
+ "zio_injection_enabled",
+ "zvol_immediate_write_sz",
+ };
+ int i;
+
+ for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
+ int sz;
+ uint64_t val64;
+ uint32_t *val32p = (uint32_t *)&val64;
+
+ sz = mdb_readvar(&val64, params[i]);
+ if (sz == 4) {
+ mdb_printf("%s = 0x%x\n", params[i], *val32p);
+ } else if (sz == 8) {
+ mdb_printf("%s = 0x%llx\n", params[i], val64);
+ } else {
+ mdb_warn("variable %s not found", params[i]);
+ }
+ }
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
blkptr_t bp;
@@ -832,8 +900,8 @@ spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
void
vdev_help(void)
{
- mdb_printf("[vdev_t*]::vdev [-qr]\n"
- "\t-> -q display vdev_queue parameters\n"
+ mdb_printf("[vdev_t*]::vdev [-er]\n"
+ "\t-> -e display vdev stats\n"
"\t-> -r recursive (visit all children)\n");
}
@@ -845,21 +913,12 @@ vdev_help(void)
* ADDR STATE AUX DESC
* fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0
*
- * or with "-q" to print out a vdev_t's vdev_queue parameters:
- *
- * vdev_t: c26ae4c0
- * c26ae73c min pending 0x2
- * c26ae744 max pending 0x23
- * c26ae74c agg limit 0x20000
- * c26ae754 time shift 0x4
- * c26ae75c ramp rate 0x2
- *
* If '-r' is specified, recursively visit all children.
*
* With '-e', the statistics associated with the vdev are printed as well.
*/
static int
-do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
+do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
int recursive)
{
vdev_t vdev;
@@ -954,32 +1013,6 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
- if (queue) {
- mdb_inc_indent(4);
- mdb_printf("\n");
- mdb_printf("%p min pending 0x%llx\n",
- (uintptr_t)(addr + offsetof(vdev_t,
- vdev_queue.vq_min_pending)),
- vdev.vdev_queue.vq_min_pending);
- mdb_printf("%p max pending 0x%llx\n",
- (uintptr_t)(addr + offsetof(vdev_t,
- vdev_queue.vq_max_pending)),
- vdev.vdev_queue.vq_max_pending);
- mdb_printf("%p agg limit 0x%llx\n",
- (uintptr_t)(addr + offsetof(vdev_t,
- vdev_queue.vq_agg_limit)),
- vdev.vdev_queue.vq_agg_limit);
- mdb_printf("%p time shift 0x%llx\n",
- (uintptr_t)(addr + offsetof(vdev_t,
- vdev_queue.vq_time_shift)),
- vdev.vdev_queue.vq_time_shift);
- mdb_printf("%p ramp rate 0x%llx\n",
- (uintptr_t)(addr + offsetof(vdev_t,
- vdev_queue.vq_ramp_rate)),
- vdev.vdev_queue.vq_ramp_rate);
- mdb_dec_indent(4);
- }
-
if (stats) {
vdev_stat_t *vs = &vdev.vdev_stat;
int i;
@@ -1008,7 +1041,7 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
mdb_dec_indent(4);
}
- if (queue || stats)
+ if (stats)
mdb_printf("\n");
}
@@ -1025,7 +1058,7 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
}
for (c = 0; c < children; c++) {
- if (do_print_vdev(child[c], flags, depth + 2, queue, stats,
+ if (do_print_vdev(child[c], flags, depth + 2, stats,
recursive))
return (DCMD_ERR);
}
@@ -1036,12 +1069,10 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
static int
vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
- int print_queue = FALSE;
int recursive = FALSE;
int stats = FALSE;
if (mdb_getopts(argc, argv,
- 'q', MDB_OPT_SETBITS, TRUE, &print_queue,
'r', MDB_OPT_SETBITS, TRUE, &recursive,
'e', MDB_OPT_SETBITS, TRUE, &stats,
NULL) != argc)
@@ -1052,7 +1083,7 @@ vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
return (DCMD_ERR);
}
- return (do_print_vdev(addr, flags, 0, print_queue, stats, recursive));
+ return (do_print_vdev(addr, flags, 0, stats, recursive));
}
typedef struct metaslab_walk_data {
@@ -1546,8 +1577,9 @@ static const mdb_dcmd_t dcmds[] = {
{ "spa_verify", ":", "verify spa_t consistency", spa_verify },
{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
{ "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
- { "vdev", ":[-qre]", "vdev_t summary", vdev_print },
+ { "vdev", ":[-re]", "vdev_t summary", vdev_print },
{ "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
+ { "zfs_params", "", "print zfs tunable parameters", zfs_params },
{ NULL }
};
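
With the table above wired into the dcmd list, every tunable can be dumped
in one pass from mdb. A sketch of the expected output; the values are
illustrative (whatever the running kernel holds, printed with the 0x%x /
0x%llx formats above):

    > ::zfs_params
    arc_reduce_dnlc_percent = 0x3
    zfs_arc_max = 0x0
    ...
    zfs_vdev_cache_max = 0x4000
    zfs_vdev_cache_size = 0xa00000
    zfs_vdev_max_pending = 0x23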
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 1fa0a6b408..a834f95e12 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -2056,6 +2056,8 @@ out:
int
main(int argc, char **argv)
{
+ extern int zfs_vdev_cache_size;
+
int i, c;
struct rlimit rl = { 1024, 1024 };
spa_t *spa;
@@ -2065,7 +2067,6 @@ main(int argc, char **argv)
int verbose = 0;
int error;
int flag, set;
- vdev_knob_t *vk;
(void) setrlimit(RLIMIT_NOFILE, &rl);
(void) enable_extended_FILE_stdio(-1, -1);
@@ -2147,10 +2148,7 @@ main(int argc, char **argv)
* Disable vdev caching. If we don't do this, live pool traversal
* won't make progress because it will never see disk updates.
*/
- for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) {
- if (strcmp(vk->vk_name, "cache_size") == 0)
- vk->vk_default = 0;
- }
+ zfs_vdev_cache_size = 0;
for (c = 0; c < 256; c++) {
if (dump_all && c != 'L' && c != 'l' && c != 'R')
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index a13c620421..ae8d157d1a 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -41,18 +41,6 @@ extern "C" {
extern boolean_t zfs_nocacheflush;
/*
- * Vdev knobs.
- */
-typedef struct vdev_knob {
- char *vk_name; /* knob name */
- char *vk_desc; /* knob description */
- uint64_t vk_min; /* minimum legal value */
- uint64_t vk_max; /* maximum legal value */
- uint64_t vk_default; /* default value */
- size_t vk_offset; /* offset into vdev_t */
-} vdev_knob_t;
-
-/*
* Fault injection modes.
*/
#define VDEV_FAULT_NONE 0
@@ -113,8 +101,6 @@ extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
-extern vdev_knob_t *vdev_knob_next(vdev_knob_t *vk);
-
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index c41cf5402a..d136a8f527 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -91,22 +91,12 @@ struct vdev_cache_entry {
};
struct vdev_cache {
- uint64_t vc_size;
- uint64_t vc_bshift;
- uint64_t vc_blocksize;
- uint64_t vc_max;
avl_tree_t vc_offset_tree;
avl_tree_t vc_lastused_tree;
kmutex_t vc_lock;
};
struct vdev_queue {
- uint64_t vq_min_pending;
- uint64_t vq_max_pending;
- uint64_t vq_scrub_limit;
- uint64_t vq_agg_limit;
- uint64_t vq_time_shift;
- uint64_t vq_ramp_rate;
uint64_t vq_scrub_count;
avl_tree_t vq_deadline_tree;
avl_tree_t vq_read_tree;
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 00eff00202..007833e95e 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -45,15 +45,6 @@
* Virtual device management.
*/
-/*
- * These tunables are for performance analysis, and override the
- * (not-easily-turnable) vdev "knobs".
- */
-int zfs_vdev_cache_max;
-int zfs_vdev_max_pending;
-int zfs_vdev_min_pending;
-int zfs_vdev_time_shift;
-
static vdev_ops_t *vdev_ops_table[] = {
&vdev_root_ops,
&vdev_raidz_ops,
@@ -774,7 +765,6 @@ int
vdev_open(vdev_t *vd)
{
int error;
- vdev_knob_t *vk;
int c;
uint64_t osize = 0;
uint64_t asize, psize;
@@ -791,23 +781,6 @@ vdev_open(vdev_t *vd)
vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
- for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) {
- uint64_t *valp = (uint64_t *)((char *)vd + vk->vk_offset);
-
- *valp = vk->vk_default;
- *valp = MAX(*valp, vk->vk_min);
- *valp = MIN(*valp, vk->vk_max);
- }
-
- if (zfs_vdev_cache_max)
- vd->vdev_cache.vc_max = zfs_vdev_cache_max;
- if (zfs_vdev_max_pending)
- vd->vdev_queue.vq_max_pending = zfs_vdev_max_pending;
- if (zfs_vdev_min_pending)
- vd->vdev_queue.vq_min_pending = zfs_vdev_min_pending;
- if (zfs_vdev_time_shift)
- vd->vdev_queue.vq_time_shift = zfs_vdev_time_shift;
-
if (vd->vdev_ops->vdev_op_leaf) {
vdev_cache_init(vd);
vdev_queue_init(vd);
@@ -1748,96 +1721,6 @@ vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta)
}
/*
- * Various knobs to tune a vdev.
- */
-static vdev_knob_t vdev_knob[] = {
- {
- "cache_size",
- "size of the read-ahead cache",
- 0,
- 1ULL << 30,
- 10ULL << 20,
- offsetof(struct vdev, vdev_cache.vc_size)
- },
- {
- "cache_bshift",
- "log2 of cache blocksize",
- SPA_MINBLOCKSHIFT,
- SPA_MAXBLOCKSHIFT,
- 16,
- offsetof(struct vdev, vdev_cache.vc_bshift)
- },
- {
- "cache_max",
- "largest block size to cache",
- 0,
- SPA_MAXBLOCKSIZE,
- 1ULL << 14,
- offsetof(struct vdev, vdev_cache.vc_max)
- },
- {
- "min_pending",
- "minimum pending I/Os to the disk",
- 1,
- 10000,
- 4,
- offsetof(struct vdev, vdev_queue.vq_min_pending)
- },
- {
- "max_pending",
- "maximum pending I/Os to the disk",
- 1,
- 10000,
- 35,
- offsetof(struct vdev, vdev_queue.vq_max_pending)
- },
- {
- "scrub_limit",
- "maximum scrub/resilver I/O queue",
- 0,
- 10000,
- 70,
- offsetof(struct vdev, vdev_queue.vq_scrub_limit)
- },
- {
- "agg_limit",
- "maximum size of aggregated I/Os",
- 0,
- SPA_MAXBLOCKSIZE,
- SPA_MAXBLOCKSIZE,
- offsetof(struct vdev, vdev_queue.vq_agg_limit)
- },
- {
- "time_shift",
- "deadline = pri + (lbolt >> time_shift)",
- 0,
- 63,
- 6,
- offsetof(struct vdev, vdev_queue.vq_time_shift)
- },
- {
- "ramp_rate",
- "exponential I/O issue ramp-up rate",
- 1,
- 10000,
- 2,
- offsetof(struct vdev, vdev_queue.vq_ramp_rate)
- },
-};
-
-vdev_knob_t *
-vdev_knob_next(vdev_knob_t *vk)
-{
- if (vk == NULL)
- return (vdev_knob);
-
- if (++vk == vdev_knob + sizeof (vdev_knob) / sizeof (vdev_knob_t))
- return (NULL);
-
- return (vk);
-}
-
-/*
* Mark a top-level vdev's config as dirty, placing it on the dirty list
* so that it will be written out next time the vdev configuration is synced.
* If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs.
diff --git a/usr/src/uts/common/fs/zfs/vdev_cache.c b/usr/src/uts/common/fs/zfs/vdev_cache.c
index 67a8924b52..2d8795c660 100644
--- a/usr/src/uts/common/fs/zfs/vdev_cache.c
+++ b/usr/src/uts/common/fs/zfs/vdev_cache.c
@@ -60,9 +60,24 @@
* (4) Write. Update cache contents after write completion.
*
* (5) Evict. When allocating a new entry, we evict the oldest (LRU) entry
- * if the total cache size exceeds vc_size.
+ * if the total cache size exceeds zfs_vdev_cache_size.
*/
+/*
+ * These tunables are for performance analysis.
+ */
+/*
+ * All i/os smaller than zfs_vdev_cache_max will be turned into
+ * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
+ * track buffer). At most zfs_vdev_cache_size bytes will be kept in each
+ * vdev's vdev_cache.
+ */
+int zfs_vdev_cache_max = 1<<14;
+int zfs_vdev_cache_size = 10ULL << 20;
+int zfs_vdev_cache_bshift = 16;
+
+#define VCBS (1 << zfs_vdev_cache_bshift)
+
static int
vdev_cache_offset_compare(const void *a1, const void *a2)
{
@@ -109,7 +124,7 @@ vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
avl_remove(&vc->vc_lastused_tree, ve);
avl_remove(&vc->vc_offset_tree, ve);
- zio_buf_free(ve->ve_data, vc->vc_blocksize);
+ zio_buf_free(ve->ve_data, VCBS);
kmem_free(ve, sizeof (vdev_cache_entry_t));
}
@@ -122,20 +137,20 @@ static vdev_cache_entry_t *
vdev_cache_allocate(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
- uint64_t offset = P2ALIGN(zio->io_offset, vc->vc_blocksize);
+ uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
vdev_cache_entry_t *ve;
ASSERT(MUTEX_HELD(&vc->vc_lock));
- if (vc->vc_size == 0)
+ if (zfs_vdev_cache_size == 0)
return (NULL);
/*
* If adding a new entry would exceed the cache size,
* evict the oldest entry (LRU).
*/
- if ((avl_numnodes(&vc->vc_lastused_tree) << vc->vc_bshift) >
- vc->vc_size) {
+ if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
+ zfs_vdev_cache_size) {
ve = avl_first(&vc->vc_lastused_tree);
if (ve->ve_fill_io != NULL) {
dprintf("can't evict in %p, still filling\n", vc);
@@ -148,7 +163,7 @@ vdev_cache_allocate(zio_t *zio)
ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
ve->ve_offset = offset;
ve->ve_lastused = lbolt;
- ve->ve_data = zio_buf_alloc(vc->vc_blocksize);
+ ve->ve_data = zio_buf_alloc(VCBS);
avl_add(&vc->vc_offset_tree, ve);
avl_add(&vc->vc_lastused_tree, ve);
@@ -159,7 +174,7 @@ vdev_cache_allocate(zio_t *zio)
static void
vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
{
- uint64_t cache_phase = P2PHASE(zio->io_offset, vc->vc_blocksize);
+ uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
ASSERT(MUTEX_HELD(&vc->vc_lock));
ASSERT(ve->ve_fill_io == NULL);
@@ -185,7 +200,7 @@ vdev_cache_fill(zio_t *zio)
vdev_cache_entry_t *ve = zio->io_private;
zio_t *dio;
- ASSERT(zio->io_size == vc->vc_blocksize);
+ ASSERT(zio->io_size == VCBS);
/*
* Add data to the cache.
@@ -227,8 +242,8 @@ vdev_cache_read(zio_t *zio)
{
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
vdev_cache_entry_t *ve, ve_search;
- uint64_t cache_offset = P2ALIGN(zio->io_offset, vc->vc_blocksize);
- uint64_t cache_phase = P2PHASE(zio->io_offset, vc->vc_blocksize);
+ uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
+ uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
zio_t *fio;
ASSERT(zio->io_type == ZIO_TYPE_READ);
@@ -236,17 +251,16 @@ vdev_cache_read(zio_t *zio)
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
return (EINVAL);
- if (zio->io_size > vc->vc_max)
+ if (zio->io_size > zfs_vdev_cache_max)
return (EOVERFLOW);
/*
* If the I/O straddles two or more cache blocks, don't cache it.
*/
- if (P2CROSS(zio->io_offset, zio->io_offset + zio->io_size - 1,
- vc->vc_blocksize))
+ if (P2CROSS(zio->io_offset, zio->io_offset + zio->io_size - 1, VCBS))
return (EXDEV);
- ASSERT(cache_phase + zio->io_size <= vc->vc_blocksize);
+ ASSERT(cache_phase + zio->io_size <= VCBS);
mutex_enter(&vc->vc_lock);
@@ -283,8 +297,7 @@ vdev_cache_read(zio_t *zio)
}
fio = zio_vdev_child_io(zio, NULL, zio->io_vd, cache_offset,
- ve->ve_data, vc->vc_blocksize, ZIO_TYPE_READ,
- ZIO_PRIORITY_CACHE_FILL,
+ ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_CACHE_FILL,
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK,
vdev_cache_fill, ve);
@@ -309,8 +322,8 @@ vdev_cache_write(zio_t *zio)
vdev_cache_entry_t *ve, ve_search;
uint64_t io_start = zio->io_offset;
uint64_t io_end = io_start + zio->io_size;
- uint64_t min_offset = P2ALIGN(io_start, vc->vc_blocksize);
- uint64_t max_offset = P2ROUNDUP(io_end, vc->vc_blocksize);
+ uint64_t min_offset = P2ALIGN(io_start, VCBS);
+ uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
avl_index_t where;
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
@@ -325,7 +338,7 @@ vdev_cache_write(zio_t *zio)
while (ve != NULL && ve->ve_offset < max_offset) {
uint64_t start = MAX(ve->ve_offset, io_start);
- uint64_t end = MIN(ve->ve_offset + vc->vc_blocksize, io_end);
+ uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
if (ve->ve_fill_io != NULL) {
ve->ve_missed_update = 1;
@@ -352,8 +365,6 @@ vdev_cache_init(vdev_t *vd)
avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
sizeof (vdev_cache_entry_t),
offsetof(struct vdev_cache_entry, ve_lastused_node));
-
- vc->vc_blocksize = 1ULL << vc->vc_bshift;
}
void
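
Taken together, the new vdev_cache defaults determine the cache geometry; a
worked example under the defaults above (my arithmetic, not text from the
patch):

    /*
     * VCBS                = 1 << 16  = 64 KB per cache line
     * zfs_vdev_cache_size = 10 << 20 = 10 MB per vdev
     *   => at most 10 MB / 64 KB = 160 cache lines before LRU eviction
     * zfs_vdev_cache_max  = 1 << 14  = 16 KB
     *   => any read of 16 KB or less is inflated to one 64 KB fill
     */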
diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c
index a82abf80b7..b8e79f8c0c 100644
--- a/usr/src/uts/common/fs/zfs/vdev_file.c
+++ b/usr/src/uts/common/fs/zfs/vdev_file.c
@@ -54,14 +54,6 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
-#ifdef _KERNEL
- /*
- * When using a file vdev in kernel context, the underlying filesystem
- * will already be caching the data. Don't cache it again here.
- */
- vd->vdev_cache.vc_size = 0;
-#endif
-
/*
* We always open the files from the root of the global zone, even if
* we're in a local zone. If the user has gotten to this point, the
@@ -156,8 +148,14 @@ vdev_file_io_start(zio_t *zio)
return;
}
+ /*
+ * In the kernel, don't bother double-caching, but in userland,
+ * we want to test the vdev_cache code.
+ */
+#ifndef _KERNEL
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
return;
+#endif
if ((zio = vdev_queue_io(zio)) == NULL)
return;
@@ -186,8 +184,10 @@ vdev_file_io_done(zio_t *zio)
{
vdev_queue_io_done(zio);
+#ifndef _KERNEL
if (zio->io_type == ZIO_TYPE_WRITE)
vdev_cache_write(zio);
+#endif
if (zio_injection_enabled && zio->io_error == 0)
zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c
index 631948bb1b..6b0b2a6f6c 100644
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c
@@ -32,6 +32,33 @@
#include <sys/avl.h>
/*
+ * These tunables are for performance analysis.
+ */
+/*
+ * zfs_vdev_max_pending is the maximum number of i/os concurrently
+ * pending to each device. zfs_vdev_min_pending is the initial number
+ * of i/os pending to each device (before it starts ramping up to
+ * max_pending).
+ */
+int zfs_vdev_max_pending = 35;
+int zfs_vdev_min_pending = 4;
+
+/* maximum scrub/resilver I/O queue */
+int zfs_scrub_limit = 70;
+
+/* deadline = pri + (lbolt >> time_shift) */
+int zfs_vdev_time_shift = 6;
+
+/* exponential I/O issue ramp-up rate */
+int zfs_vdev_ramp_rate = 2;
+
+/*
+ * i/os will be aggregated into a single large i/o up to
+ * zfs_vdev_aggregation_limit bytes long.
+ */
+int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
+
+/*
* Virtual device vector for disk I/O scheduling.
*/
int
@@ -119,7 +146,7 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
avl_add(zio->io_vdev_tree, zio);
if ((zio->io_flags & ZIO_FLAG_SCRUB_THREAD) &&
- ++vq->vq_scrub_count >= vq->vq_scrub_limit)
+ ++vq->vq_scrub_count >= zfs_scrub_limit)
spa_scrub_throttle(zio->io_spa, 1);
}
@@ -127,7 +154,7 @@ static void
vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
if ((zio->io_flags & ZIO_FLAG_SCRUB_THREAD) &&
- vq->vq_scrub_count-- >= vq->vq_scrub_limit)
+ vq->vq_scrub_count-- >= zfs_scrub_limit)
spa_scrub_throttle(zio->io_spa, -1);
avl_remove(&vq->vq_deadline_tree, zio);
@@ -182,14 +209,14 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
size = fio->io_size;
while ((dio = AVL_PREV(tree, fio)) != NULL && IS_ADJACENT(dio, fio) &&
- size + dio->io_size <= vq->vq_agg_limit) {
+ size + dio->io_size <= zfs_vdev_aggregation_limit) {
dio->io_delegate_next = fio;
fio = dio;
size += dio->io_size;
}
while ((dio = AVL_NEXT(tree, lio)) != NULL && IS_ADJACENT(lio, dio) &&
- size + dio->io_size <= vq->vq_agg_limit) {
+ size + dio->io_size <= zfs_vdev_aggregation_limit) {
lio->io_delegate_next = dio;
lio = dio;
size += dio->io_size;
@@ -200,7 +227,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
uint64_t offset = 0;
int nagg = 0;
- ASSERT(size <= vq->vq_agg_limit);
+ ASSERT(size <= zfs_vdev_aggregation_limit);
aio = zio_vdev_child_io(fio, NULL, fio->io_vd,
fio->io_offset, buf, size, fio->io_type,
@@ -266,12 +293,12 @@ vdev_queue_io(zio_t *zio)
mutex_enter(&vq->vq_lock);
- zio->io_deadline = (zio->io_timestamp >> vq->vq_time_shift) +
+ zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
zio->io_priority;
vdev_queue_io_add(vq, zio);
- nio = vdev_queue_io_to_issue(vq, vq->vq_min_pending, &func);
+ nio = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending, &func);
mutex_exit(&vq->vq_lock);
@@ -294,8 +321,8 @@ vdev_queue_io_done(zio_t *zio)
avl_remove(&vq->vq_pending_tree, zio);
- for (i = 0; i < vq->vq_ramp_rate; i++) {
- nio = vdev_queue_io_to_issue(vq, vq->vq_max_pending, &func);
+ for (i = 0; i < zfs_vdev_ramp_rate; i++) {
+ nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending, &func);
if (nio == NULL)
break;
mutex_exit(&vq->vq_lock);
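
For a feel of the deadline scheduling that zfs_vdev_time_shift now controls,
a worked example (the 100 Hz lbolt rate is the traditional clock tick and is
my assumption, not stated in the patch):

    /*
     * deadline = (io_timestamp >> zfs_vdev_time_shift) + io_priority
     * With zfs_vdev_time_shift = 6, timestamps fall into 1 << 6 = 64-tick
     * buckets (640 ms at 100 Hz). I/Os issued within the same bucket are
     * ordered purely by priority; across buckets the older bucket always
     * sorts first, so no I/O is starved indefinitely.
     */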