6472021 vdev knobs can not be turned

author: ahrens <none@none> 2006-11-03 11:39:28 -0800
committer: ahrens <none@none> 2006-11-03 11:39:28 -0800
commit: 614409b5be5411058e7e9b6cc93dddaff9fb13f7 (patch)
tree: 994d814287dee3e4d808d3f845b3f62e5c99acbc /usr/src
parent: ada9354b28215e27f2a3b25b9c352681c9cbdfa1 (diff)
download: illumos-gate-614409b5be5411058e7e9b6cc93dddaff9fb13f7.tar.gz
8 files changed, 156 insertions, 229 deletions
diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
index d3132acc61..57ee55f050 100644
--- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c
+++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -329,6 +329,74 @@ zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 
 /* ARGSUSED */
 static int
+zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+	/*
+	 * Tunables to print.  This table can be approximately generated by:
+	 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
+	 */
+	static const char *params[] = {
+		"arc_reduce_dnlc_percent",
+		"zfs_arc_max",
+		"zfs_arc_min",
+		"arc_kmem_reclaim_shift",
+		"zfs_mdcomp_disable",
+		"zfs_prefetch_disable",
+		"zfetch_max_streams",
+		"zfetch_min_sec_reap",
+		"zfetch_block_cap",
+		"zfetch_array_rd_sz",
+		"zfs_default_bs",
+		"zfs_default_ibs",
+		"metaslab_aliquot",
+		"reference_tracking_enable",
+		"reference_history",
+		"zio_taskq_threads",
+		"spa_max_replication_override",
+		"spa_mode",
+		"zfs_flags",
+		"txg_time",
+		"zfs_vdev_cache_max",
+		"zfs_vdev_cache_size",
+		"zfs_vdev_cache_bshift",
+		"vdev_mirror_shift",
+		"zfs_vdev_max_pending",
+		"zfs_vdev_min_pending",
+		"zfs_scrub_limit",
+		"zfs_vdev_time_shift",
+		"zfs_vdev_ramp_rate",
+		"zfs_vdev_aggregation_limit",
+		"fzap_default_block_shift",
+		"zfs_immediate_write_sz",
+		"zfs_read_chunk_size",
+		"zil_disable",
+		"zfs_nocacheflush",
+		"zio_gang_bang",
+		"zio_injection_enabled",
+		"zvol_immediate_write_sz",
+	};
+	int i;
+
+	for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
+		int sz;
+		uint64_t val64;	/* room for a 4- or 8-byte value */
+		uint32_t *val32p = (uint32_t *)&val64;	/* first 4 bytes */
+
+		sz = mdb_readvar(&val64, params[i]);
+		if (sz == 4) {	/* print as a 32-bit value */
+			mdb_printf("%s = 0x%x\n", params[i], *val32p);
+		} else if (sz == 8) {	/* print as a 64-bit value */
+			mdb_printf("%s = 0x%llx\n", params[i], val64);
+		} else {	/* any other result: warn and continue */
+			mdb_warn("variable %s not found", params[i]);
+		}
+	}
+
+	return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 {
 	blkptr_t bp;
@@ -832,8 +900,8 @@ spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 void
 vdev_help(void)
 {
-	mdb_printf("[vdev_t*]::vdev [-qr]\n"
-		"\t-> -q display vdev_queue parameters\n"
+	mdb_printf("[vdev_t*]::vdev [-er]\n"
+		"\t-> -e display vdev stats\n"
 		"\t-> -r recursive (visit all children)\n");
 }
 
@@ -845,21 +913,12 @@ vdev_help(void)
  * ADDR             STATE	AUX            DESC
  * fffffffbcde23df0 HEALTHY	-              /dev/dsk/c0t0d0
  *
- * or with "-q" to print out a vdev_t's vdev_queue parameters:
- *
- *  vdev_t: c26ae4c0
- *     c26ae73c min pending         0x2
- *     c26ae744 max pending         0x23
- *     c26ae74c agg limit           0x20000
- *     c26ae754 time shift          0x4
- *     c26ae75c ramp rate           0x2
- *
  * If '-r' is specified, recursively visit all children.
  *
  * With '-e', the statistics associated with the vdev are printed as well.
  */
 static int
-do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
+do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
     int recursive)
 {
 	vdev_t vdev;
@@ -954,32 +1013,6 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
 
 		mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
 
-		if (queue) {
-			mdb_inc_indent(4);
-			mdb_printf("\n");
-			mdb_printf("%p min pending		0x%llx\n",
-			    (uintptr_t)(addr + offsetof(vdev_t,
-			    vdev_queue.vq_min_pending)),
-			    vdev.vdev_queue.vq_min_pending);
-			mdb_printf("%p max pending		0x%llx\n",
-			    (uintptr_t)(addr + offsetof(vdev_t,
-			    vdev_queue.vq_max_pending)),
-			    vdev.vdev_queue.vq_max_pending);
-			mdb_printf("%p agg limit		0x%llx\n",
-			    (uintptr_t)(addr + offsetof(vdev_t,
-			    vdev_queue.vq_agg_limit)),
-			    vdev.vdev_queue.vq_agg_limit);
-			mdb_printf("%p time shift		0x%llx\n",
-			    (uintptr_t)(addr + offsetof(vdev_t,
-			    vdev_queue.vq_time_shift)),
-			    vdev.vdev_queue.vq_time_shift);
-			mdb_printf("%p ramp rate 		0x%llx\n",
-			    (uintptr_t)(addr + offsetof(vdev_t,
-			    vdev_queue.vq_ramp_rate)),
-			    vdev.vdev_queue.vq_ramp_rate);
-			mdb_dec_indent(4);
-		}
-
 		if (stats) {
 			vdev_stat_t *vs = &vdev.vdev_stat;
 			int i;
@@ -1008,7 +1041,7 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
 			mdb_dec_indent(4);
 		}
 
-		if (queue || stats)
+		if (stats)
 			mdb_printf("\n");
 	}
 
@@ -1025,7 +1058,7 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
 	}
 
 	for (c = 0; c < children; c++) {
-		if (do_print_vdev(child[c], flags, depth + 2, queue, stats,
+		if (do_print_vdev(child[c], flags, depth + 2, stats,
 		    recursive))
 			return (DCMD_ERR);
 	}
@@ -1036,12 +1069,10 @@ do_print_vdev(uintptr_t addr, int flags, int depth, int queue, int stats,
 static int
 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 {
-	int print_queue = FALSE;
 	int recursive = FALSE;
 	int stats = FALSE;
 
 	if (mdb_getopts(argc, argv,
-	    'q', MDB_OPT_SETBITS, TRUE, &print_queue,
 	    'r', MDB_OPT_SETBITS, TRUE, &recursive,
 	    'e', MDB_OPT_SETBITS, TRUE, &stats,
 	    NULL) != argc)
@@ -1052,7 +1083,7 @@ vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 		return (DCMD_ERR);
 	}
 
-	return (do_print_vdev(addr, flags, 0, print_queue, stats, recursive));
+	return (do_print_vdev(addr, flags, 0, stats, recursive));
 }
 
 typedef struct metaslab_walk_data {
@@ -1546,8 +1577,9 @@ static const mdb_dcmd_t dcmds[] = {
 	{ "spa_verify", ":", "verify spa_t consistency", spa_verify },
 	{ "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
 	{ "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
-	{ "vdev", ":[-qre]", "vdev_t summary", vdev_print },
+	{ "vdev", ":[-re]", "vdev_t summary", vdev_print },
 	{ "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
+	{ "zfs_params", "", "print zfs tunable parameters", zfs_params },
 	{ NULL }
 };
 
diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c
index 1fa0a6b408..a834f95e12 100644
--- a/usr/src/cmd/zdb/zdb.c
+++ b/usr/src/cmd/zdb/zdb.c
@@ -2056,6 +2056,8 @@ out:
 int
 main(int argc, char **argv)
 {
+	extern int zfs_vdev_cache_size;
+
 	int i, c;
 	struct rlimit rl = { 1024, 1024 };
 	spa_t *spa;
@@ -2065,7 +2067,6 @@ main(int argc, char **argv)
 	int verbose = 0;
 	int error;
 	int flag, set;
-	vdev_knob_t *vk;
 
 	(void) setrlimit(RLIMIT_NOFILE, &rl);
 	(void) enable_extended_FILE_stdio(-1, -1);
@@ -2147,10 +2148,7 @@ main(int argc, char **argv)
 	 * Disable vdev caching.  If we don't do this, live pool traversal
 	 * won't make progress because it will never see disk updates.
 	 */
-	for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) {
-		if (strcmp(vk->vk_name, "cache_size") == 0)
-			vk->vk_default = 0;
-	}
+	zfs_vdev_cache_size = 0;
 
 	for (c = 0; c < 256; c++) {
 		if (dump_all && c != 'L' && c != 'l' && c != 'R')
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h
index a13c620421..ae8d157d1a 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev.h
@@ -41,18 +41,6 @@ extern "C" {
 extern boolean_t zfs_nocacheflush;
 
 /*
- * Vdev knobs.
- */
-typedef struct vdev_knob {
-	char		*vk_name;		/* knob name		*/
-	char		*vk_desc;		/* knob description	*/
-	uint64_t	vk_min;			/* minimum legal value	*/
-	uint64_t	vk_max;			/* maximum legal value	*/
-	uint64_t	vk_default;		/* default value	*/
-	size_t		vk_offset;		/* offset into vdev_t	*/
-} vdev_knob_t;
-
-/*
  * Fault injection modes.
  */
 #define	VDEV_FAULT_NONE		0
@@ -113,8 +101,6 @@ extern void vdev_queue_fini(vdev_t *vd);
 extern zio_t *vdev_queue_io(zio_t *zio);
 extern void vdev_queue_io_done(zio_t *zio);
 
-extern vdev_knob_t *vdev_knob_next(vdev_knob_t *vk);
-
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
 extern int vdev_config_sync(vdev_t *vd, uint64_t txg);
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index c41cf5402a..d136a8f527 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -91,22 +91,12 @@ struct vdev_cache_entry {
 };
 
 struct vdev_cache {
-	uint64_t	vc_size;
-	uint64_t	vc_bshift;
-	uint64_t	vc_blocksize;
-	uint64_t	vc_max;
 	avl_tree_t	vc_offset_tree;
 	avl_tree_t	vc_lastused_tree;
 	kmutex_t	vc_lock;
 };
 
 struct vdev_queue {
-	uint64_t	vq_min_pending;
-	uint64_t	vq_max_pending;
-	uint64_t	vq_scrub_limit;
-	uint64_t	vq_agg_limit;
-	uint64_t	vq_time_shift;
-	uint64_t	vq_ramp_rate;
 	uint64_t	vq_scrub_count;
 	avl_tree_t	vq_deadline_tree;
 	avl_tree_t	vq_read_tree;
diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c
index 00eff00202..007833e95e 100644
--- a/usr/src/uts/common/fs/zfs/vdev.c
+++ b/usr/src/uts/common/fs/zfs/vdev.c
@@ -45,15 +45,6 @@
  * Virtual device management.
  */
 
-/*
- * These tunables are for performance analysis, and override the
- * (not-easily-turnable) vdev "knobs".
- */
-int zfs_vdev_cache_max;
-int zfs_vdev_max_pending;
-int zfs_vdev_min_pending;
-int zfs_vdev_time_shift;
-
 static vdev_ops_t *vdev_ops_table[] = {
 	&vdev_root_ops,
 	&vdev_raidz_ops,
@@ -774,7 +765,6 @@ int
 vdev_open(vdev_t *vd)
 {
 	int error;
-	vdev_knob_t *vk;
 	int c;
 	uint64_t osize = 0;
 	uint64_t asize, psize;
@@ -791,23 +781,6 @@ vdev_open(vdev_t *vd)
 
 	vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
 
-	for (vk = vdev_knob_next(NULL); vk != NULL; vk = vdev_knob_next(vk)) {
-		uint64_t *valp = (uint64_t *)((char *)vd + vk->vk_offset);
-
-		*valp = vk->vk_default;
-		*valp = MAX(*valp, vk->vk_min);
-		*valp = MIN(*valp, vk->vk_max);
-	}
-
-	if (zfs_vdev_cache_max)
-		vd->vdev_cache.vc_max = zfs_vdev_cache_max;
-	if (zfs_vdev_max_pending)
-		vd->vdev_queue.vq_max_pending = zfs_vdev_max_pending;
-	if (zfs_vdev_min_pending)
-		vd->vdev_queue.vq_min_pending = zfs_vdev_min_pending;
-	if (zfs_vdev_time_shift)
-		vd->vdev_queue.vq_time_shift = zfs_vdev_time_shift;
-
 	if (vd->vdev_ops->vdev_op_leaf) {
 		vdev_cache_init(vd);
 		vdev_queue_init(vd);
@@ -1748,96 +1721,6 @@ vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta)
 }
 
 /*
- * Various knobs to tune a vdev.
- */
-static vdev_knob_t vdev_knob[] = {
-	{
-		"cache_size",
-		"size of the read-ahead cache",
-		0,
-		1ULL << 30,
-		10ULL << 20,
-		offsetof(struct vdev, vdev_cache.vc_size)
-	},
-	{
-		"cache_bshift",
-		"log2 of cache blocksize",
-		SPA_MINBLOCKSHIFT,
-		SPA_MAXBLOCKSHIFT,
-		16,
-		offsetof(struct vdev, vdev_cache.vc_bshift)
-	},
-	{
-		"cache_max",
-		"largest block size to cache",
-		0,
-		SPA_MAXBLOCKSIZE,
-		1ULL << 14,
-		offsetof(struct vdev, vdev_cache.vc_max)
-	},
-	{
-		"min_pending",
-		"minimum pending I/Os to the disk",
-		1,
-		10000,
-		4,
-		offsetof(struct vdev, vdev_queue.vq_min_pending)
-	},
-	{
-		"max_pending",
-		"maximum pending I/Os to the disk",
-		1,
-		10000,
-		35,
-		offsetof(struct vdev, vdev_queue.vq_max_pending)
-	},
-	{
-		"scrub_limit",
-		"maximum scrub/resilver I/O queue",
-		0,
-		10000,
-		70,
-		offsetof(struct vdev, vdev_queue.vq_scrub_limit)
-	},
-	{
-		"agg_limit",
-		"maximum size of aggregated I/Os",
-		0,
-		SPA_MAXBLOCKSIZE,
-		SPA_MAXBLOCKSIZE,
-		offsetof(struct vdev, vdev_queue.vq_agg_limit)
-	},
-	{
-		"time_shift",
-		"deadline = pri + (lbolt >> time_shift)",
-		0,
-		63,
-		6,
-		offsetof(struct vdev, vdev_queue.vq_time_shift)
-	},
-	{
-		"ramp_rate",
-		"exponential I/O issue ramp-up rate",
-		1,
-		10000,
-		2,
-		offsetof(struct vdev, vdev_queue.vq_ramp_rate)
-	},
-};
-
-vdev_knob_t *
-vdev_knob_next(vdev_knob_t *vk)
-{
-	if (vk == NULL)
-		return (vdev_knob);
-
-	if (++vk == vdev_knob + sizeof (vdev_knob) / sizeof (vdev_knob_t))
-		return (NULL);
-
-	return (vk);
-}
-
-/*
  * Mark a top-level vdev's config as dirty, placing it on the dirty list
  * so that it will be written out next time the vdev configuration is synced.
  * If the root vdev is specified (vdev_top == NULL), dirty all top-level vdevs.
diff --git a/usr/src/uts/common/fs/zfs/vdev_cache.c b/usr/src/uts/common/fs/zfs/vdev_cache.c
index 67a8924b52..2d8795c660 100644
--- a/usr/src/uts/common/fs/zfs/vdev_cache.c
+++ b/usr/src/uts/common/fs/zfs/vdev_cache.c
@@ -60,9 +60,24 @@
  * (4) Write.  Update cache contents after write completion.
  *
  * (5) Evict.  When allocating a new entry, we evict the oldest (LRU) entry
- *     if the total cache size exceeds vc_size.
+ *     if the total cache size exceeds zfs_vdev_cache_size.
  */
 
+/*
+ * These tunables are for performance analysis.
+ */
+/*
+ * All i/os no larger than zfs_vdev_cache_max will be turned into
+ * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
+ * track buffer).  At most zfs_vdev_cache_size bytes will be kept in each
+ * vdev's vdev_cache.
+ */
+int zfs_vdev_cache_max = 1<<14;
+int zfs_vdev_cache_size = 10ULL << 20;
+int zfs_vdev_cache_bshift = 16;
+
+#define	VCBS (1 << zfs_vdev_cache_bshift)
+
 static int
 vdev_cache_offset_compare(const void *a1, const void *a2)
 {
@@ -109,7 +124,7 @@ vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
 
 	avl_remove(&vc->vc_lastused_tree, ve);
 	avl_remove(&vc->vc_offset_tree, ve);
-	zio_buf_free(ve->ve_data, vc->vc_blocksize);
+	zio_buf_free(ve->ve_data, VCBS);
 	kmem_free(ve, sizeof (vdev_cache_entry_t));
 }
 
@@ -122,20 +137,20 @@ static vdev_cache_entry_t *
 vdev_cache_allocate(zio_t *zio)
 {
 	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
-	uint64_t offset = P2ALIGN(zio->io_offset, vc->vc_blocksize);
+	uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
 	vdev_cache_entry_t *ve;
 
 	ASSERT(MUTEX_HELD(&vc->vc_lock));
 
-	if (vc->vc_size == 0)
+	if (zfs_vdev_cache_size == 0)
 		return (NULL);
 
 	/*
 	 * If adding a new entry would exceed the cache size,
 	 * evict the oldest entry (LRU).
 	 */
-	if ((avl_numnodes(&vc->vc_lastused_tree) << vc->vc_bshift) >
-	    vc->vc_size) {
+	if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
+	    zfs_vdev_cache_size) {
 		ve = avl_first(&vc->vc_lastused_tree);
 		if (ve->ve_fill_io != NULL) {
 			dprintf("can't evict in %p, still filling\n", vc);
@@ -148,7 +163,7 @@ vdev_cache_allocate(zio_t *zio)
 	ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
 	ve->ve_offset = offset;
 	ve->ve_lastused = lbolt;
-	ve->ve_data = zio_buf_alloc(vc->vc_blocksize);
+	ve->ve_data = zio_buf_alloc(VCBS);
 
 	avl_add(&vc->vc_offset_tree, ve);
 	avl_add(&vc->vc_lastused_tree, ve);
@@ -159,7 +174,7 @@ vdev_cache_allocate(zio_t *zio)
 static void
 vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
 {
-	uint64_t cache_phase = P2PHASE(zio->io_offset, vc->vc_blocksize);
+	uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
 
 	ASSERT(MUTEX_HELD(&vc->vc_lock));
 	ASSERT(ve->ve_fill_io == NULL);
@@ -185,7 +200,7 @@ vdev_cache_fill(zio_t *zio)
 	vdev_cache_entry_t *ve = zio->io_private;
 	zio_t *dio;
 
-	ASSERT(zio->io_size == vc->vc_blocksize);
+	ASSERT(zio->io_size == VCBS);
 
 	/*
 	 * Add data to the cache.
@@ -227,8 +242,8 @@ vdev_cache_read(zio_t *zio)
 {
 	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
 	vdev_cache_entry_t *ve, ve_search;
-	uint64_t cache_offset = P2ALIGN(zio->io_offset, vc->vc_blocksize);
-	uint64_t cache_phase = P2PHASE(zio->io_offset, vc->vc_blocksize);
+	uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
+	uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
 	zio_t *fio;
 
 	ASSERT(zio->io_type == ZIO_TYPE_READ);
@@ -236,17 +251,16 @@ vdev_cache_read(zio_t *zio)
 	if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
 		return (EINVAL);
 
-	if (zio->io_size > vc->vc_max)
+	if (zio->io_size > zfs_vdev_cache_max)
 		return (EOVERFLOW);
 
 	/*
 	 * If the I/O straddles two or more cache blocks, don't cache it.
 	 */
-	if (P2CROSS(zio->io_offset, zio->io_offset + zio->io_size - 1,
-	    vc->vc_blocksize))
+	if (P2CROSS(zio->io_offset, zio->io_offset + zio->io_size - 1, VCBS))
 		return (EXDEV);
 
-	ASSERT(cache_phase + zio->io_size <= vc->vc_blocksize);
+	ASSERT(cache_phase + zio->io_size <= VCBS);
 
 	mutex_enter(&vc->vc_lock);
 
@@ -283,8 +297,7 @@ vdev_cache_read(zio_t *zio)
 	}
 
 	fio = zio_vdev_child_io(zio, NULL, zio->io_vd, cache_offset,
-	    ve->ve_data, vc->vc_blocksize, ZIO_TYPE_READ,
-	    ZIO_PRIORITY_CACHE_FILL,
+	    ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_CACHE_FILL,
 	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
 	    ZIO_FLAG_DONT_RETRY | ZIO_FLAG_NOBOOKMARK,
 	    vdev_cache_fill, ve);
@@ -309,8 +322,8 @@ vdev_cache_write(zio_t *zio)
 	vdev_cache_entry_t *ve, ve_search;
 	uint64_t io_start = zio->io_offset;
 	uint64_t io_end = io_start + zio->io_size;
-	uint64_t min_offset = P2ALIGN(io_start, vc->vc_blocksize);
-	uint64_t max_offset = P2ROUNDUP(io_end, vc->vc_blocksize);
+	uint64_t min_offset = P2ALIGN(io_start, VCBS);
+	uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
 	avl_index_t where;
 
 	ASSERT(zio->io_type == ZIO_TYPE_WRITE);
@@ -325,7 +338,7 @@ vdev_cache_write(zio_t *zio)
 
 	while (ve != NULL && ve->ve_offset < max_offset) {
 		uint64_t start = MAX(ve->ve_offset, io_start);
-		uint64_t end = MIN(ve->ve_offset + vc->vc_blocksize, io_end);
+		uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
 
 		if (ve->ve_fill_io != NULL) {
 			ve->ve_missed_update = 1;
@@ -352,8 +365,6 @@ vdev_cache_init(vdev_t *vd)
 	avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
 	    sizeof (vdev_cache_entry_t),
 	    offsetof(struct vdev_cache_entry, ve_lastused_node));
-
-	vc->vc_blocksize = 1ULL << vc->vc_bshift;
 }
 
 void
diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c
index a82abf80b7..b8e79f8c0c 100644
--- a/usr/src/uts/common/fs/zfs/vdev_file.c
+++ b/usr/src/uts/common/fs/zfs/vdev_file.c
@@ -54,14 +54,6 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
 
 	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
 
-#ifdef _KERNEL
-	/*
-	 * When using a file vdev in kernel context, the underlying filesystem
-	 * will already be caching the data.  Don't cache it again here.
-	 */
-	vd->vdev_cache.vc_size = 0;
-#endif
-
 	/*
 	 * We always open the files from the root of the global zone, even if
 	 * we're in a local zone.  If the user has gotten to this point, the
@@ -156,8 +148,14 @@ vdev_file_io_start(zio_t *zio)
 		return;
 	}
 
+	/*
+	 * In the kernel, don't bother double-caching, but in userland,
+	 * we want to test the vdev_cache code.
+	 */
+#ifndef _KERNEL
 	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
 		return;
+#endif
 
 	if ((zio = vdev_queue_io(zio)) == NULL)
 		return;
@@ -186,8 +184,10 @@ vdev_file_io_done(zio_t *zio)
 {
 	vdev_queue_io_done(zio);
 
+#ifndef _KERNEL
 	if (zio->io_type == ZIO_TYPE_WRITE)
 		vdev_cache_write(zio);
+#endif
 
 	if (zio_injection_enabled && zio->io_error == 0)
 		zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c
index 631948bb1b..6b0b2a6f6c 100644
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c
@@ -32,6 +32,33 @@
 #include <sys/avl.h>
 
 /*
+ * These tunables are for performance analysis.
+ */
+/*
+ * zfs_vdev_max_pending is the maximum number of i/os concurrently
+ * pending to each device.  zfs_vdev_min_pending is the initial number
+ * of i/os pending to each device (before it starts ramping up to
+ * max_pending).
+ */
+int zfs_vdev_max_pending = 35;
+int zfs_vdev_min_pending = 4;
+
+/* maximum scrub/resilver I/O queue */
+int zfs_scrub_limit = 70;
+
+/* deadline = pri + (lbolt >> time_shift) */
+int zfs_vdev_time_shift = 6;
+
+/* exponential I/O issue ramp-up rate */
+int zfs_vdev_ramp_rate = 2;
+
+/*
+ * i/os will be aggregated into a single large i/o up to
+ * zfs_vdev_aggregation_limit bytes long.
+ */
+int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
+
+/*
  * Virtual device vector for disk I/O scheduling.
  */
 int
@@ -119,7 +146,7 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
 	avl_add(zio->io_vdev_tree, zio);
 
 	if ((zio->io_flags & ZIO_FLAG_SCRUB_THREAD) &&
-	    ++vq->vq_scrub_count >= vq->vq_scrub_limit)
+	    ++vq->vq_scrub_count >= zfs_scrub_limit)
 		spa_scrub_throttle(zio->io_spa, 1);
 }
 
@@ -127,7 +154,7 @@ static void
 vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 {
 	if ((zio->io_flags & ZIO_FLAG_SCRUB_THREAD) &&
-	    vq->vq_scrub_count-- >= vq->vq_scrub_limit)
+	    vq->vq_scrub_count-- >= zfs_scrub_limit)
 		spa_scrub_throttle(zio->io_spa, -1);
 
 	avl_remove(&vq->vq_deadline_tree, zio);
@@ -182,14 +209,14 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 	size = fio->io_size;
 
 	while ((dio = AVL_PREV(tree, fio)) != NULL && IS_ADJACENT(dio, fio) &&
-	    size + dio->io_size <= vq->vq_agg_limit) {
+	    size + dio->io_size <= zfs_vdev_aggregation_limit) {
 		dio->io_delegate_next = fio;
 		fio = dio;
 		size += dio->io_size;
 	}
 
 	while ((dio = AVL_NEXT(tree, lio)) != NULL && IS_ADJACENT(lio, dio) &&
-	    size + dio->io_size <= vq->vq_agg_limit) {
+	    size + dio->io_size <= zfs_vdev_aggregation_limit) {
 		lio->io_delegate_next = dio;
 		lio = dio;
 		size += dio->io_size;
@@ -200,7 +227,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 		uint64_t offset = 0;
 		int nagg = 0;
 
-		ASSERT(size <= vq->vq_agg_limit);
+		ASSERT(size <= zfs_vdev_aggregation_limit);
 
 		aio = zio_vdev_child_io(fio, NULL, fio->io_vd,
 		    fio->io_offset, buf, size, fio->io_type,
@@ -266,12 +293,12 @@ vdev_queue_io(zio_t *zio)
 
 	mutex_enter(&vq->vq_lock);
 
-	zio->io_deadline = (zio->io_timestamp >> vq->vq_time_shift) +
+	zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
 	    zio->io_priority;
 
 	vdev_queue_io_add(vq, zio);
 
-	nio = vdev_queue_io_to_issue(vq, vq->vq_min_pending, &func);
+	nio = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending, &func);
 
 	mutex_exit(&vq->vq_lock);
 
@@ -294,8 +321,8 @@ vdev_queue_io_done(zio_t *zio)
 
 	avl_remove(&vq->vq_pending_tree, zio);
 
-	for (i = 0; i < vq->vq_ramp_rate; i++) {
-		nio = vdev_queue_io_to_issue(vq, vq->vq_max_pending, &func);
+	for (i = 0; i < zfs_vdev_ramp_rate; i++) {
+		nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending, &func);
 		if (nio == NULL)
 			break;
 		mutex_exit(&vq->vq_lock);
author	ahrens <none@none>	2006-11-03 11:39:28 -0800
committer	ahrens <none@none>	2006-11-03 11:39:28 -0800
commit	614409b5be5411058e7e9b6cc93dddaff9fb13f7 (patch)
tree	994d814287dee3e4d808d3f845b3f62e5c99acbc /usr/src
parent	ada9354b28215e27f2a3b25b9c352681c9cbdfa1 (diff)
download	illumos-gate-614409b5be5411058e7e9b6cc93dddaff9fb13f7.tar.gz