summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeil Perrin <Neil.Perrin@Sun.COM>2009-08-14 11:18:12 -0600
committerNeil Perrin <Neil.Perrin@Sun.COM>2009-08-14 11:18:12 -0600
commite09fa4dacfb671e707d50a55ae9b5cc191e1b8cb (patch)
treec030579dcc87f0eab65e5e5152a6ca3ac6405dd4
parent63f531d1cf94e7ff3e74e15ca709808d96e239f3 (diff)
downloadillumos-gate-e09fa4dacfb671e707d50a55ae9b5cc191e1b8cb.tar.gz
PSARC 2009/423 ZFS logbias property
6832481 ZFS separate intent log bypass property
-rw-r--r--usr/src/common/zfs/zfs_prop.c9
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_objset.c23
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/dmu_objset.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zil.h2
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zil_impl.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zio.h2
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_log.c9
-rw-r--r--usr/src/uts/common/fs/zfs/zil.c12
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c9
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c9
-rw-r--r--usr/src/uts/common/sys/fs/zfs.h6
12 files changed, 73 insertions, 11 deletions
diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c
index 6a3284609c..86a399bd59 100644
--- a/usr/src/common/zfs/zfs_prop.c
+++ b/usr/src/common/zfs/zfs_prop.c
@@ -152,6 +152,12 @@ zfs_prop_init(void)
{ NULL }
};
+ static zprop_index_t logbias_table[] = {
+ { "latency", ZFS_LOGBIAS_LATENCY },
+ { "throughput", ZFS_LOGBIAS_THROUGHPUT },
+ { NULL }
+ };
+
static zprop_index_t canmount_table[] = {
{ "off", ZFS_CANMOUNT_OFF },
{ "on", ZFS_CANMOUNT_ON },
@@ -196,6 +202,9 @@ zfs_prop_init(void)
ZFS_CACHE_ALL, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
"all | none | metadata", "SECONDARYCACHE", cache_table);
+ register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
+ PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "latency | throughput", "LOGBIAS", logbias_table);
/* inherit index (boolean) properties */
register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c
index 6431fd76ad..7bfe8fe874 100644
--- a/usr/src/uts/common/fs/zfs/dmu_objset.c
+++ b/usr/src/uts/common/fs/zfs/dmu_objset.c
@@ -91,6 +91,12 @@ dmu_objset_id(objset_t *os)
return (ds ? ds->ds_object : 0);
}
+uint64_t
+dmu_objset_logbias(objset_t *os)
+{
+ return (os->os_logbias);
+}
+
static void
checksum_changed_cb(void *arg, uint64_t newval)
{
@@ -159,6 +165,18 @@ secondary_cache_changed_cb(void *arg, uint64_t newval)
os->os_secondary_cache = newval;
}
+static void
+logbias_changed_cb(void *arg, uint64_t newval)
+{
+ objset_t *os = arg;
+
+ ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
+ newval == ZFS_LOGBIAS_THROUGHPUT);
+ os->os_logbias = newval;
+ if (os->os_zil)
+ zil_set_logbias(os->os_zil, newval);
+}
+
void
dmu_objset_byteswap(void *buf, size_t size)
{
@@ -262,6 +280,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (err == 0)
err = dsl_prop_register(ds, "copies",
copies_changed_cb, os);
+ if (err == 0)
+ err = dsl_prop_register(ds, "logbias",
+ logbias_changed_cb, os);
}
if (err) {
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
@@ -447,6 +468,8 @@ dmu_objset_evict(objset_t *os)
compression_changed_cb, os));
VERIFY(0 == dsl_prop_unregister(ds, "copies",
copies_changed_cb, os));
+ VERIFY(0 == dsl_prop_unregister(ds, "logbias",
+ logbias_changed_cb, os));
}
VERIFY(0 == dsl_prop_unregister(ds, "primarycache",
primary_cache_changed_cb, os));
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h
index ac22e0e322..4e40985f67 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu.h
@@ -566,6 +566,7 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
+extern uint64_t dmu_objset_logbias(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
index a1509e0b26..5c60ab1626 100644
--- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
+++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h
@@ -72,6 +72,7 @@ struct objset {
uint8_t os_copies; /* can change, under dsl_dir's locks */
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
+ uint8_t os_logbias; /* can change, under dsl_dir's locks */
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h
index 2aff8cd68c..acec5da2d9 100644
--- a/usr/src/uts/common/fs/zfs/sys/zil.h
+++ b/usr/src/uts/common/fs/zfs/sys/zil.h
@@ -397,6 +397,8 @@ extern void zil_resume(zilog_t *zilog);
extern void zil_add_block(zilog_t *zilog, blkptr_t *bp);
+extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval);
+
extern int zil_disable;
#ifdef __cplusplus
diff --git a/usr/src/uts/common/fs/zfs/sys/zil_impl.h b/usr/src/uts/common/fs/zfs/sys/zil_impl.h
index 685305fb56..32daeaf8b7 100644
--- a/usr/src/uts/common/fs/zfs/sys/zil_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/zil_impl.h
@@ -83,6 +83,7 @@ struct zilog {
uint8_t zl_stop_sync; /* for debugging */
uint8_t zl_writer; /* boolean: write setup in progress */
uint8_t zl_log_error; /* boolean: log write error */
+ uint8_t zl_logbias; /* latency or throughput */
list_t zl_itx_list; /* in-memory itx list */
uint64_t zl_itx_list_sz; /* total size of records on list */
uint64_t zl_cur_used; /* current commit log size used */
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index e47d8f468c..a85a1cdfcb 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -383,7 +383,7 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
boolean_t labels);
extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp,
- blkptr_t *old_bp, uint64_t txg);
+ blkptr_t *old_bp, uint64_t txg, boolean_t bypass_slog);
extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg);
extern void zio_flush(zio_t *zio, vdev_t *vd);
diff --git a/usr/src/uts/common/fs/zfs/zfs_log.c b/usr/src/uts/common/fs/zfs/zfs_log.c
index 3f0b6b0ed3..4df7115f58 100644
--- a/usr/src/uts/common/fs/zfs/zfs_log.c
+++ b/usr/src/uts/common/fs/zfs/zfs_log.c
@@ -474,14 +474,19 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_wr_state_t write_state;
boolean_t slogging;
uintptr_t fsync_cnt;
+ ssize_t immediate_write_sz;
if (zilog == NULL || zp->z_unlinked)
return;
ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
- slogging = spa_has_slogs(zilog->zl_spa);
- if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ ? 0 : zfs_immediate_write_sz;
+
+ slogging = spa_has_slogs(zilog->zl_spa) &&
+ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+ if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index 78060bc8c9..1845715a85 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -367,7 +367,7 @@ zil_create(zilog_t *zilog)
}
error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk,
- NULL, txg);
+ NULL, txg, zilog->zl_logbias != ZFS_LOGBIAS_LATENCY);
if (error == 0)
zil_init_log_chain(zilog, &blk);
@@ -791,7 +791,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
BP_ZERO(bp);
/* pass the old blkptr in order to spread log blocks across devs */
- error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg);
+ error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg,
+ zilog->zl_logbias != ZFS_LOGBIAS_LATENCY);
if (error) {
dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg);
@@ -1280,6 +1281,12 @@ zil_fini(void)
kmem_cache_destroy(zil_lwb_cache);
}
+void
+zil_set_logbias(zilog_t *zilog, uint64_t logbias)
+{
+ zilog->zl_logbias = logbias;
+}
+
zilog_t *
zil_alloc(objset_t *os, zil_header_t *zh_phys)
{
@@ -1292,6 +1299,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
zilog->zl_spa = dmu_objset_spa(os);
zilog->zl_dmu_pool = dmu_objset_pool(os);
zilog->zl_destroy_txg = TXG_INITIAL - 1;
+ zilog->zl_logbias = dmu_objset_logbias(os);
mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL);
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index a2bdab9a7a..b94de2de5a 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -1716,12 +1716,13 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
*/
int
zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp,
- uint64_t txg)
+ uint64_t txg, boolean_t bypass_slog)
{
- int error;
+ int error = 1;
- error = metaslab_alloc(spa, spa->spa_log_class, size,
- new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
+ if (!bypass_slog)
+ error = metaslab_alloc(spa, spa->spa_log_class, size,
+ new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
if (error)
error = metaslab_alloc(spa, spa->spa_normal_class, size,
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 678416dfa3..70d9d1b440 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -990,6 +990,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
boolean_t slogging;
+ ssize_t immediate_write_sz;
if (zil_disable)
return;
@@ -1001,7 +1002,11 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
return;
}
- slogging = spa_has_slogs(zilog->zl_spa);
+ immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ ? 0 : zvol_immediate_write_sz;
+
+ slogging = spa_has_slogs(zilog->zl_spa) &&
+ (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
while (resid) {
itx_t *itx;
@@ -1013,7 +1018,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid,
* Unlike zfs_log_write() we can be called with
* upto DMU_MAX_ACCESS/2 (5MB) writes.
*/
- if (blocksize > zvol_immediate_write_sz && !slogging &&
+ if (blocksize > immediate_write_sz && !slogging &&
resid >= blocksize && off % blocksize == 0) {
write_state = WR_INDIRECT; /* uses dmu_sync */
len = blocksize;
diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h
index 86b36a8ae9..65a82f9570 100644
--- a/usr/src/uts/common/sys/fs/zfs.h
+++ b/usr/src/uts/common/sys/fs/zfs.h
@@ -116,6 +116,7 @@ typedef enum {
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
ZFS_PROP_DEFER_DESTROY,
ZFS_PROP_USERREFS,
+ ZFS_PROP_LOGBIAS,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -245,6 +246,11 @@ typedef enum {
ZFS_CANMOUNT_NOAUTO = 2
} zfs_canmount_type_t;
+typedef enum {
+ ZFS_LOGBIAS_LATENCY = 0,
+ ZFS_LOGBIAS_THROUGHPUT = 1
+} zfs_logbias_op_t;
+
typedef enum zfs_share_op {
ZFS_SHARE_NFS = 0,
ZFS_UNSHARE_NFS = 1,