diff options
| field | value | date |
|---|---|---|
| author | Neil Perrin <Neil.Perrin@Sun.COM> | 2009-08-14 11:18:12 -0600 |
| committer | Neil Perrin <Neil.Perrin@Sun.COM> | 2009-08-14 11:18:12 -0600 |
| commit | e09fa4dacfb671e707d50a55ae9b5cc191e1b8cb (patch) | |
| tree | c030579dcc87f0eab65e5e5152a6ca3ac6405dd4 | |
| parent | 63f531d1cf94e7ff3e74e15ca709808d96e239f3 (diff) | |
| download | illumos-gate-e09fa4dacfb671e707d50a55ae9b5cc191e1b8cb.tar.gz | |
PSARC 2009/423 ZFS logbias property
6832481 ZFS separate intent log bypass property
-rw-r--r-- | usr/src/common/zfs/zfs_prop.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/dmu_objset.c | 23 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu_objset.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zil.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zil_impl.h | 1 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zio.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_log.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zil.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zio.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zvol.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fs/zfs.h | 6 |
12 files changed, 73 insertions, 11 deletions
diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c index 6a3284609c..86a399bd59 100644 --- a/usr/src/common/zfs/zfs_prop.c +++ b/usr/src/common/zfs/zfs_prop.c @@ -152,6 +152,12 @@ zfs_prop_init(void) { NULL } }; + static zprop_index_t logbias_table[] = { + { "latency", ZFS_LOGBIAS_LATENCY }, + { "throughput", ZFS_LOGBIAS_THROUGHPUT }, + { NULL } + }; + static zprop_index_t canmount_table[] = { { "off", ZFS_CANMOUNT_OFF }, { "on", ZFS_CANMOUNT_ON }, @@ -196,6 +202,9 @@ zfs_prop_init(void) ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table); + register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, + PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + "latency | throughput", "LOGBIAS", logbias_table); /* inherit index (boolean) properties */ register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, diff --git a/usr/src/uts/common/fs/zfs/dmu_objset.c b/usr/src/uts/common/fs/zfs/dmu_objset.c index 6431fd76ad..7bfe8fe874 100644 --- a/usr/src/uts/common/fs/zfs/dmu_objset.c +++ b/usr/src/uts/common/fs/zfs/dmu_objset.c @@ -91,6 +91,12 @@ dmu_objset_id(objset_t *os) return (ds ? 
ds->ds_object : 0); } +uint64_t +dmu_objset_logbias(objset_t *os) +{ + return (os->os_logbias); +} + static void checksum_changed_cb(void *arg, uint64_t newval) { @@ -159,6 +165,18 @@ secondary_cache_changed_cb(void *arg, uint64_t newval) os->os_secondary_cache = newval; } +static void +logbias_changed_cb(void *arg, uint64_t newval) +{ + objset_t *os = arg; + + ASSERT(newval == ZFS_LOGBIAS_LATENCY || + newval == ZFS_LOGBIAS_THROUGHPUT); + os->os_logbias = newval; + if (os->os_zil) + zil_set_logbias(os->os_zil, newval); +} + void dmu_objset_byteswap(void *buf, size_t size) { @@ -262,6 +280,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, os); + if (err == 0) + err = dsl_prop_register(ds, "logbias", + logbias_changed_cb, os); } if (err) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, @@ -447,6 +468,8 @@ dmu_objset_evict(objset_t *os) compression_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "copies", copies_changed_cb, os)); + VERIFY(0 == dsl_prop_unregister(ds, "logbias", + logbias_changed_cb, os)); } VERIFY(0 == dsl_prop_unregister(ds, "primarycache", primary_cache_changed_cb, os)); diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index ac22e0e322..4e40985f67 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -566,6 +566,7 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os); extern void dmu_objset_name(objset_t *os, char *buf); extern dmu_objset_type_t dmu_objset_type(objset_t *os); extern uint64_t dmu_objset_id(objset_t *os); +extern uint64_t dmu_objset_logbias(objset_t *os); extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name, uint64_t *id, uint64_t *offp, boolean_t *case_conflict); extern int dmu_snapshot_realname(objset_t *os, char *name, char *real, diff --git a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h index 
a1509e0b26..5c60ab1626 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu_objset.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu_objset.h @@ -72,6 +72,7 @@ struct objset { uint8_t os_copies; /* can change, under dsl_dir's locks */ uint8_t os_primary_cache; /* can change, under dsl_dir's locks */ uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */ + uint8_t os_logbias; /* can change, under dsl_dir's locks */ /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ diff --git a/usr/src/uts/common/fs/zfs/sys/zil.h b/usr/src/uts/common/fs/zfs/sys/zil.h index 2aff8cd68c..acec5da2d9 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil.h +++ b/usr/src/uts/common/fs/zfs/sys/zil.h @@ -397,6 +397,8 @@ extern void zil_resume(zilog_t *zilog); extern void zil_add_block(zilog_t *zilog, blkptr_t *bp); +extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); + extern int zil_disable; #ifdef __cplusplus diff --git a/usr/src/uts/common/fs/zfs/sys/zil_impl.h b/usr/src/uts/common/fs/zfs/sys/zil_impl.h index 685305fb56..32daeaf8b7 100644 --- a/usr/src/uts/common/fs/zfs/sys/zil_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/zil_impl.h @@ -83,6 +83,7 @@ struct zilog { uint8_t zl_stop_sync; /* for debugging */ uint8_t zl_writer; /* boolean: write setup in progress */ uint8_t zl_log_error; /* boolean: log write error */ + uint8_t zl_logbias; /* latency or throughput */ list_t zl_itx_list; /* in-memory itx list */ uint64_t zl_itx_list_sz; /* total size of records on list */ uint64_t zl_cur_used; /* current commit log size used */ diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index e47d8f468c..a85a1cdfcb 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -383,7 +383,7 @@ extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, boolean_t labels); extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, - blkptr_t *old_bp, uint64_t txg); + blkptr_t *old_bp, uint64_t txg, 
boolean_t bypass_slog); extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg); extern void zio_flush(zio_t *zio, vdev_t *vd); diff --git a/usr/src/uts/common/fs/zfs/zfs_log.c b/usr/src/uts/common/fs/zfs/zfs_log.c index 3f0b6b0ed3..4df7115f58 100644 --- a/usr/src/uts/common/fs/zfs/zfs_log.c +++ b/usr/src/uts/common/fs/zfs/zfs_log.c @@ -474,14 +474,19 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx_wr_state_t write_state; boolean_t slogging; uintptr_t fsync_cnt; + ssize_t immediate_write_sz; if (zilog == NULL || zp->z_unlinked) return; ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */ - slogging = spa_has_slogs(zilog->zl_spa); - if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz) + immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + ? 0 : zfs_immediate_write_sz; + + slogging = spa_has_slogs(zilog->zl_spa) && + (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); + if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz) write_state = WR_INDIRECT; else if (ioflag & (FSYNC | FDSYNC)) write_state = WR_COPIED; diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index 78060bc8c9..1845715a85 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -367,7 +367,7 @@ zil_create(zilog_t *zilog) } error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk, - NULL, txg); + NULL, txg, zilog->zl_logbias != ZFS_LOGBIAS_LATENCY); if (error == 0) zil_init_log_chain(zilog, &blk); @@ -791,7 +791,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ - error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg); + error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg, + zilog->zl_logbias != ZFS_LOGBIAS_LATENCY); if (error) { dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg); @@ -1280,6 +1281,12 @@ zil_fini(void) kmem_cache_destroy(zil_lwb_cache); } +void 
+zil_set_logbias(zilog_t *zilog, uint64_t logbias) +{ + zilog->zl_logbias = logbias; +} + zilog_t * zil_alloc(objset_t *os, zil_header_t *zh_phys) { @@ -1292,6 +1299,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) zilog->zl_spa = dmu_objset_spa(os); zilog->zl_dmu_pool = dmu_objset_pool(os); zilog->zl_destroy_txg = TXG_INITIAL - 1; + zilog->zl_logbias = dmu_objset_logbias(os); mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index a2bdab9a7a..b94de2de5a 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -1716,12 +1716,13 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp) */ int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t txg) + uint64_t txg, boolean_t bypass_slog) { - int error; + int error = 1; - error = metaslab_alloc(spa, spa->spa_log_class, size, - new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); + if (!bypass_slog) + error = metaslab_alloc(spa, spa->spa_log_class, size, + new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID); if (error) error = metaslab_alloc(spa, spa->spa_normal_class, size, diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c index 678416dfa3..70d9d1b440 100644 --- a/usr/src/uts/common/fs/zfs/zvol.c +++ b/usr/src/uts/common/fs/zfs/zvol.c @@ -990,6 +990,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, uint32_t blocksize = zv->zv_volblocksize; zilog_t *zilog = zv->zv_zilog; boolean_t slogging; + ssize_t immediate_write_sz; if (zil_disable) return; @@ -1001,7 +1002,11 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, return; } - slogging = spa_has_slogs(zilog->zl_spa); + immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT) + ? 
0 : zvol_immediate_write_sz; + + slogging = spa_has_slogs(zilog->zl_spa) && + (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY); while (resid) { itx_t *itx; @@ -1013,7 +1018,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t resid, * Unlike zfs_log_write() we can be called with * upto DMU_MAX_ACCESS/2 (5MB) writes. */ - if (blocksize > zvol_immediate_write_sz && !slogging && + if (blocksize > immediate_write_sz && !slogging && resid >= blocksize && off % blocksize == 0) { write_state = WR_INDIRECT; /* uses dmu_sync */ len = blocksize; diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 86b36a8ae9..65a82f9570 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -116,6 +116,7 @@ typedef enum { ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ ZFS_PROP_DEFER_DESTROY, ZFS_PROP_USERREFS, + ZFS_PROP_LOGBIAS, ZFS_NUM_PROPS } zfs_prop_t; @@ -245,6 +246,11 @@ typedef enum { ZFS_CANMOUNT_NOAUTO = 2 } zfs_canmount_type_t; +typedef enum { + ZFS_LOGBIAS_LATENCY = 0, + ZFS_LOGBIAS_THROUGHPUT = 1 +} zfs_logbias_op_t; + typedef enum zfs_share_op { ZFS_SHARE_NFS = 0, ZFS_UNSHARE_NFS = 1, |