diff options

| field | value | date |
|---|---|---|
| author | Matthew Ahrens <mahrens@delphix.com> | 2014-10-07 10:10:05 -0800 |
| committer | Christopher Siden <chris@delphix.com> | 2014-10-07 11:10:05 -0700 |
| commit | f8554bb92be6be88a684bea58293b41a6168a538 (patch) | |
| tree | fd66f4aa5c79e3f4de8c6009090ae14696d2ac00 /usr/src | |
| parent | 06be98028b8a84e7f327188613dd09a708294d1a (diff) | |
| download | illumos-joyent-f8554bb92be6be88a684bea58293b41a6168a538.tar.gz | |

5175 implement dmu_read_uio_dbuf() to improve cached read performance
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Alex Reece <alex.reece@delphix.com>
Reviewed by: George Wilson <george@delphix.com>
Reviewed by: Richard Elling <richard.elling@gmail.com>
Approved by: Robert Mustacchi <rm@joyent.com>

Diffstat (limited to 'usr/src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/fs/zfs/dmu.c | 74 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/sys/dmu.h | 1 |
| -rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_vnops.c | 14 |
| -rw-r--r-- | usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c | 9 |

4 files changed, 81 insertions, 17 deletions

diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 4fdcb14750..13c189831f 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -1015,8 +1015,8 @@ xuio_stat_wbuf_nocopy() } #ifdef _KERNEL -int -dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +static int +dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) { dmu_buf_t **dbp; int numbufs, i, err; @@ -1026,8 +1026,8 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. */ - err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, - &numbufs, &dbp); + err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size, + TRUE, FTAG, &numbufs, &dbp, 0); if (err) return (err); @@ -1072,6 +1072,58 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) return (err); } +/* + * Read 'size' bytes into the uio buffer. + * From object zdb->db_object. + * Starting at offset uio->uio_loffset. + * + * If the caller already has a dbuf in the target object + * (e.g. its bonus buffer), this routine is faster than dmu_read_uio(), + * because we don't have to find the dnode_t for the object. + */ +int +dmu_read_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)zdb; + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + DB_DNODE_ENTER(db); + dn = DB_DNODE(db); + err = dmu_read_uio_dnode(dn, uio, size); + DB_DNODE_EXIT(db); + + return (err); +} + +/* + * Read 'size' bytes into the uio buffer. + * From the specified object + * Starting at offset uio->uio_loffset. 
+ */ +int +dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) +{ + dnode_t *dn; + int err; + + if (size == 0) + return (0); + + err = dnode_hold(os, object, FTAG, &dn); + if (err) + return (err); + + err = dmu_read_uio_dnode(dn, uio, size); + + dnode_rele(dn, FTAG); + + return (err); +} + static int dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) { @@ -1124,6 +1176,15 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) return (err); } +/* + * Write 'size' bytes from the uio buffer. + * To object zdb->db_object. + * Starting at offset uio->uio_loffset. + * + * If the caller already has a dbuf in the target object + * (e.g. its bonus buffer), this routine is faster than dmu_write_uio(), + * because we don't have to find the dnode_t for the object. + */ int dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, dmu_tx_t *tx) @@ -1143,6 +1204,11 @@ dmu_write_uio_dbuf(dmu_buf_t *zdb, uio_t *uio, uint64_t size, return (err); } +/* + * Write 'size' bytes from the uio buffer. + * To the specified object. + * Starting at offset uio->uio_loffset. 
+ */ int dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, dmu_tx_t *tx) diff --git a/usr/src/uts/common/fs/zfs/sys/dmu.h b/usr/src/uts/common/fs/zfs/sys/dmu.h index b73de06493..ed374d5f80 100644 --- a/usr/src/uts/common/fs/zfs/sys/dmu.h +++ b/usr/src/uts/common/fs/zfs/sys/dmu.h @@ -614,6 +614,7 @@ void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); +int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size); int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, dmu_tx_t *tx); int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c index e87c002737..c608a79fd2 100644 --- a/usr/src/uts/common/fs/zfs/zfs_vnops.c +++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c @@ -406,7 +406,6 @@ static int mappedread(vnode_t *vp, int nbytes, uio_t *uio) { znode_t *zp = VTOZ(vp); - objset_t *os = zp->z_zfsvfs->z_os; int64_t start, off; int len = nbytes; int error = 0; @@ -425,7 +424,8 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) zfs_unmap_page(pp, va); page_unlock(pp); } else { - error = dmu_read_uio(os, zp->z_id, uio, bytes); + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, bytes); } len -= bytes; off = 0; @@ -460,7 +460,6 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - objset_t *os; ssize_t n, nbytes; int error = 0; rl_t *rl; @@ -468,7 +467,6 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); - os = zfsvfs->z_os; if (zp->z_pflags & ZFS_AV_QUARANTINED) { ZFS_EXIT(zfsvfs); @@ -558,10 +556,12 @@ zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t 
*cr, caller_context_t *ct) nbytes = MIN(n, zfs_read_chunk_size - P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); - if (vn_has_cached_data(vp)) + if (vn_has_cached_data(vp)) { error = mappedread(vp, nbytes, uio); - else - error = dmu_read_uio(os, zp->z_id, uio, nbytes); + } else { + error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes); + } if (error) { /* convert checksum errors into IO errors */ if (error == ECKSUM) diff --git a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c index 40b1aeeac3..0e96e2ec96 100644 --- a/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c +++ b/usr/src/uts/common/io/comstar/lu/stmf_sbd/sbd_zvol.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014 by Delphix. All rights reserved. */ #include <sys/conf.h> @@ -402,7 +403,7 @@ sbd_zvol_copy_read(sbd_lu_t *sl, uio_t *uio) rl = zfs_range_lock(sl->sl_zvol_rl_hdl, offset, len, RL_READER); - error = dmu_read_uio(sl->sl_zvol_objset_hdl, ZVOL_OBJ, uio, len); + error = dmu_read_uio_dbuf(sl->sl_zvol_bonus_hdl, uio, len); zfs_range_unlock(rl); if (error == ECKSUM) @@ -441,11 +442,7 @@ sbd_zvol_copy_write(sbd_lu_t *sl, uio_t *uio, int flags) if (error) { dmu_tx_abort(tx); } else { - /* - * XXX use the new bonus handle entry. - */ - error = dmu_write_uio(sl->sl_zvol_objset_hdl, ZVOL_OBJ, - uio, len, tx); + error = dmu_write_uio_dbuf(sl->sl_zvol_bonus_hdl, uio, len, tx); if (error == 0) { zvol_log_write_minor(sl->sl_zvol_minor_hdl, tx, offset, (ssize_t)len, sync); |