From 4ee0199ec059ca9417a72125beae8d89fbc5a9df Mon Sep 17 00:00:00 2001 From: Robert Mustacchi Date: Tue, 25 Apr 2017 22:06:29 +0000 Subject: backout: 8021 ARC buf data scatter-ization (causes 8100) --- usr/src/cmd/mdb/common/modules/zfs/zfs.c | 1 + usr/src/cmd/zdb/zdb.c | 47 +- usr/src/cmd/zdb/zdb_il.c | 48 +- usr/src/cmd/ztest/ztest.c | 18 +- usr/src/common/zfs/zfs_fletcher.c | 106 +-- usr/src/common/zfs/zfs_fletcher.h | 16 +- usr/src/lib/libzfs/common/libzfs_sendrecv.c | 15 +- usr/src/lib/libzpool/common/llib-lzpool | 2 - usr/src/pkg/manifests/system-test-zfstest.mf | 9 +- usr/src/test/zfs-tests/cmd/memory_balloon/Makefile | 22 - .../zfs-tests/cmd/memory_balloon/memory_balloon.c | 103 --- .../test/zfs-tests/runfiles/perf-regression.run | 8 +- .../zfs-tests/tests/functional/mdb/mdb_001_pos.ksh | 57 +- usr/src/test/zfs-tests/tests/perf/perf.shlib | 12 - .../tests/perf/regression/random_reads.ksh | 5 +- .../tests/perf/regression/random_readwrite.ksh | 5 +- .../tests/perf/regression/random_writes.ksh | 5 +- .../tests/perf/regression/sequential_reads.ksh | 7 +- .../regression/sequential_reads_arc_cached.ksh | 78 -- .../sequential_reads_arc_cached_clone.ksh | 94 --- .../perf/regression/sequential_reads_cached.ksh | 77 ++ .../regression/sequential_reads_cached_clone.ksh | 93 ++ .../regression/sequential_reads_dbuf_cached.ksh | 82 -- .../tests/perf/regression/sequential_writes.ksh | 5 +- .../test/zfs-tests/tests/perf/scripts/profile.d | 37 - usr/src/uts/common/Makefile.files | 1 - usr/src/uts/common/fs/zfs/abd.c | 940 --------------------- usr/src/uts/common/fs/zfs/arc.c | 379 ++++----- usr/src/uts/common/fs/zfs/blkptr.c | 2 +- usr/src/uts/common/fs/zfs/dbuf.c | 8 +- usr/src/uts/common/fs/zfs/ddt.c | 12 +- usr/src/uts/common/fs/zfs/dmu.c | 12 +- usr/src/uts/common/fs/zfs/dmu_send.c | 14 +- usr/src/uts/common/fs/zfs/dsl_scan.c | 12 +- usr/src/uts/common/fs/zfs/edonr_zfs.c | 24 +- usr/src/uts/common/fs/zfs/lz4.c | 3 - usr/src/uts/common/fs/zfs/sha256.c | 26 +- usr/src/uts/common/fs/zfs/skein_zfs.c | 28 +- usr/src/uts/common/fs/zfs/spa.c | 8 +- usr/src/uts/common/fs/zfs/sys/abd.h | 150 ---- usr/src/uts/common/fs/zfs/sys/ddt.h | 5 +- usr/src/uts/common/fs/zfs/sys/spa.h | 11 +- usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 3 +- usr/src/uts/common/fs/zfs/sys/zio.h | 29 +- usr/src/uts/common/fs/zfs/sys/zio_checksum.h | 34 +- usr/src/uts/common/fs/zfs/sys/zio_compress.h | 25 +- usr/src/uts/common/fs/zfs/vdev.c | 11 +- usr/src/uts/common/fs/zfs/vdev_cache.c | 38 +- usr/src/uts/common/fs/zfs/vdev_disk.c | 17 +- usr/src/uts/common/fs/zfs/vdev_file.c | 17 +- usr/src/uts/common/fs/zfs/vdev_label.c | 79 +- usr/src/uts/common/fs/zfs/vdev_mirror.c | 15 +- usr/src/uts/common/fs/zfs/vdev_queue.c | 20 +- usr/src/uts/common/fs/zfs/vdev_raidz.c | 603 +++++-------- usr/src/uts/common/fs/zfs/zil.c | 6 +- usr/src/uts/common/fs/zfs/zio.c | 244 ++---- usr/src/uts/common/fs/zfs/zio_checksum.c | 108 +-- usr/src/uts/common/fs/zfs/zio_compress.c | 80 +- 58 files changed, 982 insertions(+), 2934 deletions(-) delete mode 100644 usr/src/test/zfs-tests/cmd/memory_balloon/Makefile delete mode 100644 usr/src/test/zfs-tests/cmd/memory_balloon/memory_balloon.c delete mode 100644 usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh delete mode 100644 usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh create mode 100644 usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached.ksh create mode 100644 usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached_clone.ksh delete mode 100644 usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh delete mode 100644 usr/src/test/zfs-tests/tests/perf/scripts/profile.d delete mode 100644 usr/src/uts/common/fs/zfs/abd.c delete mode 100644 usr/src/uts/common/fs/zfs/sys/abd.h diff --git a/usr/src/cmd/mdb/common/modules/zfs/zfs.c b/usr/src/cmd/mdb/common/modules/zfs/zfs.c index 10a2f5a4f7..0658d7c639 100644 --- a/usr/src/cmd/mdb/common/modules/zfs/zfs.c +++ b/usr/src/cmd/mdb/common/modules/zfs/zfs.c @@ -3553,6 +3553,7 @@ typedef struct mdb_arc_buf_hdr_t { struct { uint32_t b_bufcnt; uintptr_t b_state; + uintptr_t b_pdata; } b_l1hdr; } mdb_arc_buf_hdr_t; diff --git a/usr/src/cmd/zdb/zdb.c b/usr/src/cmd/zdb/zdb.c index 0137e6f448..9ddf5e1021 100644 --- a/usr/src/cmd/zdb/zdb.c +++ b/usr/src/cmd/zdb/zdb.c @@ -60,7 +60,6 @@ #include #include #include -#include #include #undef verify #include @@ -2538,7 +2537,7 @@ zdb_blkptr_done(zio_t *zio) zdb_cb_t *zcb = zio->io_private; zbookmark_phys_t *zb = &zio->io_bookmark; - abd_free(zio->io_abd); + zio_data_buf_free(zio->io_data, zio->io_size); mutex_enter(&spa->spa_scrub_lock); spa->spa_scrub_inflight--; @@ -2604,7 +2603,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, if (!BP_IS_EMBEDDED(bp) && (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { size_t size = BP_GET_PSIZE(bp); - abd_t *abd = abd_alloc(size, B_FALSE); + void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; /* If it's an intent log block, failure is expected. */ @@ -2617,7 +2616,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, spa->spa_scrub_inflight++; mutex_exit(&spa->spa_scrub_lock); - zio_nowait(zio_read(NULL, spa, bp, abd, size, + zio_nowait(zio_read(NULL, spa, bp, data, size, zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); } @@ -3398,13 +3397,6 @@ name: return (NULL); } -/* ARGSUSED */ -static int -random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused) -{ - return (random_get_pseudo_bytes(buf, len)); -} - /* * Read a block from a pool and print it out. The syntax of the * block descriptor is: @@ -3436,8 +3428,7 @@ zdb_read_block(char *thing, spa_t *spa) uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; zio_t *zio; vdev_t *vd; - abd_t *pabd; - void *lbuf, *buf; + void *pbuf, *lbuf, *buf; char *s, *p, *dup, *vdev, *flagstr; int i, error; @@ -3508,7 +3499,7 @@ zdb_read_block(char *thing, spa_t *spa) psize = size; lsize = size; - pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE); + pbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); BP_ZERO(bp); @@ -3536,15 +3527,15 @@ zdb_read_block(char *thing, spa_t *spa) /* * Treat this as a normal block read. */ - zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, + zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); } else { /* * Treat this as a vdev child I/O. */ - zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, - psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, + zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize, + ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL)); @@ -3567,21 +3558,21 @@ zdb_read_block(char *thing, spa_t *spa) void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); - abd_copy_to_buf(pbuf2, pabd, psize); + bcopy(pbuf, pbuf2, psize); - VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize, - random_get_pseudo_bytes_cb, NULL)); + VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize, + SPA_MAXBLOCKSIZE - psize) == 0); - VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, - SPA_MAXBLOCKSIZE - psize)); + VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, + SPA_MAXBLOCKSIZE - psize) == 0); for (lsize = SPA_MAXBLOCKSIZE; lsize > psize; lsize -= SPA_MINBLOCKSIZE) { for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { - if (zio_decompress_data(c, pabd, - lbuf, psize, lsize) == 0 && - zio_decompress_data_buf(c, pbuf2, - lbuf2, psize, lsize) == 0 && + if (zio_decompress_data(c, pbuf, lbuf, + psize, lsize) == 0 && + zio_decompress_data(c, pbuf2, lbuf2, + psize, lsize) == 0 && bcmp(lbuf, lbuf2, lsize) == 0) break; } @@ -3600,7 +3591,7 @@ zdb_read_block(char *thing, spa_t *spa) buf = lbuf; size = lsize; } else { - buf = abd_to_buf(pabd); + buf = pbuf; size = psize; } @@ -3618,7 +3609,7 @@ zdb_read_block(char *thing, spa_t *spa) zdb_dump_block(thing, buf, size, flags); out: - abd_free(pabd); + umem_free(pbuf, SPA_MAXBLOCKSIZE); umem_free(lbuf, SPA_MAXBLOCKSIZE); free(dup); } diff --git a/usr/src/cmd/zdb/zdb_il.c b/usr/src/cmd/zdb/zdb_il.c index bc02b1b670..583e422286 100644 --- a/usr/src/cmd/zdb/zdb_il.c +++ b/usr/src/cmd/zdb/zdb_il.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013, 2016 by Delphix. All rights reserved. + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. */ /* @@ -41,7 +41,6 @@ #include #include #include -#include extern uint8_t dump_opt[256]; @@ -117,28 +116,14 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr) (void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm); } -/* ARGSUSED */ -static int -zil_prt_rec_write_cb(void *data, size_t len, void *unused) -{ - char *cdata = data; - for (int i = 0; i < len; i++) { - if (isprint(*cdata)) - (void) printf("%c ", *cdata); - else - (void) printf("%2X", *cdata); - cdata++; - } - return (0); -} - /* ARGSUSED */ static void zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) { - abd_t *data; + char *data, *dlimit; blkptr_t *bp = &lr->lr_blkptr; zbookmark_phys_t zb; + char buf[SPA_MAXBLOCKSIZE]; int verbose = MAX(dump_opt['d'], dump_opt['i']); int error; @@ -159,6 +144,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) if (BP_IS_HOLE(bp)) { (void) printf("\t\t\tLSIZE 0x%llx\n", (u_longlong_t)BP_GET_LSIZE(bp)); + bzero(buf, sizeof (buf)); (void) printf("%s\n", prefix); return; } @@ -171,26 +157,28 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) lr->lr_foid, ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); - data = abd_alloc(BP_GET_LSIZE(bp), B_FALSE); error = zio_wait(zio_read(NULL, zilog->zl_spa, - bp, data, BP_GET_LSIZE(bp), NULL, NULL, + bp, buf, BP_GET_LSIZE(bp), NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb)); if (error) - goto out; + return; + data = buf; } else { - /* data is stored after the end of the lr_write record */ - data = abd_alloc(lr->lr_length, B_FALSE); - abd_copy_from_buf(data, lr + 1, lr->lr_length); + data = (char *)(lr + 1); } + dlimit = data + MIN(lr->lr_length, + (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)); + (void) printf("%s", prefix); - (void) abd_iterate_func(data, - 0, MIN(lr->lr_length, (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)), - zil_prt_rec_write_cb, NULL); + while (data < dlimit) { + if (isprint(*data)) + (void) printf("%c ", *data); + else + (void) printf("%2X", *data); + data++; + } (void) printf("\n"); - -out: - abd_free(data); } /* ARGSUSED */ diff --git a/usr/src/cmd/ztest/ztest.c b/usr/src/cmd/ztest/ztest.c index 16f79b52ef..75a3d5245f 100644 --- a/usr/src/cmd/ztest/ztest.c +++ b/usr/src/cmd/ztest/ztest.c @@ -111,7 +111,6 @@ #include #include #include -#include #include #include #include @@ -189,7 +188,6 @@ extern uint64_t metaslab_df_alloc_threshold; extern uint64_t zfs_deadman_synctime_ms; extern int metaslab_preload_limit; extern boolean_t zfs_compressed_arc_enabled; -extern boolean_t zfs_abd_scatter_enabled; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; @@ -5053,7 +5051,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) enum zio_checksum checksum = spa_dedup_checksum(spa); dmu_buf_t *db; dmu_tx_t *tx; - abd_t *abd; + void *buf; blkptr_t blk; int copies = 2 * ZIO_DEDUPDITTO_MIN; @@ -5133,14 +5131,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) * Damage the block. Dedup-ditto will save us when we read it later. */ psize = BP_GET_PSIZE(&blk); - abd = abd_alloc_linear(psize, B_TRUE); - ztest_pattern_set(abd_to_buf(abd), psize, ~pattern); + buf = zio_buf_alloc(psize); + ztest_pattern_set(buf, psize, ~pattern); (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, - abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, + buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); - abd_free(abd); + zio_buf_free(buf, psize); (void) rw_unlock(&ztest_name_lock); } @@ -5423,12 +5421,6 @@ ztest_resume_thread(void *arg) */ if (ztest_random(10) == 0) zfs_compressed_arc_enabled = ztest_random(2); - - /* - * Periodically change the zfs_abd_scatter_enabled setting. - */ - if (ztest_random(10) == 0) - zfs_abd_scatter_enabled = ztest_random(2); } return (NULL); } diff --git a/usr/src/common/zfs/zfs_fletcher.c b/usr/src/common/zfs/zfs_fletcher.c index c889169b42..a58fa14b7c 100644 --- a/usr/src/common/zfs/zfs_fletcher.c +++ b/usr/src/common/zfs/zfs_fletcher.c @@ -24,7 +24,6 @@ */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ /* @@ -134,29 +133,17 @@ #include #include #include -#include +/*ARGSUSED*/ void -fletcher_init(zio_cksum_t *zcp) -{ - ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); -} - -int -fletcher_2_incremental_native(void *buf, size_t size, void *data) +fletcher_2_native(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) { - zio_cksum_t *zcp = data; - const uint64_t *ip = buf; const uint64_t *ipend = ip + (size / sizeof (uint64_t)); uint64_t a0, b0, a1, b1; - a0 = zcp->zc_word[0]; - a1 = zcp->zc_word[1]; - b0 = zcp->zc_word[2]; - b1 = zcp->zc_word[3]; - - for (; ip < ipend; ip += 2) { + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { a0 += ip[0]; a1 += ip[1]; b0 += a0; @@ -164,33 +151,18 @@ fletcher_2_incremental_native(void *buf, size_t size, void *data) } ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); - return (0); } /*ARGSUSED*/ void -fletcher_2_native(const void *buf, size_t size, +fletcher_2_byteswap(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { - fletcher_init(zcp); - (void) fletcher_2_incremental_native((void *) buf, size, zcp); -} - -int -fletcher_2_incremental_byteswap(void *buf, size_t size, void *data) -{ - zio_cksum_t *zcp = data; - const uint64_t *ip = buf; const uint64_t *ipend = ip + (size / sizeof (uint64_t)); uint64_t a0, b0, a1, b1; - a0 = zcp->zc_word[0]; - a1 = zcp->zc_word[1]; - b0 = zcp->zc_word[2]; - b1 = zcp->zc_word[3]; - - for (; ip < ipend; ip += 2) { + for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { a0 += BSWAP_64(ip[0]); a1 += BSWAP_64(ip[1]); b0 += a0; @@ -198,23 +170,50 @@ fletcher_2_incremental_byteswap(void *buf, size_t size, void *data) } ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); - return (0); } /*ARGSUSED*/ void -fletcher_2_byteswap(const void *buf, size_t size, +fletcher_4_native(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { - fletcher_init(zcp); - (void) fletcher_2_incremental_byteswap((void *) buf, size, zcp); + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + + for (a = b = c = d = 0; ip < ipend; ip++) { + a += ip[0]; + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); } -int -fletcher_4_incremental_native(void *buf, size_t size, void *data) +/*ARGSUSED*/ +void +fletcher_4_byteswap(const void *buf, uint64_t size, + const void *ctx_template, zio_cksum_t *zcp) { - zio_cksum_t *zcp = data; + const uint32_t *ip = buf; + const uint32_t *ipend = ip + (size / sizeof (uint32_t)); + uint64_t a, b, c, d; + for (a = b = c = d = 0; ip < ipend; ip++) { + a += BSWAP_32(ip[0]); + b += a; + c += b; + d += c; + } + + ZIO_SET_CHECKSUM(zcp, a, b, c, d); +} + +void +fletcher_4_incremental_native(const void *buf, uint64_t size, + zio_cksum_t *zcp) +{ const uint32_t *ip = buf; const uint32_t *ipend = ip + (size / sizeof (uint32_t)); uint64_t a, b, c, d; @@ -232,23 +231,12 @@ fletcher_4_incremental_native(void *buf, size_t size, void *data) } ZIO_SET_CHECKSUM(zcp, a, b, c, d); - return (0); } -/*ARGSUSED*/ void -fletcher_4_native(const void *buf, size_t size, - const void *ctx_template, zio_cksum_t *zcp) +fletcher_4_incremental_byteswap(const void *buf, uint64_t size, + zio_cksum_t *zcp) { - fletcher_init(zcp); - (void) fletcher_4_incremental_native((void *) buf, size, zcp); -} - -int -fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) -{ - zio_cksum_t *zcp = data; - const uint32_t *ip = buf; const uint32_t *ipend = ip + (size / sizeof (uint32_t)); uint64_t a, b, c, d; @@ -266,14 +254,4 @@ fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) } ZIO_SET_CHECKSUM(zcp, a, b, c, d); - return (0); -} - -/*ARGSUSED*/ -void -fletcher_4_byteswap(const void *buf, size_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) fletcher_4_incremental_byteswap((void *) buf, size, zcp); } diff --git a/usr/src/common/zfs/zfs_fletcher.h b/usr/src/common/zfs/zfs_fletcher.h index 33c6c728cf..a920cc816d 100644 --- a/usr/src/common/zfs/zfs_fletcher.h +++ b/usr/src/common/zfs/zfs_fletcher.h @@ -24,7 +24,6 @@ */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ #ifndef _ZFS_FLETCHER_H @@ -41,15 +40,12 @@ extern "C" { * fletcher checksum functions */ -void fletcher_init(zio_cksum_t *); -void fletcher_2_native(const void *, size_t, const void *, zio_cksum_t *); -void fletcher_2_byteswap(const void *, size_t, const void *, zio_cksum_t *); -int fletcher_2_incremental_native(void *, size_t, void *); -int fletcher_2_incremental_byteswap(void *, size_t, void *); -void fletcher_4_native(const void *, size_t, const void *, zio_cksum_t *); -void fletcher_4_byteswap(const void *, size_t, const void *, zio_cksum_t *); -int fletcher_4_incremental_native(void *, size_t, void *); -int fletcher_4_incremental_byteswap(void *, size_t, void *); +void fletcher_2_native(const void *, uint64_t, const void *, zio_cksum_t *); +void fletcher_2_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); +void fletcher_4_native(const void *, uint64_t, const void *, zio_cksum_t *); +void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); +void fletcher_4_incremental_native(const void *, uint64_t, zio_cksum_t *); +void fletcher_4_incremental_byteswap(const void *, uint64_t, zio_cksum_t *); #ifdef __cplusplus } diff --git a/usr/src/lib/libzfs/common/libzfs_sendrecv.c b/usr/src/lib/libzfs/common/libzfs_sendrecv.c index 4e89dc053d..2641d53e00 100644 --- a/usr/src/lib/libzfs/common/libzfs_sendrecv.c +++ b/usr/src/lib/libzfs/common/libzfs_sendrecv.c @@ -192,19 +192,19 @@ dump_record(dmu_replay_record_t *drr, void *payload, int payload_len, { ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); - (void) fletcher_4_incremental_native(drr, + fletcher_4_incremental_native(drr, offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc); if (drr->drr_type != DRR_BEGIN) { ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u. drr_checksum.drr_checksum)); drr->drr_u.drr_checksum.drr_checksum = *zc; } - (void) fletcher_4_incremental_native( - &drr->drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), zc); + fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum, + sizeof (zio_cksum_t), zc); if (write(outfd, drr, sizeof (*drr)) == -1) return (errno); if (payload_len != 0) { - (void) fletcher_4_incremental_native(payload, payload_len, zc); + fletcher_4_incremental_native(payload, payload_len, zc); if (write(outfd, payload, payload_len) == -1) return (errno); } @@ -2093,9 +2093,9 @@ recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, if (zc) { if (byteswap) - (void) fletcher_4_incremental_byteswap(buf, ilen, zc); + fletcher_4_incremental_byteswap(buf, ilen, zc); else - (void) fletcher_4_incremental_native(buf, ilen, zc); + fletcher_4_incremental_native(buf, ilen, zc); } return (0); } @@ -3649,8 +3649,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, * recv_read() above; do it again correctly. */ bzero(&zcksum, sizeof (zio_cksum_t)); - (void) fletcher_4_incremental_byteswap(&drr, - sizeof (drr), &zcksum); + fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); flags->byteswap = B_TRUE; drr.drr_type = BSWAP_32(drr.drr_type); diff --git a/usr/src/lib/libzpool/common/llib-lzpool b/usr/src/lib/libzpool/common/llib-lzpool index 3636b4e76e..d0421bea94 100644 --- a/usr/src/lib/libzpool/common/llib-lzpool +++ b/usr/src/lib/libzpool/common/llib-lzpool @@ -61,7 +61,6 @@ #include #include #include -#include extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; @@ -69,4 +68,3 @@ extern boolean_t zfeature_checks_disable; extern uint64_t zfs_deadman_synctime_ms; extern int metaslab_preload_limit; extern boolean_t zfs_compressed_arc_enabled; -extern boolean_t zfs_abd_scatter_enabled; diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 1010ad94ed..114e9108df 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -158,7 +158,6 @@ file path=opt/zfs-tests/bin/file_trunc mode=0555 file path=opt/zfs-tests/bin/file_write mode=0555 file path=opt/zfs-tests/bin/getholes mode=0555 file path=opt/zfs-tests/bin/largest_file mode=0555 -file path=opt/zfs-tests/bin/memory_balloon mode=0555 file path=opt/zfs-tests/bin/mkbusy mode=0555 file path=opt/zfs-tests/bin/mkfiles mode=0555 file path=opt/zfs-tests/bin/mkholes mode=0555 @@ -2303,18 +2302,14 @@ file path=opt/zfs-tests/tests/perf/regression/random_reads mode=0555 file path=opt/zfs-tests/tests/perf/regression/random_readwrite mode=0555 file path=opt/zfs-tests/tests/perf/regression/random_writes mode=0555 file path=opt/zfs-tests/tests/perf/regression/sequential_reads mode=0555 -file path=opt/zfs-tests/tests/perf/regression/sequential_reads_arc_cached \ +file path=opt/zfs-tests/tests/perf/regression/sequential_reads_cached \ mode=0555 -file \ - path=opt/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone \ - mode=0555 -file path=opt/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached \ +file path=opt/zfs-tests/tests/perf/regression/sequential_reads_cached_clone \ mode=0555 file path=opt/zfs-tests/tests/perf/regression/sequential_writes mode=0555 file path=opt/zfs-tests/tests/perf/regression/setup mode=0555 file path=opt/zfs-tests/tests/perf/scripts/io.d mode=0444 file path=opt/zfs-tests/tests/perf/scripts/prefetch_io.d mode=0444 -file path=opt/zfs-tests/tests/perf/scripts/profile.d mode=0444 license cr_Sun license=cr_Sun license lic_CDDL license=lic_CDDL depend fmri=system/file-system/zfs/tests type=require diff --git a/usr/src/test/zfs-tests/cmd/memory_balloon/Makefile b/usr/src/test/zfs-tests/cmd/memory_balloon/Makefile deleted file mode 100644 index df1d370356..0000000000 --- a/usr/src/test/zfs-tests/cmd/memory_balloon/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -# -# This file and its contents are supplied under the terms of the -# Common Development and Distribution License ("CDDL"), version 1.0. -# You may only use this file in accordance with the terms of version -# 1.0 of the CDDL. -# -# A full copy of the text of the CDDL should have accompanied this -# source. A copy of the CDDL is also available via the Internet at -# http://www.illumos.org/license/CDDL. -# - -# -# Copyright (c) 2016 by Delphix. All rights reserved. -# - -PROG = memory_balloon - -include $(SRC)/cmd/Makefile.cmd - -LINTFLAGS += -erroff=E_FUNC_SET_NOT_USED - -include ../Makefile.subdirs diff --git a/usr/src/test/zfs-tests/cmd/memory_balloon/memory_balloon.c b/usr/src/test/zfs-tests/cmd/memory_balloon/memory_balloon.c deleted file mode 100644 index 958f6e6609..0000000000 --- a/usr/src/test/zfs-tests/cmd/memory_balloon/memory_balloon.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -/* - * Steal memory from the kernel, forcing the ARC to decrease in size, and hold - * it until the process receives a signal. - */ - -#include -#include -#include -#include -#include -#include -#include - -static void -usage(char *progname) -{ - (void) fprintf(stderr, "Usage: %s -f \n", progname); - exit(1); -} - -static void -fail(char *err, int rval) -{ - perror(err); - exit(rval); -} - -static void -daemonize(void) -{ - pid_t pid; - - if ((pid = fork()) < 0) { - fail("fork", 1); - } else if (pid != 0) { - (void) fprintf(stdout, "%ld\n", pid); - exit(0); - } - - (void) setsid(); - (void) close(0); - (void) close(1); - (void) close(2); -} - -int -main(int argc, char *argv[]) -{ - int c; - boolean_t fflag = B_FALSE; - char *prog = argv[0]; - long long size; - char *stroll_leftovers; - int shm_id; - void *shm_attached; - - while ((c = getopt(argc, argv, "f")) != -1) { - switch (c) { - /* Run in the foreground */ - case 'f': - fflag = B_TRUE; - break; - default: - usage(prog); - } - } - - argc -= optind; - argv += optind; - - if (argc != 1) - usage(prog); - size = strtoll(argv[0], &stroll_leftovers, 10); - if (size <= 0) - fail("invalid size in bytes", 1); - - if ((shm_id = shmget(IPC_PRIVATE, size, IPC_CREAT|IPC_EXCL)) == -1) - fail("shmget", 1); - if ((shm_attached = shmat(shm_id, NULL, SHM_SHARE_MMU)) == (void *)-1) - fail("shmat", 1); - - if (fflag == B_FALSE) - daemonize(); - (void) pause(); - - /* NOTREACHED */ - return (0); -} diff --git a/usr/src/test/zfs-tests/runfiles/perf-regression.run b/usr/src/test/zfs-tests/runfiles/perf-regression.run index dbb30f0327..0095931ad5 100644 --- a/usr/src/test/zfs-tests/runfiles/perf-regression.run +++ b/usr/src/test/zfs-tests/runfiles/perf-regression.run @@ -10,7 +10,7 @@ # # -# Copyright (c) 2015, 2016 by Delphix. All rights reserved. +# Copyright (c) 2015 by Delphix. All rights reserved. # [DEFAULT] @@ -24,7 +24,7 @@ post = cleanup outputdir = /var/tmp/test_results [/opt/zfs-tests/tests/perf/regression] -tests = ['sequential_writes', 'sequential_reads', 'sequential_reads_arc_cached', - 'sequential_reads_arc_cached_clone', 'sequential_reads_dbuf_cached', - 'random_reads', 'random_writes', 'random_readwrite'] +tests = ['sequential_writes', 'sequential_reads', 'sequential_reads_cached', + 'sequential_reads_cached_clone', 'random_reads', 'random_writes', + 'random_readwrite'] post = diff --git a/usr/src/test/zfs-tests/tests/functional/mdb/mdb_001_pos.ksh b/usr/src/test/zfs-tests/tests/functional/mdb/mdb_001_pos.ksh index 7f6faf690e..a4f90be49b 100644 --- a/usr/src/test/zfs-tests/tests/functional/mdb/mdb_001_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/mdb/mdb_001_pos.ksh @@ -49,51 +49,26 @@ function cleanup verify_runnable "global" log_onexit cleanup -tmpfile=$(mktemp) -log_must zpool scrub $TESTPOOL - -typeset spa=$(mdb -ke "::spa" | awk "/$TESTPOOL/ {print \$1}") -typeset off_ub=$(mdb -ke "::offsetof spa_t spa_uberblock | =J") -typeset off_rbp=$(mdb -ke "::offsetof uberblock_t ub_rootbp | =J") -typeset bp=$(mdb -ke "$spa + $off_ub + $off_rbp =J") - -# dcmds and walkers skipped due to being DEBUG only or difficult to run: -# ::zfs_params -# ::refcount - -set -A dcmds "::abuf_find 1 2" \ - "::arc" \ - "::arc -b" \ - "::arc_compression_stats" \ - "$bp ::blkptr" \ - "$bp ::dva" \ - "::walk spa" \ - "::spa" \ - "$spa ::spa " \ - "$spa ::spa -c" \ - "$spa ::spa -h" \ - "$spa ::spa -v" \ - "$spa ::spa -Mmh" \ - "$spa ::spa_config" \ - "$spa ::spa_space" \ - "$spa ::spa_space -b" \ - "$spa ::spa_vdevs" \ - "$spa ::print spa_t spa_root_vdev | ::vdev" \ - "$spa ::print spa_t spa_root_vdev | ::vdev -re" \ - "$spa ::print -a spa_t spa_dsl_pool->dp_dirty_datasets | ::walk txg_list" \ - "$spa ::print -a spa_t spa_uberblock.ub_rootbp | ::blkptr" \ - "$spa ::walk metaslab" \ - "$spa ::walk metaslab | ::head -1 | ::metaslab_weight" \ - "$spa ::walk metaslab | ::head -1 | ::metaslab_trace" \ - "$spa ::walk zio_root | ::zio -c" \ - "$spa ::walk zio_root | ::zio -r" \ - "$spa ::walk zms_freelist" - "$spa ::zfs_blkstats -v" \ +OUTFILE='/var/tmp/mdb-outfile' +set -A dcmds "::walk spa" \ + "::walk spa | ::spa " \ + "::walk spa | ::spa -c" \ + "::walk spa | ::spa -v" \ + "::walk spa | ::spa_config" \ + "::walk spa | ::spa_space" \ + "::walk spa | ::spa_space -b" \ + "::walk spa | ::spa_vdevs" \ + "::walk spa | ::walk metaslab" \ + "::walk spa | ::print struct spa spa_root_vdev | ::vdev" \ + "::walk spa | ::print struct spa spa_root_vdev | ::vdev -re" \ "::dbufs" \ "::dbufs -n mos -o mdn -l 0 -b 0" \ "::dbufs | ::dbuf" \ "::dbuf_stats" \ - "dbuf_cache ::walk multilist" + "::abuf_find 1 2" \ + "::walk spa | ::print -a struct spa spa_uberblock.ub_rootbp | ::blkptr" \ + "::walk spa | ::print -a struct spa spa_dsl_pool->dp_dirty_datasets | ::walk txg_list" \ + "::walk spa | ::walk zms_freelist" # # The commands above were supplied by the ZFS development team. The idea is to # do as much checking as possible without the need to hardcode addresses. diff --git a/usr/src/test/zfs-tests/tests/perf/perf.shlib b/usr/src/test/zfs-tests/tests/perf/perf.shlib index ff980c0e6e..38e30f255d 100644 --- a/usr/src/test/zfs-tests/tests/perf/perf.shlib +++ b/usr/src/test/zfs-tests/tests/perf/perf.shlib @@ -182,18 +182,6 @@ function get_max_arc_size echo $max_arc_size } -function get_max_dbuf_cache_size -{ - typeset -l max_dbuf_cache_size=$(dtrace -qn 'BEGIN { - printf("%u\n", `dbuf_cache_max_bytes); - exit(0); - }') - - [[ $? -eq 0 ]] || log_fail "get_max_dbuf_cache_size failed" - - echo $max_dbuf_cache_size -} - # Create a file with some information about how this system is configured. function get_system_config { diff --git a/usr/src/test/zfs-tests/tests/perf/regression/random_reads.ksh b/usr/src/test/zfs-tests/tests/perf/regression/random_reads.ksh index 655366e00c..abf05ca719 100644 --- a/usr/src/test/zfs-tests/tests/perf/regression/random_reads.ksh +++ b/usr/src/test/zfs-tests/tests/perf/regression/random_reads.ksh @@ -69,9 +69,8 @@ log_must fio $FIO_SCRIPTS/mkfiles.fio # Set up the scripts and output files that will log performance data. lun_list=$(pool_to_lun_list $PERFPOOL) log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") log_note "Random reads with $PERF_RUNTYPE settings" do_fio_run random_reads.fio false true diff --git a/usr/src/test/zfs-tests/tests/perf/regression/random_readwrite.ksh b/usr/src/test/zfs-tests/tests/perf/regression/random_readwrite.ksh index f41a2b526e..2422f9c658 100644 --- a/usr/src/test/zfs-tests/tests/perf/regression/random_readwrite.ksh +++ b/usr/src/test/zfs-tests/tests/perf/regression/random_readwrite.ksh @@ -69,9 +69,8 @@ log_must fio $FIO_SCRIPTS/mkfiles.fio # Set up the scripts and output files that will log performance data. lun_list=$(pool_to_lun_list $PERFPOOL) log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") log_note "Random reads and writes with $PERF_RUNTYPE settings" do_fio_run random_readwrite.fio false true diff --git a/usr/src/test/zfs-tests/tests/perf/regression/random_writes.ksh b/usr/src/test/zfs-tests/tests/perf/regression/random_writes.ksh index 9e201a827c..c48ae76140 100644 --- a/usr/src/test/zfs-tests/tests/perf/regression/random_writes.ksh +++ b/usr/src/test/zfs-tests/tests/perf/regression/random_writes.ksh @@ -61,9 +61,8 @@ fi # Set up the scripts and output files that will log performance data. lun_list=$(pool_to_lun_list $PERFPOOL) log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") log_note "Random writes with $PERF_RUNTYPE settings" do_fio_run random_writes.fio true false diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads.ksh index 580f2d94e4..60083c8673 100644 --- a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads.ksh +++ b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads.ksh @@ -69,10 +69,9 @@ log_must fio $FIO_SCRIPTS/mkfiles.fio # Set up the scripts and output files that will log performance data. lun_list=$(pool_to_lun_list $PERFPOOL) log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" "vmstat 1" "vmstat" + "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") log_note "Sequential reads with $PERF_RUNTYPE settings" do_fio_run sequential_reads.fio false true diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh deleted file mode 100644 index 97bb8bdc31..0000000000 --- a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached.ksh +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/ksh - -# -# This file and its contents are supplied under the terms of the -# Common Development and Distribution License ("CDDL"), version 1.0. -# You may only use this file in accordance with the terms of version -# 1.0 of the CDDL. -# -# A full copy of the text of the CDDL should have accompanied this -# source. A copy of the CDDL is also available via the Internet at -# http://www.illumos.org/license/CDDL. -# - -# -# Copyright (c) 2015, 2016 by Delphix. All rights reserved. -# - -# -# Description: -# Trigger fio runs using the sequential_reads job file. The number of runs and -# data collected is determined by the PERF_* variables. See do_fio_run for -# details about these variables. -# -# The files to read from are created prior to the first fio run, and used -# for all fio runs. The ARC is not cleared to ensure that all data is cached. -# - -. $STF_SUITE/include/libtest.shlib -. $STF_SUITE/tests/perf/perf.shlib - -function cleanup -{ - log_must zfs destroy $TESTFS -} - -log_assert "Measure IO stats during sequential read load" -log_onexit cleanup - -export TESTFS=$PERFPOOL/testfs -recreate_perfpool -log_must zfs create $PERF_FS_OPTS $TESTFS - -# Make sure the working set can be cached in the arc. Aim for 1/2 of arc. -export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) - -# Variables for use by fio. -if [[ -n $PERF_REGRESSION_WEEKLY ]]; then - export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY} - export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'} - export PERF_NTHREADS=${PERF_NTHREADS:-'16 64'} - export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - export PERF_IOSIZES=${PERF_IOSIZES:-'64k 128k 1m'} -elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then - export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY} - export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'} - export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} - export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'} -fi - -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") - -log_note "Sequential cached reads with $PERF_RUNTYPE settings" -do_fio_run sequential_reads.fio false false -log_pass "Measure IO stats during sequential cached read load" diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh deleted file mode 100644 index cfc748c843..0000000000 --- a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_arc_cached_clone.ksh +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/ksh - -# -# This file and its contents are supplied under the terms of the -# Common Development and Distribution License ("CDDL"), version 1.0. -# You may only use this file in accordance with the terms of version -# 1.0 of the CDDL. -# -# A full copy of the text of the CDDL should have accompanied this -# source. A copy of the CDDL is also available via the Internet at -# http://www.illumos.org/license/CDDL. -# - -# -# Copyright (c) 2015, 2016 by Delphix. All rights reserved. -# - -# -# Description: -# Trigger fio runs using the sequential_reads job file. The number of runs and -# data collected is determined by the PERF_* variables. See do_fio_run for -# details about these variables. -# -# The files to read from are created prior to the first fio run, and used -# for all fio runs. This test will exercise cached read performance from -# a clone filesystem. The data is initially cached in the ARC and then -# a snapshot and clone are created. All the performance runs are then -# initiated against the clone filesystem to exercise the performance of -# reads when the ARC has to create another buffer from a different dataset. -# It will also exercise the need to evict the duplicate buffer once the last -# reference on that buffer is released. -# - -. $STF_SUITE/include/libtest.shlib -. $STF_SUITE/tests/perf/perf.shlib - -function cleanup -{ - log_must zfs destroy $TESTFS -} - -log_assert "Measure IO stats during sequential read load" -log_onexit cleanup - -export TESTFS=$PERFPOOL/testfs -recreate_perfpool -log_must zfs create $PERF_FS_OPTS $TESTFS - -# Make sure the working set can be cached in the arc. Aim for 1/2 of arc. -export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) - -# Variables for use by fio. -if [[ -n $PERF_REGRESSION_WEEKLY ]]; then - export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY} - export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'} - export PERF_NTHREADS=${PERF_NTHREADS:-'16 64'} - export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - export PERF_IOSIZES=${PERF_IOSIZES:-'64k 128k 1m'} -elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then - export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY} - export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'} - export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} - export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'} -fi - -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -log_note "Creating snapshot, $TESTSNAP, of $TESTFS" -create_snapshot $TESTFS $TESTSNAP -log_note "Creating clone, $PERFPOOL/$TESTCLONE, from $TESTFS@$TESTSNAP" -create_clone $TESTFS@$TESTSNAP $PERFPOOL/$TESTCLONE - -# -# Reset the TESTFS to point to the clone -# -export TESTFS=$PERFPOOL/$TESTCLONE - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") - -log_note "Sequential cached reads from $TESTFS with $PERF_RUNTYPE settings" -do_fio_run sequential_reads.fio false false -log_pass "Measure IO stats during sequential cached read load" diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached.ksh new file mode 100644 index 0000000000..b4365c0871 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached.ksh @@ -0,0 +1,77 @@ +#!/usr/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015, 2016 by Delphix. All rights reserved. +# + +# +# Description: +# Trigger fio runs using the sequential_reads job file. The number of runs and +# data collected is determined by the PERF_* variables. See do_fio_run for +# details about these variables. +# +# The files to read from are created prior to the first fio run, and used +# for all fio runs. The ARC is not cleared to ensure that all data is cached. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/perf/perf.shlib + +function cleanup +{ + log_must zfs destroy $TESTFS +} + +log_assert "Measure IO stats during sequential read load" +log_onexit cleanup + +export TESTFS=$PERFPOOL/testfs +recreate_perfpool +log_must zfs create $PERF_FS_OPTS $TESTFS + +# Make sure the working set can be cached in the arc. Aim for 1/2 of arc. +export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) + +# Variables for use by fio. +if [[ -n $PERF_REGRESSION_WEEKLY ]]; then + export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY} + export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'} + export PERF_NTHREADS=${PERF_NTHREADS:-'16 64'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + export PERF_IOSIZES=${PERF_IOSIZES:-'64k 128k 1m'} +elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then + export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY} + export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'} + export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'} +fi + +# Layout the files to be used by the read tests. Create as many files as the +# largest number of threads. An fio run with fewer threads will use a subset +# of the available files. +export NUMJOBS=$(get_max $PERF_NTHREADS) +export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) +log_must fio $FIO_SCRIPTS/mkfiles.fio + +# Set up the scripts and output files that will log performance data. +lun_list=$(pool_to_lun_list $PERFPOOL) +log_note "Collecting backend IO stats with lun list $lun_list" +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" "vmstat 1" "vmstat" + "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") + +log_note "Sequential cached reads with $PERF_RUNTYPE settings" +do_fio_run sequential_reads.fio false false +log_pass "Measure IO stats during sequential cached read load" diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached_clone.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached_clone.ksh new file mode 100644 index 0000000000..c656eb4643 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_cached_clone.ksh @@ -0,0 +1,93 @@ +#!/usr/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2015, 2016 by Delphix. All rights reserved. +# + +# +# Description: +# Trigger fio runs using the sequential_reads job file. The number of runs and +# data collected is determined by the PERF_* variables. See do_fio_run for +# details about these variables. +# +# The files to read from are created prior to the first fio run, and used +# for all fio runs. This test will exercise cached read performance from +# a clone filesystem. The data is initially cached in the ARC and then +# a snapshot and clone are created. All the performance runs are then +# initiated against the clone filesystem to exercise the performance of +# reads when the ARC has to create another buffer from a different dataset. +# It will also exercise the need to evict the duplicate buffer once the last +# reference on that buffer is released. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/perf/perf.shlib + +function cleanup +{ + log_must zfs destroy $TESTFS +} + +log_assert "Measure IO stats during sequential read load" +log_onexit cleanup + +export TESTFS=$PERFPOOL/testfs +recreate_perfpool +log_must zfs create $PERF_FS_OPTS $TESTFS + +# Make sure the working set can be cached in the arc. Aim for 1/2 of arc. +export TOTAL_SIZE=$(($(get_max_arc_size) / 2)) + +# Variables for use by fio. +if [[ -n $PERF_REGRESSION_WEEKLY ]]; then + export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY} + export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'} + export PERF_NTHREADS=${PERF_NTHREADS:-'16 64'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + export PERF_IOSIZES=${PERF_IOSIZES:-'64k 128k 1m'} +elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then + export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY} + export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'} + export PERF_NTHREADS=${PERF_NTHREADS:-'64 128'} + export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} + export PERF_IOSIZES=${PERF_IOSIZES:-'128k 1m'} +fi + +# Layout the files to be used by the read tests. Create as many files as the +# largest number of threads. An fio run with fewer threads will use a subset +# of the available files. +export NUMJOBS=$(get_max $PERF_NTHREADS) +export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) +log_must fio $FIO_SCRIPTS/mkfiles.fio + +log_note "Creating snapshot, $TESTSNAP, of $TESTFS" +create_snapshot $TESTFS $TESTSNAP +log_note "Creating clone, $PERFPOOL/$TESTCLONE, from $TESTFS@$TESTSNAP" +create_clone $TESTFS@$TESTSNAP $PERFPOOL/$TESTCLONE + +# +# Reset the TESTFS to point to the clone +# +export TESTFS=$PERFPOOL/$TESTCLONE + +# Set up the scripts and output files that will log performance data. +lun_list=$(pool_to_lun_list $PERFPOOL) +log_note "Collecting backend IO stats with lun list $lun_list" +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "$PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" "vmstat 1" "vmstat" + "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") + +log_note "Sequential cached reads from $TESTFS with $PERF_RUNTYPE settings" +do_fio_run sequential_reads.fio false false +log_pass "Measure IO stats during sequential cached read load" diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh deleted file mode 100644 index f7ea4b75c6..0000000000 --- a/usr/src/test/zfs-tests/tests/perf/regression/sequential_reads_dbuf_cached.ksh +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/ksh - -# -# This file and its contents are supplied under the terms of the -# Common Development and Distribution License ("CDDL"), version 1.0. -# You may only use this file in accordance with the terms of version -# 1.0 of the CDDL. -# -# A full copy of the text of the CDDL should have accompanied this -# source. A copy of the CDDL is also available via the Internet at -# http://www.illumos.org/license/CDDL. -# - -# -# Copyright (c) 2016 by Delphix. All rights reserved. -# - -# -# Description: -# Trigger fio runs using the sequential_reads job file. The number of runs and -# data collected is determined by the PERF_* variables. See do_fio_run for -# details about these variables. -# -# The files to read from are created prior to the first fio run, and used -# for all fio runs. The ARC is not cleared to ensure that all data is cached. -# -# This is basically a copy of the sequential_reads_cached test case, but with -# a smaller dateset so that we can fit everything into the decompressed, linear -# space in the dbuf cache. -# - -. $STF_SUITE/include/libtest.shlib -. $STF_SUITE/tests/perf/perf.shlib - -function cleanup -{ - log_must zfs destroy $TESTFS -} - -log_assert "Measure IO stats during sequential read load" -log_onexit cleanup - -export TESTFS=$PERFPOOL/testfs -recreate_perfpool -log_must zfs create $PERF_FS_OPTS $TESTFS - -# Ensure the working set can be cached in the dbuf cache. -export TOTAL_SIZE=$(($(get_max_dbuf_cache_size) * 3 / 4)) - -# Variables for use by fio. -if [[ -n $PERF_REGRESSION_WEEKLY ]]; then - export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_WEEKLY} - export PERF_RUNTYPE=${PERF_RUNTYPE:-'weekly'} - export PERF_NTHREADS=${PERF_NTHREADS:-'16 64'} - export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - export PERF_IOSIZES=${PERF_IOSIZES:-'64k'} -elif [[ -n $PERF_REGRESSION_NIGHTLY ]]; then - export PERF_RUNTIME=${PERF_RUNTIME:-$PERF_RUNTIME_NIGHTLY} - export PERF_RUNTYPE=${PERF_RUNTYPE:-'nightly'} - export PERF_NTHREADS=${PERF_NTHREADS:-'64'} - export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'} - export PERF_IOSIZES=${PERF_IOSIZES:-'64k'} -fi - -# Layout the files to be used by the read tests. Create as many files as the -# largest number of threads. An fio run with fewer threads will use a subset -# of the available files. -export NUMJOBS=$(get_max $PERF_NTHREADS) -export FILE_SIZE=$((TOTAL_SIZE / NUMJOBS)) -log_must fio $FIO_SCRIPTS/mkfiles.fio - -# Set up the scripts and output files that will log performance data. -lun_list=$(pool_to_lun_list $PERFPOOL) -log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "dtrace -Cs $PERF_SCRIPTS/prefetch_io.d $PERFPOOL 1" "prefetch" - "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") - -log_note "Sequential cached reads with $PERF_RUNTYPE settings" -do_fio_run sequential_reads.fio false false -log_pass "Measure IO stats during sequential cached read load" diff --git a/usr/src/test/zfs-tests/tests/perf/regression/sequential_writes.ksh b/usr/src/test/zfs-tests/tests/perf/regression/sequential_writes.ksh index 493a3d18b7..e2f2cca0d2 100644 --- a/usr/src/test/zfs-tests/tests/perf/regression/sequential_writes.ksh +++ b/usr/src/test/zfs-tests/tests/perf/regression/sequential_writes.ksh @@ -61,9 +61,8 @@ fi # Set up the scripts and output files that will log performance data. lun_list=$(pool_to_lun_list $PERFPOOL) log_note "Collecting backend IO stats with lun list $lun_list" -export collect_scripts=("dtrace -s $PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" - "io" "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat" - "dtrace -s $PERF_SCRIPTS/profile.d" "profile" "kstat zfs:0 1" "kstat") +export collect_scripts=("$PERF_SCRIPTS/io.d $PERFPOOL $lun_list 1" "io" + "vmstat 1" "vmstat" "mpstat 1" "mpstat" "iostat -xcnz 1" "iostat") log_note "Sequential writes with $PERF_RUNTYPE settings" do_fio_run sequential_writes.fio true false diff --git a/usr/src/test/zfs-tests/tests/perf/scripts/profile.d b/usr/src/test/zfs-tests/tests/perf/scripts/profile.d deleted file mode 100644 index e7fbd1fca5..0000000000 --- a/usr/src/test/zfs-tests/tests/perf/scripts/profile.d +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/sbin/dtrace -s - -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -#pragma D option stackframes=100 - -/* - * @stacks: The number of times a stack has been recorded - */ - -profile-997 -/ arg0 / -{ - @stacks[stack()] = count(); -} - -ERROR -{ - trace(arg1); - trace(arg2); - trace(arg3); - trace(arg4); - trace(arg5); -} diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 873e5755cc..b87e9e258e 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1349,7 +1349,6 @@ SMBFS_OBJS += smbfs_vfsops.o smbfs_vnops.o smbfs_node.o \ BOOTFS_OBJS += bootfs_construct.o bootfs_vfsops.o bootfs_vnops.o ZFS_COMMON_OBJS += \ - abd.o \ arc.o \ blkptr.o \ bplist.o \ diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c deleted file mode 100644 index 932ba800ed..0000000000 --- a/usr/src/uts/common/fs/zfs/abd.c +++ /dev/null @@ -1,940 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright (c) 2014 by Chunwei Chen. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -/* - * ARC buffer data (ABD). - * - * ABDs are an abstract data structure for the ARC which can use two - * different ways of storing the underlying data: - * - * (a) Linear buffer. In this case, all the data in the ABD is stored in one - * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache). - * - * +-------------------+ - * | ABD (linear) | - * | abd_flags = ... | - * | abd_size = ... | +--------------------------------+ - * | abd_buf ------------->| raw buffer of size abd_size | - * +-------------------+ +--------------------------------+ - * no abd_chunks - * - * (b) Scattered buffer. In this case, the data in the ABD is split into - * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers - * to the chunks recorded in an array at the end of the ABD structure. - * - * +-------------------+ - * | ABD (scattered) | - * | abd_flags = ... | - * | abd_size = ... | - * | abd_offset = 0 | +-----------+ - * | abd_chunks[0] ----------------------------->| chunk 0 | - * | abd_chunks[1] ---------------------+ +-----------+ - * | ... | | +-----------+ - * | abd_chunks[N-1] ---------+ +------->| chunk 1 | - * +-------------------+ | +-----------+ - * | ... - * | +-----------+ - * +----------------->| chunk N-1 | - * +-----------+ - * - * Using a large proportion of scattered ABDs decreases ARC fragmentation since - * when we are at the limit of allocatable space, using equal-size chunks will - * allow us to quickly reclaim enough space for a new large allocation (assuming - * it is also scattered). - * - * In addition to directly allocating a linear or scattered ABD, it is also - * possible to create an ABD by requesting the "sub-ABD" starting at an offset - * within an existing ABD. In linear buffers this is simple (set abd_buf of - * the new ABD to the starting point within the original raw buffer), but - * scattered ABDs are a little more complex. The new ABD makes a copy of the - * relevant abd_chunks pointers (but not the underlying data). However, to - * provide arbitrary rather than only chunk-aligned starting offsets, it also - * tracks an abd_offset field which represents the starting point of the data - * within the first chunk in abd_chunks. For both linear and scattered ABDs, - * creating an offset ABD marks the original ABD as the offset's parent, and the - * original ABD's abd_children refcount is incremented. This data allows us to - * ensure the root ABD isn't deleted before its children. - * - * Most consumers should never need to know what type of ABD they're using -- - * the ABD public API ensures that it's possible to transparently switch from - * using a linear ABD to a scattered one when doing so would be beneficial. - * - * If you need to use the data within an ABD directly, if you know it's linear - * (because you allocated it) you can use abd_to_buf() to access the underlying - * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions - * which will allocate a raw buffer if necessary. Use the abd_return_buf* - * functions to return any raw buffers that are no longer necessary when you're - * done using them. - * - * There are a variety of ABD APIs that implement basic buffer operations: - * compare, copy, read, write, and fill with zeroes. If you need a custom - * function which progressively accesses the whole ABD, use the abd_iterate_* - * functions. - */ - -#include -#include -#include -#include -#include - -typedef struct abd_stats { - kstat_named_t abdstat_struct_size; - kstat_named_t abdstat_scatter_cnt; - kstat_named_t abdstat_scatter_data_size; - kstat_named_t abdstat_scatter_chunk_waste; - kstat_named_t abdstat_linear_cnt; - kstat_named_t abdstat_linear_data_size; -} abd_stats_t; - -static abd_stats_t abd_stats = { - /* Amount of memory occupied by all of the abd_t struct allocations */ - { "struct_size", KSTAT_DATA_UINT64 }, - /* - * The number of scatter ABDs which are currently allocated, excluding - * ABDs which don't own their data (for instance the ones which were - * allocated through abd_get_offset()). - */ - { "scatter_cnt", KSTAT_DATA_UINT64 }, - /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */ - { "scatter_data_size", KSTAT_DATA_UINT64 }, - /* - * The amount of space wasted at the end of the last chunk across all - * scatter ABDs tracked by scatter_cnt. - */ - { "scatter_chunk_waste", KSTAT_DATA_UINT64 }, - /* - * The number of linear ABDs which are currently allocated, excluding - * ABDs which don't own their data (for instance the ones which were - * allocated through abd_get_offset() and abd_get_from_buf()). If an - * ABD takes ownership of its buf then it will become tracked. - */ - { "linear_cnt", KSTAT_DATA_UINT64 }, - /* Amount of data stored in all linear ABDs tracked by linear_cnt */ - { "linear_data_size", KSTAT_DATA_UINT64 }, -}; - -#define ABDSTAT(stat) (abd_stats.stat.value.ui64) -#define ABDSTAT_INCR(stat, val) \ - atomic_add_64(&abd_stats.stat.value.ui64, (val)) -#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1) -#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1) - -/* - * It is possible to make all future ABDs be linear by setting this to B_FALSE. - * Otherwise, ABDs are allocated scattered by default unless the caller uses - * abd_alloc_linear(). - */ -boolean_t zfs_abd_scatter_enabled = B_TRUE; - -/* - * The size of the chunks ABD allocates. Because the sizes allocated from the - * kmem_cache can't change, this tunable can only be modified at boot. Changing - * it at runtime would cause ABD iteration to work incorrectly for ABDs which - * were allocated with the old size, so a safeguard has been put in place which - * will cause the machine to panic if you change it and try to access the data - * within a scattered ABD. - */ -size_t zfs_abd_chunk_size = 4096; - -#ifdef _KERNEL -extern vmem_t *zio_alloc_arena; -#endif - -kmem_cache_t *abd_chunk_cache; -static kstat_t *abd_ksp; - -static void * -abd_alloc_chunk() -{ - void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); - ASSERT3P(c, !=, NULL); - return (c); -} - -static void -abd_free_chunk(void *c) -{ - kmem_cache_free(abd_chunk_cache, c); -} - -void -abd_init(void) -{ - vmem_t *data_alloc_arena = NULL; - -#ifdef _KERNEL - data_alloc_arena = zio_alloc_arena; -#endif - - /* - * Since ABD chunks do not appear in crash dumps, we pass KMC_NOTOUCH - * so that no allocator metadata is stored with the buffers. - */ - abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, - NULL, NULL, NULL, NULL, data_alloc_arena, KMC_NOTOUCH); - - abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, - sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); - if (abd_ksp != NULL) { - abd_ksp->ks_data = &abd_stats; - kstat_install(abd_ksp); - } -} - -void -abd_fini(void) -{ - if (abd_ksp != NULL) { - kstat_delete(abd_ksp); - abd_ksp = NULL; - } - - kmem_cache_destroy(abd_chunk_cache); - abd_chunk_cache = NULL; -} - -static inline size_t -abd_chunkcnt_for_bytes(size_t size) -{ - return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size); -} - -static inline size_t -abd_scatter_chunkcnt(abd_t *abd) -{ - ASSERT(!abd_is_linear(abd)); - return (abd_chunkcnt_for_bytes( - abd->abd_u.abd_scatter.abd_offset + abd->abd_size)); -} - -static inline void -abd_verify(abd_t *abd) -{ - ASSERT3U(abd->abd_size, >, 0); - ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); - ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | - ABD_FLAG_OWNER | ABD_FLAG_META)); - IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); - IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); - if (abd_is_linear(abd)) { - ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL); - } else { - ASSERT3U(abd->abd_u.abd_scatter.abd_offset, <, - zfs_abd_chunk_size); - size_t n = abd_scatter_chunkcnt(abd); - for (int i = 0; i < n; i++) { - ASSERT3P( - abd->abd_u.abd_scatter.abd_chunks[i], !=, NULL); - } - } -} - -static inline abd_t * -abd_alloc_struct(size_t chunkcnt) -{ - size_t size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); - abd_t *abd = kmem_alloc(size, KM_PUSHPAGE); - ASSERT3P(abd, !=, NULL); - ABDSTAT_INCR(abdstat_struct_size, size); - - return (abd); -} - -static inline void -abd_free_struct(abd_t *abd) -{ - size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd); - int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); - kmem_free(abd, size); - ABDSTAT_INCR(abdstat_struct_size, -size); -} - -/* - * Allocate an ABD, along with its own underlying data buffers. Use this if you - * don't care whether the ABD is linear or not. - */ -abd_t * -abd_alloc(size_t size, boolean_t is_metadata) -{ - if (!zfs_abd_scatter_enabled) - return (abd_alloc_linear(size, is_metadata)); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - size_t n = abd_chunkcnt_for_bytes(size); - abd_t *abd = abd_alloc_struct(n); - - abd->abd_flags = ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - abd->abd_size = size; - abd->abd_parent = NULL; - refcount_create(&abd->abd_children); - - abd->abd_u.abd_scatter.abd_offset = 0; - abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size; - - for (int i = 0; i < n; i++) { - void *c = abd_alloc_chunk(); - ASSERT3P(c, !=, NULL); - abd->abd_u.abd_scatter.abd_chunks[i] = c; - } - - ABDSTAT_BUMP(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - n * zfs_abd_chunk_size - size); - - return (abd); -} - -static void -abd_free_scatter(abd_t *abd) -{ - size_t n = abd_scatter_chunkcnt(abd); - for (int i = 0; i < n; i++) { - abd_free_chunk(abd->abd_u.abd_scatter.abd_chunks[i]); - } - - refcount_destroy(&abd->abd_children); - ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); - ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); - ABDSTAT_INCR(abdstat_scatter_chunk_waste, - abd->abd_size - n * zfs_abd_chunk_size); - - abd_free_struct(abd); -} - -/* - * Allocate an ABD that must be linear, along with its own underlying data - * buffer. Only use this when it would be very annoying to write your ABD - * consumer with a scattered ABD. - */ -abd_t * -abd_alloc_linear(size_t size, boolean_t is_metadata) -{ - abd_t *abd = abd_alloc_struct(0); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - abd->abd_size = size; - abd->abd_parent = NULL; - refcount_create(&abd->abd_children); - - if (is_metadata) { - abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size); - } else { - abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size); - } - - ABDSTAT_BUMP(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, size); - - return (abd); -} - -static void -abd_free_linear(abd_t *abd) -{ - if (abd->abd_flags & ABD_FLAG_META) { - zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); - } else { - zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); - } - - refcount_destroy(&abd->abd_children); - ABDSTAT_BUMPDOWN(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); - - abd_free_struct(abd); -} - -/* - * Free an ABD. Only use this on ABDs allocated with abd_alloc() or - * abd_alloc_linear(). - */ -void -abd_free(abd_t *abd) -{ - abd_verify(abd); - ASSERT3P(abd->abd_parent, ==, NULL); - ASSERT(abd->abd_flags & ABD_FLAG_OWNER); - if (abd_is_linear(abd)) - abd_free_linear(abd); - else - abd_free_scatter(abd); -} - -/* - * Allocate an ABD of the same format (same metadata flag, same scatterize - * setting) as another ABD. - */ -abd_t * -abd_alloc_sametype(abd_t *sabd, size_t size) -{ - boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; - if (abd_is_linear(sabd)) { - return (abd_alloc_linear(size, is_metadata)); - } else { - return (abd_alloc(size, is_metadata)); - } -} - -/* - * If we're going to use this ABD for doing I/O using the block layer, the - * consumer of the ABD data doesn't care if it's scattered or not, and we don't - * plan to store this ABD in memory for a long period of time, we should - * allocate the ABD type that requires the least data copying to do the I/O. - * - * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os - * using a scatter/gather list we should switch to that and replace this call - * with vanilla abd_alloc(). - */ -abd_t * -abd_alloc_for_io(size_t size, boolean_t is_metadata) -{ - return (abd_alloc_linear(size, is_metadata)); -} - -/* - * Allocate a new ABD to point to offset off of sabd. It shares the underlying - * buffer data with sabd. Use abd_put() to free. sabd must not be freed while - * any derived ABDs exist. - */ -abd_t * -abd_get_offset(abd_t *sabd, size_t off) -{ - abd_t *abd; - - abd_verify(sabd); - ASSERT3U(off, <=, sabd->abd_size); - - if (abd_is_linear(sabd)) { - abd = abd_alloc_struct(0); - - /* - * Even if this buf is filesystem metadata, we only track that - * if we own the underlying data buffer, which is not true in - * this case. Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = ABD_FLAG_LINEAR; - - abd->abd_u.abd_linear.abd_buf = - (char *)sabd->abd_u.abd_linear.abd_buf + off; - } else { - size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off; - size_t chunkcnt = abd_scatter_chunkcnt(sabd) - - (new_offset / zfs_abd_chunk_size); - - abd = abd_alloc_struct(chunkcnt); - - /* - * Even if this buf is filesystem metadata, we only track that - * if we own the underlying data buffer, which is not true in - * this case. Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = 0; - - abd->abd_u.abd_scatter.abd_offset = - new_offset % zfs_abd_chunk_size; - abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size; - - /* Copy the scatterlist starting at the correct offset */ - (void) memcpy(&abd->abd_u.abd_scatter.abd_chunks, - &sabd->abd_u.abd_scatter.abd_chunks[new_offset / - zfs_abd_chunk_size], - chunkcnt * sizeof (void *)); - } - - abd->abd_size = sabd->abd_size - off; - abd->abd_parent = sabd; - refcount_create(&abd->abd_children); - (void) refcount_add_many(&sabd->abd_children, abd->abd_size, abd); - - return (abd); -} - -/* - * Allocate a linear ABD structure for buf. You must free this with abd_put() - * since the resulting ABD doesn't own its own buffer. - */ -abd_t * -abd_get_from_buf(void *buf, size_t size) -{ - abd_t *abd = abd_alloc_struct(0); - - VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); - - /* - * Even if this buf is filesystem metadata, we only track that if we - * own the underlying data buffer, which is not true in this case. - * Therefore, we don't ever use ABD_FLAG_META here. - */ - abd->abd_flags = ABD_FLAG_LINEAR; - abd->abd_size = size; - abd->abd_parent = NULL; - refcount_create(&abd->abd_children); - - abd->abd_u.abd_linear.abd_buf = buf; - - return (abd); -} - -/* - * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not - * free the underlying scatterlist or buffer. - */ -void -abd_put(abd_t *abd) -{ - abd_verify(abd); - ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); - - if (abd->abd_parent != NULL) { - (void) refcount_remove_many(&abd->abd_parent->abd_children, - abd->abd_size, abd); - } - - refcount_destroy(&abd->abd_children); - abd_free_struct(abd); -} - -/* - * Get the raw buffer associated with a linear ABD. - */ -void * -abd_to_buf(abd_t *abd) -{ - ASSERT(abd_is_linear(abd)); - abd_verify(abd); - return (abd->abd_u.abd_linear.abd_buf); -} - -/* - * Borrow a raw buffer from an ABD without copying the contents of the ABD - * into the buffer. If the ABD is scattered, this will allocate a raw buffer - * whose contents are undefined. To copy over the existing data in the ABD, use - * abd_borrow_buf_copy() instead. - */ -void * -abd_borrow_buf(abd_t *abd, size_t n) -{ - void *buf; - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - buf = abd_to_buf(abd); - } else { - buf = zio_buf_alloc(n); - } - (void) refcount_add_many(&abd->abd_children, n, buf); - - return (buf); -} - -void * -abd_borrow_buf_copy(abd_t *abd, size_t n) -{ - void *buf = abd_borrow_buf(abd, n); - if (!abd_is_linear(abd)) { - abd_copy_to_buf(buf, abd, n); - } - return (buf); -} - -/* - * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will - * not change the contents of the ABD and will ASSERT that you didn't modify - * the buffer since it was borrowed. If you want any changes you made to buf to - * be copied back to abd, use abd_return_buf_copy() instead. - */ -void -abd_return_buf(abd_t *abd, void *buf, size_t n) -{ - abd_verify(abd); - ASSERT3U(abd->abd_size, >=, n); - if (abd_is_linear(abd)) { - ASSERT3P(buf, ==, abd_to_buf(abd)); - } else { - ASSERT0(abd_cmp_buf(abd, buf, n)); - zio_buf_free(buf, n); - } - (void) refcount_remove_many(&abd->abd_children, n, buf); -} - -void -abd_return_buf_copy(abd_t *abd, void *buf, size_t n) -{ - if (!abd_is_linear(abd)) { - abd_copy_from_buf(abd, buf, n); - } - abd_return_buf(abd, buf, n); -} - -/* - * Give this ABD ownership of the buffer that it's storing. Can only be used on - * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated - * with abd_alloc_linear() which subsequently released ownership of their buf - * with abd_release_ownership_of_buf(). - */ -void -abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) -{ - ASSERT(abd_is_linear(abd)); - ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); - abd_verify(abd); - - abd->abd_flags |= ABD_FLAG_OWNER; - if (is_metadata) { - abd->abd_flags |= ABD_FLAG_META; - } - - ABDSTAT_BUMP(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size); -} - -void -abd_release_ownership_of_buf(abd_t *abd) -{ - ASSERT(abd_is_linear(abd)); - ASSERT(abd->abd_flags & ABD_FLAG_OWNER); - abd_verify(abd); - - abd->abd_flags &= ~ABD_FLAG_OWNER; - /* Disable this flag since we no longer own the data buffer */ - abd->abd_flags &= ~ABD_FLAG_META; - - ABDSTAT_BUMPDOWN(abdstat_linear_cnt); - ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); -} - -struct abd_iter { - abd_t *iter_abd; /* ABD being iterated through */ - size_t iter_pos; /* position (relative to abd_offset) */ - void *iter_mapaddr; /* addr corresponding to iter_pos */ - size_t iter_mapsize; /* length of data valid at mapaddr */ -}; - -static inline size_t -abd_iter_scatter_chunk_offset(struct abd_iter *aiter) -{ - ASSERT(!abd_is_linear(aiter->iter_abd)); - return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset + - aiter->iter_pos) % zfs_abd_chunk_size); -} - -static inline size_t -abd_iter_scatter_chunk_index(struct abd_iter *aiter) -{ - ASSERT(!abd_is_linear(aiter->iter_abd)); - return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset + - aiter->iter_pos) / zfs_abd_chunk_size); -} - -/* - * Initialize the abd_iter. - */ -static void -abd_iter_init(struct abd_iter *aiter, abd_t *abd) -{ - abd_verify(abd); - aiter->iter_abd = abd; - aiter->iter_pos = 0; - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; -} - -/* - * Advance the iterator by a certain amount. Cannot be called when a chunk is - * in use. This can be safely called when the aiter has already exhausted, in - * which case this does nothing. - */ -static void -abd_iter_advance(struct abd_iter *aiter, size_t amount) -{ - ASSERT3P(aiter->iter_mapaddr, ==, NULL); - ASSERT0(aiter->iter_mapsize); - - /* There's nothing left to advance to, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - aiter->iter_pos += amount; -} - -/* - * Map the current chunk into aiter. This can be safely called when the aiter - * has already exhausted, in which case this does nothing. - */ -static void -abd_iter_map(struct abd_iter *aiter) -{ - void *paddr; - size_t offset = 0; - - ASSERT3P(aiter->iter_mapaddr, ==, NULL); - ASSERT0(aiter->iter_mapsize); - - /* Panic if someone has changed zfs_abd_chunk_size */ - IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size == - aiter->iter_abd->abd_u.abd_scatter.abd_chunk_size); - - /* There's nothing left to iterate over, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - if (abd_is_linear(aiter->iter_abd)) { - offset = aiter->iter_pos; - aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; - paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf; - } else { - size_t index = abd_iter_scatter_chunk_index(aiter); - offset = abd_iter_scatter_chunk_offset(aiter); - aiter->iter_mapsize = zfs_abd_chunk_size - offset; - paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index]; - } - aiter->iter_mapaddr = (char *)paddr + offset; -} - -/* - * Unmap the current chunk from aiter. This can be safely called when the aiter - * has already exhausted, in which case this does nothing. - */ -static void -abd_iter_unmap(struct abd_iter *aiter) -{ - /* There's nothing left to unmap, so do nothing */ - if (aiter->iter_pos == aiter->iter_abd->abd_size) - return; - - ASSERT3P(aiter->iter_mapaddr, !=, NULL); - ASSERT3U(aiter->iter_mapsize, >, 0); - - aiter->iter_mapaddr = NULL; - aiter->iter_mapsize = 0; -} - -int -abd_iterate_func(abd_t *abd, size_t off, size_t size, - abd_iter_func_t *func, void *private) -{ - int ret = 0; - struct abd_iter aiter; - - abd_verify(abd); - ASSERT3U(off + size, <=, abd->abd_size); - - abd_iter_init(&aiter, abd); - abd_iter_advance(&aiter, off); - - while (size > 0) { - abd_iter_map(&aiter); - - size_t len = MIN(aiter.iter_mapsize, size); - ASSERT3U(len, >, 0); - - ret = func(aiter.iter_mapaddr, len, private); - - abd_iter_unmap(&aiter); - - if (ret != 0) - break; - - size -= len; - abd_iter_advance(&aiter, len); - } - - return (ret); -} - -struct buf_arg { - void *arg_buf; -}; - -static int -abd_copy_to_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(ba_ptr->arg_buf, buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - -/* - * Copy abd to buf. (off is the offset in abd.) - */ -void -abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, - &ba_ptr); -} - -static int -abd_cmp_buf_off_cb(void *buf, size_t size, void *private) -{ - int ret; - struct buf_arg *ba_ptr = private; - - ret = memcmp(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (ret); -} - -/* - * Compare the contents of abd to buf. (off is the offset in abd.) - */ -int -abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { (void *) buf }; - - return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); -} - -static int -abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - -/* - * Copy from buf to abd. (off is the offset in abd.) - */ -void -abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) -{ - struct buf_arg ba_ptr = { (void *) buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, - &ba_ptr); -} - -/*ARGSUSED*/ -static int -abd_zero_off_cb(void *buf, size_t size, void *private) -{ - (void) memset(buf, 0, size); - return (0); -} - -/* - * Zero out the abd from a particular offset to the end. - */ -void -abd_zero_off(abd_t *abd, size_t off, size_t size) -{ - (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL); -} - -/* - * Iterate over two ABDs and call func incrementally on the two ABDs' data in - * equal-sized chunks (passed to func as raw buffers). func could be called many - * times during this iteration. - */ -int -abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, - size_t size, abd_iter_func2_t *func, void *private) -{ - int ret = 0; - struct abd_iter daiter, saiter; - - abd_verify(dabd); - abd_verify(sabd); - - ASSERT3U(doff + size, <=, dabd->abd_size); - ASSERT3U(soff + size, <=, sabd->abd_size); - - abd_iter_init(&daiter, dabd); - abd_iter_init(&saiter, sabd); - abd_iter_advance(&daiter, doff); - abd_iter_advance(&saiter, soff); - - while (size > 0) { - abd_iter_map(&daiter); - abd_iter_map(&saiter); - - size_t dlen = MIN(daiter.iter_mapsize, size); - size_t slen = MIN(saiter.iter_mapsize, size); - size_t len = MIN(dlen, slen); - ASSERT(dlen > 0 || slen > 0); - - ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len, - private); - - abd_iter_unmap(&saiter); - abd_iter_unmap(&daiter); - - if (ret != 0) - break; - - size -= len; - abd_iter_advance(&daiter, len); - abd_iter_advance(&saiter, len); - } - - return (ret); -} - -/*ARGSUSED*/ -static int -abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private) -{ - (void) memcpy(dbuf, sbuf, size); - return (0); -} - -/* - * Copy from sabd to dabd starting from soff and doff. - */ -void -abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size) -{ - (void) abd_iterate_func2(dabd, sabd, doff, soff, size, - abd_copy_off_cb, NULL); -} - -/*ARGSUSED*/ -static int -abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private) -{ - return (memcmp(bufa, bufb, size)); -} - -/* - * Compares the first size bytes of two ABDs. - */ -int -abd_cmp(abd_t *dabd, abd_t *sabd, size_t size) -{ - return (abd_iterate_func2(dabd, sabd, 0, 0, size, abd_cmp_cb, NULL)); -} diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c index 328c332ae4..cb645dc39e 100644 --- a/usr/src/uts/common/fs/zfs/arc.c +++ b/usr/src/uts/common/fs/zfs/arc.c @@ -128,14 +128,14 @@ * the arc_buf_hdr_t that will point to the data block in memory. A block can * only be read by a consumer if it has an l1arc_buf_hdr_t. The L1ARC * caches data in two ways -- in a list of ARC buffers (arc_buf_t) and - * also in the arc_buf_hdr_t's private physical data block pointer (b_pabd). + * also in the arc_buf_hdr_t's private physical data block pointer (b_pdata). * * The L1ARC's data pointer may or may not be uncompressed. The ARC has the - * ability to store the physical data (b_pabd) associated with the DVA of the - * arc_buf_hdr_t. Since the b_pabd is a copy of the on-disk physical block, + * ability to store the physical data (b_pdata) associated with the DVA of the + * arc_buf_hdr_t. Since the b_pdata is a copy of the on-disk physical block, * it will match its on-disk compression characteristics. This behavior can be * disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the - * compressed ARC functionality is disabled, the b_pabd will point to an + * compressed ARC functionality is disabled, the b_pdata will point to an * uncompressed version of the on-disk data. * * Data in the L1ARC is not accessed by consumers of the ARC directly. Each @@ -174,7 +174,7 @@ * | l1arc_buf_hdr_t * | | arc_buf_t * | b_buf +------------>+-----------+ arc_buf_t - * | b_pabd +-+ |b_next +---->+-----------+ + * | b_pdata +-+ |b_next +---->+-----------+ * +-----------+ | |-----------| |b_next +-->NULL * | |b_comp = T | +-----------+ * | |b_data +-+ |b_comp = F | @@ -191,8 +191,8 @@ * When a consumer reads a block, the ARC must first look to see if the * arc_buf_hdr_t is cached. If the hdr is cached then the ARC allocates a new * arc_buf_t and either copies uncompressed data into a new data buffer from an - * existing uncompressed arc_buf_t, decompresses the hdr's b_pabd buffer into a - * new data buffer, or shares the hdr's b_pabd buffer, depending on whether the + * existing uncompressed arc_buf_t, decompresses the hdr's b_pdata buffer into a + * new data buffer, or shares the hdr's b_pdata buffer, depending on whether the * hdr is compressed and the desired compression characteristics of the * arc_buf_t consumer. If the arc_buf_t ends up sharing data with the * arc_buf_hdr_t and both of them are uncompressed then the arc_buf_t must be @@ -216,7 +216,7 @@ * | | arc_buf_t (shared) * | b_buf +------------>+---------+ arc_buf_t * | | |b_next +---->+---------+ - * | b_pabd +-+ |---------| |b_next +-->NULL + * | b_pdata +-+ |---------| |b_next +-->NULL * +-----------+ | | | +---------+ * | |b_data +-+ | | * | +---------+ | |b_data +-+ @@ -230,19 +230,19 @@ * | +------+ | * +---------------------------------+ * - * Writing to the ARC requires that the ARC first discard the hdr's b_pabd + * Writing to the ARC requires that the ARC first discard the hdr's b_pdata * since the physical block is about to be rewritten. The new data contents * will be contained in the arc_buf_t. As the I/O pipeline performs the write, * it may compress the data before writing it to disk. The ARC will be called * with the transformed data and will bcopy the transformed on-disk block into - * a newly allocated b_pabd. Writes are always done into buffers which have + * a newly allocated b_pdata. Writes are always done into buffers which have * either been loaned (and hence are new and don't have other readers) or * buffers which have been released (and hence have their own hdr, if there * were originally other readers of the buf's original hdr). This ensures that * the ARC only needs to update a single buf and its hdr after a write occurs. * - * When the L2ARC is in use, it will also take advantage of the b_pabd. The - * L2ARC will always write the contents of b_pabd to the L2ARC. This means + * When the L2ARC is in use, it will also take advantage of the b_pdata. The + * L2ARC will always write the contents of b_pdata to the L2ARC. This means * that when compressed ARC is enabled that the L2ARC blocks are identical * to the on-disk block in the main data pool. This provides a significant * advantage since the ARC can leverage the bp's checksum when reading from the @@ -263,9 +263,7 @@ #include #include #include -#include #include -#include #ifdef _KERNEL #include #include @@ -301,7 +299,7 @@ int zfs_arc_evict_batch_limit = 10; /* number of seconds before growing cache again */ static int arc_grow_retry = 60; -/* shift of arc_c for calculating overflow limit in arc_get_data_impl */ +/* shift of arc_c for calculating overflow limit in arc_get_data_buf */ int zfs_arc_overflow_shift = 8; /* shift of arc_c for calculating both min and max arc_p */ @@ -464,13 +462,13 @@ typedef struct arc_stats { kstat_named_t arcstat_c_max; kstat_named_t arcstat_size; /* - * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. + * Number of compressed bytes stored in the arc_buf_hdr_t's b_pdata. * Note that the compressed bytes may match the uncompressed bytes * if the block is either not compressed or compressed arc is disabled. */ kstat_named_t arcstat_compressed_size; /* - * Uncompressed size of the data stored in b_pabd. If compressed + * Uncompressed size of the data stored in b_pdata. If compressed * arc is disabled then this value will be identical to the stat * above. */ @@ -884,7 +882,7 @@ typedef struct l1arc_buf_hdr { refcount_t b_refcnt; arc_callback_t *b_acb; - abd_t *b_pabd; + void *b_pdata; } l1arc_buf_hdr_t; typedef struct l2arc_dev l2arc_dev_t; @@ -1084,7 +1082,7 @@ typedef struct l2arc_write_callback { typedef struct l2arc_data_free { /* protected by l2arc_free_on_write_mtx */ - abd_t *l2df_abd; + void *l2df_data; size_t l2df_size; arc_buf_contents_t l2df_type; list_node_t l2df_list_node; @@ -1094,14 +1092,10 @@ static kmutex_t l2arc_feed_thr_lock; static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; -static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); -static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); -static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); -static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); -static void arc_hdr_free_pabd(arc_buf_hdr_t *); -static void arc_hdr_alloc_pabd(arc_buf_hdr_t *); +static void arc_hdr_free_pdata(arc_buf_hdr_t *hdr); +static void arc_hdr_alloc_pdata(arc_buf_hdr_t *); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(); static void arc_buf_watch(arc_buf_t *); @@ -1441,9 +1435,7 @@ static inline boolean_t arc_buf_is_shared(arc_buf_t *buf) { boolean_t shared = (buf->b_data != NULL && - buf->b_hdr->b_l1hdr.b_pabd != NULL && - abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) && - buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd)); + buf->b_data == buf->b_hdr->b_l1hdr.b_pdata); IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr)); IMPLY(shared, ARC_BUF_SHARED(buf)); IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf)); @@ -1547,8 +1539,7 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) uint64_t csize; void *cbuf = zio_buf_alloc(HDR_GET_PSIZE(hdr)); - csize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); - + csize = zio_compress_data(compress, zio->io_data, cbuf, lsize); ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr)); if (csize < HDR_GET_PSIZE(hdr)) { /* @@ -1583,7 +1574,7 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) * logical I/O size and not just a gang fragment. */ valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp, - BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size, + BP_GET_CHECKSUM(zio->io_bp), zio->io_data, zio->io_size, zio->io_offset, NULL) == 0); zio_pop_transforms(zio); return (valid_cksum); @@ -1881,7 +1872,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) if (hdr_compressed == compressed) { if (!arc_buf_is_shared(buf)) { - abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd, + bcopy(hdr->b_l1hdr.b_pdata, buf->b_data, arc_buf_size(buf)); } } else { @@ -1933,7 +1924,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) return (0); } else { int error = zio_decompress_data(HDR_GET_COMPRESS(hdr), - hdr->b_l1hdr.b_pabd, buf->b_data, + hdr->b_l1hdr.b_pdata, buf->b_data, HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); /* @@ -1970,7 +1961,7 @@ arc_decompress(arc_buf_t *buf) } /* - * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. + * Return the size of the block, b_pdata, that is stored in the arc_buf_hdr_t. */ static uint64_t arc_hdr_size(arc_buf_hdr_t *hdr) @@ -2002,14 +1993,14 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) if (GHOST_STATE(state)) { ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); (void) refcount_add_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; } ASSERT(!GHOST_STATE(state)); - if (hdr->b_l1hdr.b_pabd != NULL) { + if (hdr->b_l1hdr.b_pdata != NULL) { (void) refcount_add_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } @@ -2037,14 +2028,14 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) if (GHOST_STATE(state)) { ASSERT0(hdr->b_l1hdr.b_bufcnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); (void) refcount_remove_many(&state->arcs_esize[type], HDR_GET_LSIZE(hdr), hdr); return; } ASSERT(!GHOST_STATE(state)); - if (hdr->b_l1hdr.b_pabd != NULL) { + if (hdr->b_l1hdr.b_pdata != NULL) { (void) refcount_remove_many(&state->arcs_esize[type], arc_hdr_size(hdr), hdr); } @@ -2141,7 +2132,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, old_state = hdr->b_l1hdr.b_state; refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt); bufcnt = hdr->b_l1hdr.b_bufcnt; - update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL); + update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pdata != NULL); } else { old_state = arc_l2c_only; refcnt = 0; @@ -2211,7 +2202,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, */ (void) refcount_add_many(&new_state->arcs_size, HDR_GET_LSIZE(hdr), hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); } else { uint32_t buffers = 0; @@ -2240,7 +2231,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, } ASSERT3U(bufcnt, ==, buffers); - if (hdr->b_l1hdr.b_pabd != NULL) { + if (hdr->b_l1hdr.b_pdata != NULL) { (void) refcount_add_many(&new_state->arcs_size, arc_hdr_size(hdr), hdr); } else { @@ -2253,7 +2244,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(old_state)) { ASSERT0(bufcnt); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); /* * When moving a header off of a ghost state, @@ -2293,7 +2284,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, buf); } ASSERT3U(bufcnt, ==, buffers); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); (void) refcount_remove_many( &old_state->arcs_size, arc_hdr_size(hdr), hdr); } @@ -2375,7 +2366,7 @@ arc_space_return(uint64_t space, arc_space_type_t type) /* * Given a hdr and a buf, returns whether that buf can share its b_data buffer - * with the hdr's b_pabd. + * with the hdr's b_pdata. */ static boolean_t arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) @@ -2452,23 +2443,20 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, /* * If the hdr's data can be shared then we share the data buffer and * set the appropriate bit in the hdr's b_flags to indicate the hdr is - * sharing it's b_pabd with the arc_buf_t. Otherwise, we allocate a new + * sharing it's b_pdata with the arc_buf_t. Otherwise, we allocate a new * buffer to store the buf's data. * - * There are two additional restrictions here because we're sharing - * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be - * actively involved in an L2ARC write, because if this buf is used by - * an arc_write() then the hdr's data buffer will be released when the + * There is one additional restriction here because we're sharing + * hdr -> buf instead of the usual buf -> hdr: the hdr can't be actively + * involved in an L2ARC write, because if this buf is used by an + * arc_write() then the hdr's data buffer will be released when the * write completes, even though the L2ARC write might still be using it. - * Second, the hdr's ABD must be linear so that the buf's user doesn't - * need to be ABD-aware. */ - boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) && - abd_is_linear(hdr->b_l1hdr.b_pabd); + boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr); /* Set up b_data and sharing */ if (can_share) { - buf->b_data = abd_to_buf(hdr->b_l1hdr.b_pabd); + buf->b_data = hdr->b_l1hdr.b_pdata; buf->b_flags |= ARC_BUF_FLAG_SHARED; arc_hdr_set_flags(hdr, ARC_FLAG_SHARED_DATA); } else { @@ -2564,11 +2552,11 @@ arc_loan_inuse_buf(arc_buf_t *buf, void *tag) } static void -l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type) +l2arc_free_data_on_write(void *data, size_t size, arc_buf_contents_t type) { l2arc_data_free_t *df = kmem_alloc(sizeof (*df), KM_SLEEP); - df->l2df_abd = abd; + df->l2df_data = data; df->l2df_size = size; df->l2df_type = type; mutex_enter(&l2arc_free_on_write_mtx); @@ -2599,7 +2587,7 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr) arc_space_return(size, ARC_SPACE_DATA); } - l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); + l2arc_free_data_on_write(hdr->b_l1hdr.b_pdata, size, type); } /* @@ -2613,7 +2601,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(arc_can_share(hdr, buf)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); /* @@ -2622,9 +2610,7 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) * the refcount whenever an arc_buf_t is shared. */ refcount_transfer_ownership(&state->arcs_size, buf, hdr); - hdr->b_l1hdr.b_pabd = abd_get_from_buf(buf->b_data, arc_buf_size(buf)); - abd_take_ownership_of_buf(hdr->b_l1hdr.b_pabd, - HDR_ISTYPE_METADATA(hdr)); + hdr->b_l1hdr.b_pdata = buf->b_data; arc_hdr_set_flags(hdr, ARC_FLAG_SHARED_DATA); buf->b_flags |= ARC_BUF_FLAG_SHARED; @@ -2644,7 +2630,7 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(arc_buf_is_shared(buf)); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); /* @@ -2653,9 +2639,7 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) */ refcount_transfer_ownership(&state->arcs_size, hdr, buf); arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA); - abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd); - abd_put(hdr->b_l1hdr.b_pabd); - hdr->b_l1hdr.b_pabd = NULL; + hdr->b_l1hdr.b_pdata = NULL; buf->b_flags &= ~ARC_BUF_FLAG_SHARED; /* @@ -2750,7 +2734,7 @@ arc_buf_destroy_impl(arc_buf_t *buf) if (ARC_BUF_SHARED(buf) && !ARC_BUF_COMPRESSED(buf)) { /* * If the current arc_buf_t is sharing its data buffer with the - * hdr, then reassign the hdr's b_pabd to share it with the new + * hdr, then reassign the hdr's b_pdata to share it with the new * buffer at the end of the list. The shared buffer is always * the last one on the hdr's buffer list. * @@ -2765,8 +2749,8 @@ arc_buf_destroy_impl(arc_buf_t *buf) /* hdr is uncompressed so can't have compressed buf */ VERIFY(!ARC_BUF_COMPRESSED(lastbuf)); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); - arc_hdr_free_pabd(hdr); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); + arc_hdr_free_pdata(hdr); /* * We must setup a new shared block between the @@ -2804,26 +2788,26 @@ arc_buf_destroy_impl(arc_buf_t *buf) } static void -arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr) +arc_hdr_alloc_pdata(arc_buf_hdr_t *hdr) { ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(!HDR_SHARED_DATA(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); - hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); + hdr->b_l1hdr.b_pdata = arc_get_data_buf(hdr, arc_hdr_size(hdr), hdr); hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); } static void -arc_hdr_free_pabd(arc_buf_hdr_t *hdr) +arc_hdr_free_pdata(arc_buf_hdr_t *hdr) { ASSERT(HDR_HAS_L1HDR(hdr)); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); /* * If the hdr is currently being written to the l2arc then @@ -2835,10 +2819,10 @@ arc_hdr_free_pabd(arc_buf_hdr_t *hdr) arc_hdr_free_on_write(hdr); ARCSTAT_BUMP(arcstat_l2_free_on_write); } else { - arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, + arc_free_data_buf(hdr, hdr->b_l1hdr.b_pdata, arc_hdr_size(hdr), hdr); } - hdr->b_l1hdr.b_pabd = NULL; + hdr->b_l1hdr.b_pdata = NULL; hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr)); @@ -2875,7 +2859,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, * the compressed or uncompressed data depending on the block * it references and compressed arc enablement. */ - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pdata(hdr); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); return (hdr); @@ -2916,7 +2900,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) nhdr->b_l1hdr.b_state = arc_l2c_only; /* Verify previous threads set to NULL before freeing */ - ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(nhdr->b_l1hdr.b_pdata, ==, NULL); } else { ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); ASSERT0(hdr->b_l1hdr.b_bufcnt); @@ -2934,11 +2918,11 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) /* * A buffer must not be moved into the arc_l2c_only * state if it's not finished being written out to the - * l2arc device. Otherwise, the b_l1hdr.b_pabd field + * l2arc device. Otherwise, the b_l1hdr.b_pdata field * might try to be accessed, even though it was removed. */ VERIFY(!HDR_L2_WRITING(hdr)); - VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL); + VERIFY3P(hdr->b_l1hdr.b_pdata, ==, NULL); #ifdef ZFS_DEBUG if (hdr->b_l1hdr.b_thawed != NULL) { @@ -3027,18 +3011,6 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, arc_buf_thaw(buf); ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); - if (!arc_buf_is_shared(buf)) { - /* - * To ensure that the hdr has the correct data in it if we call - * arc_decompress() on this buf before it's been written to - * disk, it's easiest if we just set up sharing between the - * buf and the hdr. - */ - ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); - arc_hdr_free_pabd(hdr); - arc_share_buf(hdr, buf); - } - return (buf); } @@ -3114,8 +3086,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) } #endif - if (hdr->b_l1hdr.b_pabd != NULL) { - arc_hdr_free_pabd(hdr); + if (hdr->b_l1hdr.b_pdata != NULL) { + arc_hdr_free_pdata(hdr); } } @@ -3183,7 +3155,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) /* * l2arc_write_buffers() relies on a header's L1 portion - * (i.e. its b_pabd field) during it's write phase. + * (i.e. its b_pdata field) during its write phase. * Thus, we cannot push a header onto the arc_l2c_only * state (removing it's L1 piece) until the header is * done being written to the l2arc. @@ -3198,7 +3170,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); if (HDR_HAS_L2HDR(hdr)) { /* * This buffer is cached on the 2nd Level ARC; @@ -3264,9 +3236,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) * If this hdr is being evicted and has a compressed * buffer then we discard it here before we change states. * This ensures that the accounting is updated correctly - * in arc_free_data_impl(). + * in arc_free_data_buf(). */ - arc_hdr_free_pabd(hdr); + arc_hdr_free_pdata(hdr); arc_change_state(evicted_state, hdr, hash_lock); ASSERT(HDR_IN_HASH_TABLE(hdr)); @@ -3364,7 +3336,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, * thread. If we used cv_broadcast, we could * wake up "too many" threads causing arc_size * to significantly overflow arc_c; since - * arc_get_data_impl() doesn't check for overflow + * arc_get_data_buf() doesn't check for overflow * when it's woken up (it doesn't because it's * possible for the ARC to be overflowing while * full of un-evictable buffers, and the @@ -4027,7 +3999,6 @@ arc_kmem_reap_now(void) extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; extern kmem_cache_t *range_seg_cache; - extern kmem_cache_t *abd_chunk_cache; #ifdef _KERNEL if (arc_meta_used >= arc_meta_limit) { @@ -4055,7 +4026,6 @@ arc_kmem_reap_now(void) kmem_cache_reap_now(zio_data_buf_cache[i]); } } - kmem_cache_reap_now(abd_chunk_cache); kmem_cache_reap_now(buf_cache); kmem_cache_reap_now(hdr_full_cache); kmem_cache_reap_now(hdr_l2only_cache); @@ -4071,13 +4041,13 @@ arc_kmem_reap_now(void) } /* - * Threads can block in arc_get_data_impl() waiting for this thread to evict + * Threads can block in arc_get_data_buf() waiting for this thread to evict * enough data and signal them to proceed. When this happens, the threads in - * arc_get_data_impl() are sleeping while holding the hash lock for their + * arc_get_data_buf() are sleeping while holding the hash lock for their * particular arc header. Thus, we must be careful to never sleep on a * hash lock in this thread. This is to prevent the following deadlock: * - * - Thread A sleeps on CV in arc_get_data_impl() holding hash lock "L", + * - Thread A sleeps on CV in arc_get_data_buf() holding hash lock "L", * waiting for the reclaim thread to signal it. * * - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter, @@ -4117,7 +4087,7 @@ arc_reclaim_thread(void) /* * We call arc_adjust() before (possibly) calling * arc_kmem_reap_now(), so that we can wake up - * arc_get_data_impl() sooner. + * arc_get_data_buf() sooner. */ evicted = arc_adjust(); @@ -4274,45 +4244,18 @@ arc_is_overflowing(void) return (arc_size >= arc_c + overflow); } -static abd_t * -arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag) -{ - arc_buf_contents_t type = arc_buf_type(hdr); - - arc_get_data_impl(hdr, size, tag); - if (type == ARC_BUFC_METADATA) { - return (abd_alloc(size, B_TRUE)); - } else { - ASSERT(type == ARC_BUFC_DATA); - return (abd_alloc(size, B_FALSE)); - } -} - -static void * -arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) -{ - arc_buf_contents_t type = arc_buf_type(hdr); - - arc_get_data_impl(hdr, size, tag); - if (type == ARC_BUFC_METADATA) { - return (zio_buf_alloc(size)); - } else { - ASSERT(type == ARC_BUFC_DATA); - return (zio_data_buf_alloc(size)); - } -} - /* * Allocate a block and return it to the caller. If we are hitting the * hard limit for the cache size, we must sleep, waiting for the eviction * thread to catch up. If we're past the target size but below the hard * limit, we'll only signal the reclaim thread and continue on. */ -static void -arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) +static void * +arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) { - arc_state_t *state = hdr->b_l1hdr.b_state; - arc_buf_contents_t type = arc_buf_type(hdr); + void *datap = NULL; + arc_state_t *state = hdr->b_l1hdr.b_state; + arc_buf_contents_t type = arc_buf_type(hdr); arc_adapt(size, state); @@ -4354,8 +4297,11 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) VERIFY3U(hdr->b_type, ==, type); if (type == ARC_BUFC_METADATA) { + datap = zio_buf_alloc(size); arc_space_consume(size, ARC_SPACE_META); } else { + ASSERT(type == ARC_BUFC_DATA); + datap = zio_data_buf_alloc(size); arc_space_consume(size, ARC_SPACE_DATA); } @@ -4391,34 +4337,14 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) refcount_count(&arc_mru->arcs_size) > arc_p)) arc_p = MIN(arc_c, arc_p + size); } -} - -static void -arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size, void *tag) -{ - arc_free_data_impl(hdr, size, tag); - abd_free(abd); -} - -static void -arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, void *tag) -{ - arc_buf_contents_t type = arc_buf_type(hdr); - - arc_free_data_impl(hdr, size, tag); - if (type == ARC_BUFC_METADATA) { - zio_buf_free(buf, size); - } else { - ASSERT(type == ARC_BUFC_DATA); - zio_data_buf_free(buf, size); - } + return (datap); } /* * Free the arc data buffer. */ static void -arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) +arc_free_data_buf(arc_buf_hdr_t *hdr, void *data, uint64_t size, void *tag) { arc_state_t *state = hdr->b_l1hdr.b_state; arc_buf_contents_t type = arc_buf_type(hdr); @@ -4435,9 +4361,11 @@ arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) VERIFY3U(hdr->b_type, ==, type); if (type == ARC_BUFC_METADATA) { + zio_buf_free(data, size); arc_space_return(size, ARC_SPACE_META); } else { ASSERT(type == ARC_BUFC_DATA); + zio_data_buf_free(data, size); arc_space_return(size, ARC_SPACE_DATA); } } @@ -4710,7 +4638,7 @@ arc_read_done(zio_t *zio) if (callback_cnt == 0) { ASSERT(HDR_PREFETCH(hdr)); ASSERT0(hdr->b_l1hdr.b_bufcnt); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); } ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || @@ -4806,7 +4734,7 @@ top: hdr = buf_hash_find(guid, bp, &hash_lock); } - if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) { + if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pdata != NULL) { arc_buf_t *buf = NULL; *arc_flags |= ARC_FLAG_CACHED; @@ -4949,7 +4877,7 @@ top: hdr = arc_hdr_realloc(hdr, hdr_l2only_cache, hdr_full_cache); } - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); @@ -4967,9 +4895,9 @@ top: * avoid hitting an assert in remove_reference(). */ arc_access(hdr, hash_lock); - arc_hdr_alloc_pabd(hdr); + arc_hdr_alloc_pdata(hdr); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); size = arc_hdr_size(hdr); /* @@ -5072,7 +5000,7 @@ top: ASSERT3U(HDR_GET_COMPRESS(hdr), !=, ZIO_COMPRESS_EMPTY); rzio = zio_read_phys(pio, vd, addr, - size, hdr->b_l1hdr.b_pabd, + size, hdr->b_l1hdr.b_pdata, ZIO_CHECKSUM_OFF, l2arc_read_done, cb, priority, zio_flags | ZIO_FLAG_DONT_CACHE | @@ -5111,7 +5039,7 @@ top: } } - rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size, + rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pdata, size, arc_read_done, hdr, priority, zio_flags, zb); if (*arc_flags & ARC_FLAG_WAIT) @@ -5295,17 +5223,16 @@ arc_release(arc_buf_t *buf, void *tag) arc_unshare_buf(hdr, buf); /* - * Now we need to recreate the hdr's b_pabd. Since we + * Now we need to recreate the hdr's b_pdata. Since we * have lastbuf handy, we try to share with it, but if - * we can't then we allocate a new b_pabd and copy the + * we can't then we allocate a new b_pdata and copy the * data from buf into it. */ if (arc_can_share(hdr, lastbuf)) { arc_share_buf(hdr, lastbuf); } else { - arc_hdr_alloc_pabd(hdr); - abd_copy_from_buf(hdr->b_l1hdr.b_pabd, - buf->b_data, psize); + arc_hdr_alloc_pdata(hdr); + bcopy(buf->b_data, hdr->b_l1hdr.b_pdata, psize); } VERIFY3P(lastbuf->b_data, !=, NULL); } else if (HDR_SHARED_DATA(hdr)) { @@ -5321,7 +5248,7 @@ arc_release(arc_buf_t *buf, void *tag) HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); ASSERT(!ARC_BUF_SHARED(buf)); } - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); ASSERT3P(state, !=, arc_l2c_only); (void) refcount_remove_many(&state->arcs_size, @@ -5340,7 +5267,7 @@ arc_release(arc_buf_t *buf, void *tag) mutex_exit(hash_lock); /* - * Allocate a new hdr. The new hdr will contain a b_pabd + * Allocate a new hdr. The new hdr will contain a b_pdata * buffer which will be freed in arc_write(). */ nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type); @@ -5418,15 +5345,15 @@ arc_write_ready(zio_t *zio) if (zio->io_flags & ZIO_FLAG_REEXECUTED) { arc_cksum_free(hdr); arc_buf_unwatch(buf); - if (hdr->b_l1hdr.b_pabd != NULL) { + if (hdr->b_l1hdr.b_pdata != NULL) { if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pdata(hdr); } } } - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); ASSERT(!HDR_SHARED_DATA(hdr)); ASSERT(!arc_buf_is_shared(buf)); @@ -5448,47 +5375,33 @@ arc_write_ready(zio_t *zio) HDR_SET_PSIZE(hdr, psize); arc_hdr_set_compress(hdr, compress); - /* - * Fill the hdr with data. If the hdr is compressed, the data we want - * is available from the zio, otherwise we can take it from the buf. - * - * We might be able to share the buf's data with the hdr here. However, - * doing so would cause the ARC to be full of linear ABDs if we write a - * lot of shareable data. As a compromise, we check whether scattered - * ABDs are allowed, and assume that if they are then the user wants - * the ARC to be primarily filled with them regardless of the data being - * written. Therefore, if they're allowed then we allocate one and copy - * the data into it; otherwise, we share the data directly if we can. + * If the hdr is compressed, then copy the compressed + * zio contents into arc_buf_hdr_t. Otherwise, copy the original + * data buf into the hdr. Ideally, we would like to always copy the + * io_data into b_pdata but the user may have disabled compressed + * arc thus the on-disk block may or may not match what we maintain + * in the hdr's b_pdata field. */ - if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { - arc_hdr_alloc_pabd(hdr); - - /* - * Ideally, we would always copy the io_abd into b_pabd, but the - * user may have disabled compressed ARC, thus we must check the - * hdr's compression setting rather than the io_bp's. - */ - if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { - ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, - ZIO_COMPRESS_OFF); - ASSERT3U(psize, >, 0); - - abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize); - } else { - ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr)); - - abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, - arc_buf_size(buf)); - } + if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && + !ARC_BUF_COMPRESSED(buf)) { + ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, ZIO_COMPRESS_OFF); + ASSERT3U(psize, >, 0); + arc_hdr_alloc_pdata(hdr); + bcopy(zio->io_data, hdr->b_l1hdr.b_pdata, psize); } else { - ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd)); + ASSERT3P(buf->b_data, ==, zio->io_orig_data); ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf)); ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); + /* + * This hdr is not compressed so we're able to share + * the arc_buf_t data buffer with the hdr. + */ arc_share_buf(hdr, buf); + ASSERT0(bcmp(zio->io_orig_data, hdr->b_l1hdr.b_pdata, + HDR_GET_LSIZE(hdr))); } - arc_hdr_verify(hdr, zio->io_bp); } @@ -5593,7 +5506,6 @@ arc_write_done(zio_t *zio) ASSERT(!refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); callback->awcb_done(zio, buf, callback->awcb_private); - abd_put(zio->io_abd); kmem_free(callback, sizeof (arc_write_callback_t)); } @@ -5630,10 +5542,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, callback->awcb_buf = buf; /* - * The hdr's b_pabd is now stale, free it now. A new data block + * The hdr's b_pdata is now stale, free it now. A new data block * will be allocated when the zio pipeline calls arc_write_ready(). */ - if (hdr->b_l1hdr.b_pabd != NULL) { + if (hdr->b_l1hdr.b_pdata != NULL) { /* * If the buf is currently sharing the data block with * the hdr then we need to break that relationship here. @@ -5643,16 +5555,15 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, if (arc_buf_is_shared(buf)) { arc_unshare_buf(hdr, buf); } else { - arc_hdr_free_pabd(hdr); + arc_hdr_free_pdata(hdr); } VERIFY3P(buf->b_data, !=, NULL); arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); } ASSERT(!arc_buf_is_shared(buf)); - ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); - zio = zio_write(pio, spa, txg, bp, - abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)), + zio = zio_write(pio, spa, txg, bp, buf->b_data, HDR_GET_LSIZE(hdr), arc_buf_size(buf), zp, arc_write_ready, (children_ready != NULL) ? arc_write_children_ready : NULL, arc_write_physdone, arc_write_done, callback, @@ -6408,8 +6319,13 @@ l2arc_do_free_on_write() for (df = list_tail(buflist); df; df = df_prev) { df_prev = list_prev(buflist, df); - ASSERT3P(df->l2df_abd, !=, NULL); - abd_free(df->l2df_abd); + ASSERT3P(df->l2df_data, !=, NULL); + if (df->l2df_type == ARC_BUFC_METADATA) { + zio_buf_free(df->l2df_data, df->l2df_size); + } else { + ASSERT(df->l2df_type == ARC_BUFC_DATA); + zio_data_buf_free(df->l2df_data, df->l2df_size); + } list_remove(buflist, df); kmem_free(df, sizeof (l2arc_data_free_t)); } @@ -6559,12 +6475,12 @@ l2arc_read_done(zio_t *zio) mutex_enter(hash_lock); ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); - ASSERT3P(zio->io_abd, !=, NULL); + ASSERT3P(zio->io_data, !=, NULL); /* * Check this survived the L2ARC journey. */ - ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd); + ASSERT3P(zio->io_data, ==, hdr->b_l1hdr.b_pdata); zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ @@ -6598,7 +6514,7 @@ l2arc_read_done(zio_t *zio) ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, - hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done, + hdr->b_l1hdr.b_pdata, zio->io_size, arc_read_done, hdr, zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); } @@ -6886,7 +6802,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); - ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); + ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); ASSERT3U(arc_hdr_size(hdr), >, 0); uint64_t size = arc_hdr_size(hdr); @@ -6901,15 +6817,20 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) * lifetime of the ZIO and be cleaned up afterwards, we * add it to the l2arc_free_on_write queue. */ - abd_t *to_write; + void *to_write; if (!HDR_SHARED_DATA(hdr)) { - to_write = hdr->b_l1hdr.b_pabd; + to_write = hdr->b_l1hdr.b_pdata; } else { - to_write = abd_alloc_for_io(size, - HDR_ISTYPE_METADATA(hdr)); - abd_copy(to_write, hdr->b_l1hdr.b_pabd, size); - l2arc_free_abd_on_write(to_write, size, - arc_buf_type(hdr)); + arc_buf_contents_t type = arc_buf_type(hdr); + if (type == ARC_BUFC_METADATA) { + to_write = zio_buf_alloc(size); + } else { + ASSERT3U(type, ==, ARC_BUFC_DATA); + to_write = zio_data_buf_alloc(size); + } + + bcopy(hdr->b_l1hdr.b_pdata, to_write, size); + l2arc_free_data_on_write(to_write, size, type); } wzio = zio_write_phys(pio, dev->l2ad_vdev, hdr->b_l2hdr.b_daddr, size, to_write, diff --git a/usr/src/uts/common/fs/zfs/blkptr.c b/usr/src/uts/common/fs/zfs/blkptr.c index ff93ff4456..7e61dc96ff 100644 --- a/usr/src/uts/common/fs/zfs/blkptr.c +++ b/usr/src/uts/common/fs/zfs/blkptr.c @@ -14,7 +14,7 @@ */ /* - * Copyright (c) 2013, 2016 by Delphix. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c index 843bc4cc47..bd01d57c0a 100644 --- a/usr/src/uts/common/fs/zfs/dbuf.c +++ b/usr/src/uts/common/fs/zfs/dbuf.c @@ -46,7 +46,6 @@ #include #include #include -#include uint_t zfs_dbuf_evict_key; @@ -3453,10 +3452,8 @@ dbuf_write_override_done(zio_t *zio) arc_release(dr->dt.dl.dr_data, db); } mutex_exit(&db->db_mtx); - dbuf_write_done(zio, NULL, db); - if (zio->io_abd != NULL) - abd_put(zio->io_abd); + dbuf_write_done(zio, NULL, db); } /* Issue I/O to commit a dirty buffer to disk. */ @@ -3549,8 +3546,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) * The BP for this block has been provided by open context * (by dmu_sync() or dmu_buf_write_embedded()). */ - abd_t *contents = (data != NULL) ? - abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL; + void *contents = (data != NULL) ? data->b_data : NULL; dr->dr_zio = zio_write(zio, os->os_spa, txg, &dr->dr_bp_copy, contents, db->db.db_size, db->db.db_size, &zp, diff --git a/usr/src/uts/common/fs/zfs/ddt.c b/usr/src/uts/common/fs/zfs/ddt.c index ba3e02cfb5..9955f89e77 100644 --- a/usr/src/uts/common/fs/zfs/ddt.c +++ b/usr/src/uts/common/fs/zfs/ddt.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2016 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. */ #include @@ -36,7 +36,6 @@ #include #include #include -#include /* * Enable/disable prefetching of dedup-ed blocks which are going to be freed. @@ -652,8 +651,9 @@ ddt_free(ddt_entry_t *dde) for (int p = 0; p < DDT_PHYS_TYPES; p++) ASSERT(dde->dde_lead_zio[p] == NULL); - if (dde->dde_repair_abd != NULL) - abd_free(dde->dde_repair_abd); + if (dde->dde_repair_data != NULL) + zio_buf_free(dde->dde_repair_data, + DDK_GET_PSIZE(&dde->dde_key)); cv_destroy(&dde->dde_cv); kmem_free(dde, sizeof (*dde)); @@ -917,7 +917,7 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) ddt_enter(ddt); - if (dde->dde_repair_abd != NULL && spa_writeable(ddt->ddt_spa) && + if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) avl_insert(&ddt->ddt_repair_tree, dde, where); else @@ -954,7 +954,7 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, - rdde->dde_repair_abd, DDK_GET_PSIZE(rddk), NULL, NULL, + rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); } diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c index 20a41cc98e..9d41832062 100644 --- a/usr/src/uts/common/fs/zfs/dmu.c +++ b/usr/src/uts/common/fs/zfs/dmu.c @@ -46,7 +46,6 @@ #include #include #include -#include #ifdef _KERNEL #include #include @@ -1633,7 +1632,6 @@ dmu_sync_late_arrival_done(zio_t *zio) dsa->dsa_done(dsa->dsa_zgd, zio->io_error); - abd_put(zio->io_abd); kmem_free(dsa, sizeof (*dsa)); } @@ -1659,10 +1657,10 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, dsa->dsa_tx = tx; zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, - abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size), - zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp, - dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done, - dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); + zgd->zgd_db->db_data, zgd->zgd_db->db_size, zgd->zgd_db->db_size, + zp, dmu_sync_late_arrival_ready, NULL, + NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE, + ZIO_FLAG_CANFAIL, zb)); return (0); } @@ -2195,7 +2193,6 @@ byteswap_uint8_array(void *vbuf, size_t size) void dmu_init(void) { - abd_init(); zfs_dbgmsg_init(); sa_cache_init(); xuio_stat_init(); @@ -2219,5 +2216,4 @@ dmu_fini(void) xuio_stat_fini(); sa_cache_fini(); zfs_dbgmsg_fini(); - abd_fini(); } diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c index ac71c5a11a..e0abf7dbac 100644 --- a/usr/src/uts/common/fs/zfs/dmu_send.c +++ b/usr/src/uts/common/fs/zfs/dmu_send.c @@ -132,7 +132,7 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) { ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); - (void) fletcher_4_incremental_native(dsp->dsa_drr, + fletcher_4_incremental_native(dsp->dsa_drr, offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), &dsp->dsa_zc); if (dsp->dsa_drr->drr_type == DRR_BEGIN) { @@ -145,13 +145,13 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) if (dsp->dsa_drr->drr_type == DRR_END) { dsp->dsa_sent_end = B_TRUE; } - (void) fletcher_4_incremental_native(&dsp->dsa_drr-> + fletcher_4_incremental_native(&dsp->dsa_drr-> drr_u.drr_checksum.drr_checksum, sizeof (zio_cksum_t), &dsp->dsa_zc); if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (SET_ERROR(EINTR)); if (payload_len != 0) { - (void) fletcher_4_incremental_native(payload, payload_len, + fletcher_4_incremental_native(payload, payload_len, &dsp->dsa_zc); if (dump_bytes(dsp, payload, payload_len) != 0) return (SET_ERROR(EINTR)); @@ -1742,11 +1742,11 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { drc->drc_byteswap = B_TRUE; - (void) fletcher_4_incremental_byteswap(drr_begin, + fletcher_4_incremental_byteswap(drr_begin, sizeof (dmu_replay_record_t), &drc->drc_cksum); byteswap_record(drr_begin); } else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) { - (void) fletcher_4_incremental_native(drr_begin, + fletcher_4_incremental_native(drr_begin, sizeof (dmu_replay_record_t), &drc->drc_cksum); } else { return (SET_ERROR(EINVAL)); @@ -2419,9 +2419,9 @@ static void receive_cksum(struct receive_arg *ra, int len, void *buf) { if (ra->byteswap) { - (void) fletcher_4_incremental_byteswap(buf, len, &ra->cksum); + fletcher_4_incremental_byteswap(buf, len, &ra->cksum); } else { - (void) fletcher_4_incremental_native(buf, len, &ra->cksum); + fletcher_4_incremental_native(buf, len, &ra->cksum); } } diff --git a/usr/src/uts/common/fs/zfs/dsl_scan.c b/usr/src/uts/common/fs/zfs/dsl_scan.c index 1963f15385..c672128744 100644 --- a/usr/src/uts/common/fs/zfs/dsl_scan.c +++ b/usr/src/uts/common/fs/zfs/dsl_scan.c @@ -20,8 +20,8 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright 2016 Gary Mills - * Copyright (c) 2011, 2016 by Delphix. All rights reserved. */ #include @@ -47,7 +47,6 @@ #include #include #include -#include #ifdef _KERNEL #include #endif @@ -1757,7 +1756,7 @@ dsl_scan_scrub_done(zio_t *zio) { spa_t *spa = zio->io_spa; - abd_free(zio->io_abd); + zio_data_buf_free(zio->io_data, zio->io_size); mutex_enter(&spa->spa_scrub_lock); spa->spa_scrub_inflight--; @@ -1840,6 +1839,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, if (needs_io && !zfs_no_scrub_io) { vdev_t *rvd = spa->spa_root_vdev; uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight; + void *data = zio_data_buf_alloc(size); mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= maxinflight) @@ -1854,9 +1854,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle) delay(scan_delay); - zio_nowait(zio_read(NULL, spa, bp, - abd_alloc_for_io(size, B_FALSE), size, dsl_scan_scrub_done, - NULL, ZIO_PRIORITY_SCRUB, zio_flags, zb)); + zio_nowait(zio_read(NULL, spa, bp, data, size, + dsl_scan_scrub_done, NULL, ZIO_PRIORITY_SCRUB, + zio_flags, zb)); } /* do not relocate this block */ diff --git a/usr/src/uts/common/fs/zfs/edonr_zfs.c b/usr/src/uts/common/fs/zfs/edonr_zfs.c index 9a3430d946..93f1221fd5 100644 --- a/usr/src/uts/common/fs/zfs/edonr_zfs.c +++ b/usr/src/uts/common/fs/zfs/edonr_zfs.c @@ -22,31 +22,19 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Use is subject to license terms. */ -/* - * Copyright (c) 2016 by Delphix. All rights reserved. - */ #include #include #include -#include #define EDONR_MODE 512 #define EDONR_BLOCK_SIZE EdonR512_BLOCK_SIZE -static int -edonr_incremental(void *buf, size_t size, void *arg) -{ - EdonRState *ctx = arg; - EdonRUpdate(ctx, buf, size * 8); - return (0); -} - /* * Native zio_checksum interface for the Edon-R hash function. */ /*ARGSUSED*/ void -abd_checksum_edonr_native(abd_t *abd, uint64_t size, +zio_checksum_edonr_native(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { uint8_t digest[EDONR_MODE / 8]; @@ -54,7 +42,7 @@ abd_checksum_edonr_native(abd_t *abd, uint64_t size, ASSERT(ctx_template != NULL); bcopy(ctx_template, &ctx, sizeof (ctx)); - (void) abd_iterate_func(abd, 0, size, edonr_incremental, &ctx); + EdonRUpdate(&ctx, buf, size * 8); EdonRFinal(&ctx, digest); bcopy(digest, zcp->zc_word, sizeof (zcp->zc_word)); } @@ -63,12 +51,12 @@ abd_checksum_edonr_native(abd_t *abd, uint64_t size, * Byteswapped zio_checksum interface for the Edon-R hash function. */ void -abd_checksum_edonr_byteswap(abd_t *abd, uint64_t size, +zio_checksum_edonr_byteswap(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { zio_cksum_t tmp; - abd_checksum_edonr_native(abd, size, ctx_template, &tmp); + zio_checksum_edonr_native(buf, size, ctx_template, &tmp); zcp->zc_word[0] = BSWAP_64(zcp->zc_word[0]); zcp->zc_word[1] = BSWAP_64(zcp->zc_word[1]); zcp->zc_word[2] = BSWAP_64(zcp->zc_word[2]); @@ -76,7 +64,7 @@ abd_checksum_edonr_byteswap(abd_t *abd, uint64_t size, } void * -abd_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) +zio_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) { EdonRState *ctx; uint8_t salt_block[EDONR_BLOCK_SIZE]; @@ -105,7 +93,7 @@ abd_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) } void -abd_checksum_edonr_tmpl_free(void *ctx_template) +zio_checksum_edonr_tmpl_free(void *ctx_template) { EdonRState *ctx = ctx_template; diff --git a/usr/src/uts/common/fs/zfs/lz4.c b/usr/src/uts/common/fs/zfs/lz4.c index 82a08939dc..3aa1b74ef3 100644 --- a/usr/src/uts/common/fs/zfs/lz4.c +++ b/usr/src/uts/common/fs/zfs/lz4.c @@ -31,9 +31,6 @@ * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html * - LZ4 source repository : http://code.google.com/p/lz4/ */ -/* - * Copyright (c) 2016 by Delphix. All rights reserved. - */ #include diff --git a/usr/src/uts/common/fs/zfs/sha256.c b/usr/src/uts/common/fs/zfs/sha256.c index 23a97aa3de..81a7f6b1c2 100644 --- a/usr/src/uts/common/fs/zfs/sha256.c +++ b/usr/src/uts/common/fs/zfs/sha256.c @@ -24,39 +24,29 @@ */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ #include #include #include -#include - -static int -sha_incremental(void *buf, size_t size, void *arg) -{ - SHA2_CTX *ctx = arg; - SHA2Update(ctx, buf, size); - return (0); -} /*ARGSUSED*/ void -abd_checksum_SHA256(abd_t *abd, uint64_t size, +zio_checksum_SHA256(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { SHA2_CTX ctx; zio_cksum_t tmp; SHA2Init(SHA256, &ctx); - (void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx); + SHA2Update(&ctx, buf, size); SHA2Final(&tmp, &ctx); /* * A prior implementation of this function had a * private SHA256 implementation always wrote things out in * Big Endian and there wasn't a byteswap variant of it. - * To preserve on disk compatibility we need to force that - * behavior. + * To preseve on disk compatibility we need to force that + * behaviour. */ zcp->zc_word[0] = BE_64(tmp.zc_word[0]); zcp->zc_word[1] = BE_64(tmp.zc_word[1]); @@ -66,24 +56,24 @@ abd_checksum_SHA256(abd_t *abd, uint64_t size, /*ARGSUSED*/ void -abd_checksum_SHA512_native(abd_t *abd, uint64_t size, +zio_checksum_SHA512_native(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { SHA2_CTX ctx; SHA2Init(SHA512_256, &ctx); - (void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx); + SHA2Update(&ctx, buf, size); SHA2Final(zcp, &ctx); } /*ARGSUSED*/ void -abd_checksum_SHA512_byteswap(abd_t *abd, uint64_t size, +zio_checksum_SHA512_byteswap(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { zio_cksum_t tmp; - abd_checksum_SHA512_native(abd, size, ctx_template, &tmp); + zio_checksum_SHA512_native(buf, size, ctx_template, &tmp); zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); diff --git a/usr/src/uts/common/fs/zfs/skein_zfs.c b/usr/src/uts/common/fs/zfs/skein_zfs.c index 340da7adfb..6592340396 100644 --- a/usr/src/uts/common/fs/zfs/skein_zfs.c +++ b/usr/src/uts/common/fs/zfs/skein_zfs.c @@ -20,52 +20,42 @@ */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ #include #include #include -#include - -static int -skein_incremental(void *buf, size_t size, void *arg) -{ - Skein_512_Ctxt_t *ctx = arg; - (void) Skein_512_Update(ctx, buf, size); - return (0); -} /* * Computes a native 256-bit skein MAC checksum. Please note that this * function requires the presence of a ctx_template that should be allocated - * using abd_checksum_skein_tmpl_init. + * using zio_checksum_skein_tmpl_init. */ /*ARGSUSED*/ void -abd_checksum_skein_native(abd_t *abd, uint64_t size, +zio_checksum_skein_native(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { Skein_512_Ctxt_t ctx; ASSERT(ctx_template != NULL); bcopy(ctx_template, &ctx, sizeof (ctx)); - (void) abd_iterate_func(abd, 0, size, skein_incremental, &ctx); + (void) Skein_512_Update(&ctx, buf, size); (void) Skein_512_Final(&ctx, (uint8_t *)zcp); bzero(&ctx, sizeof (ctx)); } /* - * Byteswapped version of abd_checksum_skein_native. This just invokes + * Byteswapped version of zio_checksum_skein_native. This just invokes * the native checksum function and byteswaps the resulting checksum (since * skein is internally endian-insensitive). */ void -abd_checksum_skein_byteswap(abd_t *abd, uint64_t size, +zio_checksum_skein_byteswap(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { zio_cksum_t tmp; - abd_checksum_skein_native(abd, size, ctx_template, &tmp); + zio_checksum_skein_native(buf, size, ctx_template, &tmp); zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); @@ -77,7 +67,7 @@ abd_checksum_skein_byteswap(abd_t *abd, uint64_t size, * computations and returns a pointer to it. */ void * -abd_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) +zio_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) { Skein_512_Ctxt_t *ctx; @@ -89,10 +79,10 @@ abd_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) /* * Frees a skein context template previously allocated using - * abd_checksum_skein_tmpl_init. + * zio_checksum_skein_tmpl_init. */ void -abd_checksum_skein_tmpl_free(void *ctx_template) +zio_checksum_skein_tmpl_free(void *ctx_template) { Skein_512_Ctxt_t *ctx = ctx_template; diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c index be5b66fd3b..5764d47c33 100644 --- a/usr/src/uts/common/fs/zfs/spa.c +++ b/usr/src/uts/common/fs/zfs/spa.c @@ -70,7 +70,6 @@ #include #include #include -#include #ifdef _KERNEL #include @@ -1877,7 +1876,6 @@ spa_load_verify_done(zio_t *zio) int error = zio->io_error; spa_t *spa = zio->io_spa; - abd_free(zio->io_abd); if (error) { if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && type != DMU_OT_INTENT_LOG) @@ -1885,6 +1883,7 @@ spa_load_verify_done(zio_t *zio) else atomic_inc_64(&sle->sle_data_count); } + zio_data_buf_free(zio->io_data, zio->io_size); mutex_enter(&spa->spa_scrub_lock); spa->spa_scrub_inflight--; @@ -1914,11 +1913,12 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, */ if (!spa_load_verify_metadata) return (0); - if (!BP_IS_METADATA(bp) && !spa_load_verify_data) + if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data) return (0); zio_t *rio = arg; size_t size = BP_GET_PSIZE(bp); + void *data = zio_data_buf_alloc(size); mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight) @@ -1926,7 +1926,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, spa->spa_scrub_inflight++; mutex_exit(&spa->spa_scrub_lock); - zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, + zio_nowait(zio_read(rio, spa, bp, data, size, spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); diff --git a/usr/src/uts/common/fs/zfs/sys/abd.h b/usr/src/uts/common/fs/zfs/sys/abd.h deleted file mode 100644 index 308f021b76..0000000000 --- a/usr/src/uts/common/fs/zfs/sys/abd.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - */ - -/* - * Copyright (c) 2014 by Chunwei Chen. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. - */ - -#ifndef _ABD_H -#define _ABD_H - -#include -#include -#include -#include -#ifdef _KERNEL -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum abd_flags { - ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */ - ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */ - ABD_FLAG_META = 1 << 2 /* does this represent FS metadata? */ -} abd_flags_t; - -typedef struct abd { - abd_flags_t abd_flags; - uint_t abd_size; /* excludes scattered abd_offset */ - struct abd *abd_parent; - refcount_t abd_children; - union { - struct abd_scatter { - uint_t abd_offset; - uint_t abd_chunk_size; - void *abd_chunks[]; - } abd_scatter; - struct abd_linear { - void *abd_buf; - } abd_linear; - } abd_u; -} abd_t; - -typedef int abd_iter_func_t(void *, size_t, void *); -typedef int abd_iter_func2_t(void *, void *, size_t, void *); - -extern boolean_t zfs_abd_scatter_enabled; - -inline boolean_t -abd_is_linear(abd_t *abd) -{ - return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0); -} - -/* - * Allocations and deallocations - */ - -abd_t *abd_alloc(size_t, boolean_t); -abd_t *abd_alloc_linear(size_t, boolean_t); -abd_t *abd_alloc_for_io(size_t, boolean_t); -abd_t *abd_alloc_sametype(abd_t *, size_t); -void abd_free(abd_t *); -abd_t *abd_get_offset(abd_t *, size_t); -abd_t *abd_get_from_buf(void *, size_t); -void abd_put(abd_t *); - -/* - * Conversion to and from a normal buffer - */ - -void *abd_to_buf(abd_t *); -void *abd_borrow_buf(abd_t *, size_t); -void *abd_borrow_buf_copy(abd_t *, size_t); -void abd_return_buf(abd_t *, void *, size_t); -void abd_return_buf_copy(abd_t *, void *, size_t); -void abd_take_ownership_of_buf(abd_t *, boolean_t); -void abd_release_ownership_of_buf(abd_t *); - -/* - * ABD operations - */ - -int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *); -int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t, - abd_iter_func2_t *, void *); -void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t); -void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t); -void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t); -int abd_cmp(abd_t *, abd_t *, size_t); -int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t); -void abd_zero_off(abd_t *, size_t, size_t); - -/* - * Wrappers for calls with offsets of 0 - */ - -inline void -abd_copy(abd_t *dabd, abd_t *sabd, size_t size) -{ - abd_copy_off(dabd, sabd, 0, 0, size); -} - -inline void -abd_copy_from_buf(abd_t *abd, void *buf, size_t size) -{ - abd_copy_from_buf_off(abd, buf, 0, size); -} - -inline void -abd_copy_to_buf(void* buf, abd_t *abd, size_t size) -{ - abd_copy_to_buf_off(buf, abd, 0, size); -} - -inline int -abd_cmp_buf(abd_t *abd, void *buf, size_t size) -{ - return (abd_cmp_buf_off(abd, buf, 0, size)); -} - -inline void -abd_zero(abd_t *abd, size_t size) -{ - abd_zero_off(abd, 0, size); -} - -/* - * Module lifecycle - */ - -void abd_init(void); -void abd_fini(void); - -#ifdef __cplusplus -} -#endif - -#endif /* _ABD_H */ diff --git a/usr/src/uts/common/fs/zfs/sys/ddt.h b/usr/src/uts/common/fs/zfs/sys/ddt.h index 15d2a9a7ad..771610677e 100644 --- a/usr/src/uts/common/fs/zfs/sys/ddt.h +++ b/usr/src/uts/common/fs/zfs/sys/ddt.h @@ -20,7 +20,6 @@ */ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 by Delphix. All rights reserved. */ #ifndef _SYS_DDT_H @@ -36,8 +35,6 @@ extern "C" { #endif -struct abd; - /* * On-disk DDT formats, in the desired search order (newest version first). */ @@ -111,7 +108,7 @@ struct ddt_entry { ddt_key_t dde_key; ddt_phys_t dde_phys[DDT_PHYS_TYPES]; zio_t *dde_lead_zio[DDT_PHYS_TYPES]; - struct abd *dde_repair_abd; + void *dde_repair_data; enum ddt_type dde_type; enum ddt_class dde_class; uint8_t dde_loading; diff --git a/usr/src/uts/common/fs/zfs/sys/spa.h b/usr/src/uts/common/fs/zfs/sys/spa.h index 0caefcd153..d0bb431866 100644 --- a/usr/src/uts/common/fs/zfs/sys/spa.h +++ b/usr/src/uts/common/fs/zfs/sys/spa.h @@ -419,17 +419,15 @@ _NOTE(CONSTCOND) } while (0) #define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill) -#define BP_IS_METADATA(bp) \ - (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) - #define BP_GET_ASIZE(bp) \ (BP_IS_EMBEDDED(bp) ? 0 : \ DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) -#define BP_GET_UCSIZE(bp) \ - (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) +#define BP_GET_UCSIZE(bp) \ + ((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \ + BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) #define BP_GET_NDVAS(bp) \ (BP_IS_EMBEDDED(bp) ? 0 : \ @@ -599,7 +597,8 @@ _NOTE(CONSTCOND) } while (0) } #define BP_GET_BUFC_TYPE(bp) \ - (BP_IS_METADATA(bp) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) + (((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \ + ARC_BUFC_METADATA : ARC_BUFC_DATA) typedef enum spa_import_type { SPA_IMPORT_EXISTING, diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index b7f4219117..ff14aa80c8 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -52,7 +52,6 @@ extern "C" { typedef struct vdev_queue vdev_queue_t; typedef struct vdev_cache vdev_cache_t; typedef struct vdev_cache_entry vdev_cache_entry_t; -struct abd; extern int zfs_vdev_queue_depth_pct; extern uint32_t zfs_vdev_async_write_max_active; @@ -87,7 +86,7 @@ typedef struct vdev_ops { * Virtual device properties */ struct vdev_cache_entry { - struct abd *ve_abd; + char *ve_data; uint64_t ve_offset; uint64_t ve_lastused; avl_node_t ve_offset_node; diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h index d1de03923b..3dc633f419 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio.h +++ b/usr/src/uts/common/fs/zfs/sys/zio.h @@ -306,7 +306,6 @@ typedef void zio_cksum_free_f(void *cbdata, size_t size); struct zio_bad_cksum; /* defined in zio_checksum.h */ struct dnode_phys; -struct abd; struct zio_cksum_report { struct zio_cksum_report *zcr_next; @@ -339,12 +338,12 @@ typedef struct zio_gang_node { } zio_gang_node_t; typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, - zio_gang_node_t *gn, struct abd *data, uint64_t offset); + zio_gang_node_t *gn, void *data); -typedef void zio_transform_func_t(zio_t *zio, struct abd *data, uint64_t size); +typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size); typedef struct zio_transform { - struct abd *zt_orig_abd; + void *zt_orig_data; uint64_t zt_orig_size; uint64_t zt_bufsize; zio_transform_func_t *zt_transform; @@ -405,8 +404,8 @@ struct zio { blkptr_t io_bp_orig; /* Data represented by this I/O */ - struct abd *io_abd; - struct abd *io_orig_abd; + void *io_data; + void *io_orig_data; uint64_t io_size; uint64_t io_orig_size; /* io_lsize != io_orig_size iff this is a raw write */ @@ -464,19 +463,19 @@ extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, extern zio_t *zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags); -extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, - struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private, +extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, + uint64_t lsize, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb); extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, + void *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, zio_done_func_t *physdone, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb); extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - struct abd *data, uint64_t size, zio_done_func_t *done, void *private, + void *data, uint64_t size, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb); extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies, @@ -492,12 +491,12 @@ extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, zio_done_func_t *done, void *private, enum zio_flag flags); extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, - uint64_t size, struct abd *data, int checksum, + uint64_t size, void *data, int checksum, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, boolean_t labels); extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, - uint64_t size, struct abd *data, int checksum, + uint64_t size, void *data, int checksum, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, boolean_t labels); @@ -527,19 +526,19 @@ extern void zio_buf_free(void *buf, size_t size); extern void *zio_data_buf_alloc(size_t size); extern void zio_data_buf_free(void *buf, size_t size); -extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size, +extern void zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, zio_transform_func_t *transform); extern void zio_pop_transforms(zio_t *zio); extern void zio_resubmit_stage_async(void *); extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, - uint64_t offset, struct abd *data, uint64_t size, int type, + uint64_t offset, void *data, uint64_t size, int type, zio_priority_t priority, enum zio_flag flags, zio_done_func_t *done, void *private); extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, - struct abd *data, uint64_t size, int type, zio_priority_t priority, + void *data, uint64_t size, int type, zio_priority_t priority, enum zio_flag flags, zio_done_func_t *done, void *private); extern void zio_vdev_io_bypass(zio_t *zio); diff --git a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h index 3eda057eae..2f7579fd73 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_checksum.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_checksum.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2016 by Delphix. All rights reserved. + * Copyright (c) 2014, 2015 by Delphix. All rights reserved. * Copyright Saso Kiselkov 2013, All rights reserved. */ @@ -34,12 +34,10 @@ extern "C" { #endif -struct abd; - /* * Signature for checksum functions. */ -typedef void zio_checksum_t(struct abd *, uint64_t size, +typedef void zio_checksum_t(const void *data, uint64_t size, const void *ctx_template, zio_cksum_t *zcp); typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt); typedef void zio_checksum_tmpl_free_t(void *ctx_template); @@ -83,28 +81,28 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS]; /* * Checksum routines. */ -extern zio_checksum_t abd_checksum_SHA256; -extern zio_checksum_t abd_checksum_SHA512_native; -extern zio_checksum_t abd_checksum_SHA512_byteswap; +extern zio_checksum_t zio_checksum_SHA256; +extern zio_checksum_t zio_checksum_SHA512_native; +extern zio_checksum_t zio_checksum_SHA512_byteswap; /* Skein */ -extern zio_checksum_t abd_checksum_skein_native; -extern zio_checksum_t abd_checksum_skein_byteswap; -extern zio_checksum_tmpl_init_t abd_checksum_skein_tmpl_init; -extern zio_checksum_tmpl_free_t abd_checksum_skein_tmpl_free; +extern zio_checksum_t zio_checksum_skein_native; +extern zio_checksum_t zio_checksum_skein_byteswap; +extern zio_checksum_tmpl_init_t zio_checksum_skein_tmpl_init; +extern zio_checksum_tmpl_free_t zio_checksum_skein_tmpl_free; /* Edon-R */ -extern zio_checksum_t abd_checksum_edonr_native; -extern zio_checksum_t abd_checksum_edonr_byteswap; -extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init; -extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free; +extern zio_checksum_t zio_checksum_edonr_native; +extern zio_checksum_t zio_checksum_edonr_byteswap; +extern zio_checksum_tmpl_init_t zio_checksum_edonr_tmpl_init; +extern zio_checksum_tmpl_free_t zio_checksum_edonr_tmpl_free; extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum, void *, uint64_t, uint64_t, zio_bad_cksum_t *); -extern void zio_checksum_compute(zio_t *, enum zio_checksum, - struct abd *, uint64_t); +extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, + void *data, uint64_t size); extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum, - struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *); + void *, uint64_t, uint64_t, zio_bad_cksum_t *); extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); extern enum zio_checksum spa_dedup_checksum(spa_t *spa); extern void zio_checksum_templates_free(spa_t *spa); diff --git a/usr/src/uts/common/fs/zfs/sys/zio_compress.h b/usr/src/uts/common/fs/zfs/sys/zio_compress.h index bcffa699b5..0c1783b140 100644 --- a/usr/src/uts/common/fs/zfs/sys/zio_compress.h +++ b/usr/src/uts/common/fs/zfs/sys/zio_compress.h @@ -25,14 +25,12 @@ */ /* * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - * Copyright (c) 2015, 2016 by Delphix. All rights reserved. + * Copyright (c) 2015 by Delphix. All rights reserved. */ #ifndef _SYS_ZIO_COMPRESS_H #define _SYS_ZIO_COMPRESS_H -#include - #ifdef __cplusplus extern "C" { #endif @@ -63,22 +61,15 @@ typedef size_t zio_compress_func_t(void *src, void *dst, /* Common signature for all zio decompress functions. */ typedef int zio_decompress_func_t(void *src, void *dst, size_t s_len, size_t d_len, int); -/* - * Common signature for all zio decompress functions using an ABD as input. - * This is helpful if you have both compressed ARC and scatter ABDs enabled, - * but is not a requirement for all compression algorithms. - */ -typedef int zio_decompress_abd_func_t(abd_t *src, void *dst, - size_t s_len, size_t d_len, int); /* * Information about each compression function. */ typedef struct zio_compress_info { - char *ci_name; - int ci_level; - zio_compress_func_t *ci_compress; - zio_decompress_func_t *ci_decompress; + zio_compress_func_t *ci_compress; /* compression function */ + zio_decompress_func_t *ci_decompress; /* decompression function */ + int ci_level; /* level parameter */ + char *ci_name; /* algorithm name */ } zio_compress_info_t; extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS]; @@ -106,11 +97,9 @@ extern int lz4_decompress(void *src, void *dst, size_t s_len, size_t d_len, /* * Compress and decompress data if necessary. */ -extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void *dst, +extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len); -extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, - size_t s_len, size_t d_len); -extern int zio_decompress_data_buf(enum zio_compress c, void *src, void *dst, +extern int zio_decompress_data(enum zio_compress c, void *src, void *dst, size_t s_len, size_t d_len); #ifdef __cplusplus diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index d8a31dcab1..e8a0912633 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -45,7 +45,6 @@ #include #include #include -#include /* * Virtual device management. @@ -962,16 +961,16 @@ vdev_probe_done(zio_t *zio) vps->vps_readable = 1; if (zio->io_error == 0 && spa_writeable(spa)) { zio_nowait(zio_write_phys(vd->vdev_probe_zio, vd, - zio->io_offset, zio->io_size, zio->io_abd, + zio->io_offset, zio->io_size, zio->io_data, ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ZIO_PRIORITY_SYNC_WRITE, vps->vps_flags, B_TRUE)); } else { - abd_free(zio->io_abd); + zio_buf_free(zio->io_data, zio->io_size); } } else if (zio->io_type == ZIO_TYPE_WRITE) { if (zio->io_error == 0) vps->vps_writeable = 1; - abd_free(zio->io_abd); + zio_buf_free(zio->io_data, zio->io_size); } else if (zio->io_type == ZIO_TYPE_NULL) { zio_t *pio; @@ -1087,8 +1086,8 @@ vdev_probe(vdev_t *vd, zio_t *zio) for (int l = 1; l < VDEV_LABELS; l++) { zio_nowait(zio_read_phys(pio, vd, vdev_label_offset(vd->vdev_psize, l, - offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE, - abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE), + offsetof(vdev_label_t, vl_pad2)), + VDEV_PAD_SIZE, zio_buf_alloc(VDEV_PAD_SIZE), ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE)); } diff --git a/usr/src/uts/common/fs/zfs/vdev_cache.c b/usr/src/uts/common/fs/zfs/vdev_cache.c index 9b4755321d..a6d6cfa61b 100644 --- a/usr/src/uts/common/fs/zfs/vdev_cache.c +++ b/usr/src/uts/common/fs/zfs/vdev_cache.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2013, 2017 by Delphix. All rights reserved. + * Copyright (c) 2013, 2015 by Delphix. All rights reserved. */ #include @@ -31,7 +31,6 @@ #include #include #include -#include /* * Virtual device read-ahead caching. @@ -142,12 +141,12 @@ static void vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve) { ASSERT(MUTEX_HELD(&vc->vc_lock)); - ASSERT3P(ve->ve_fill_io, ==, NULL); - ASSERT3P(ve->ve_abd, !=, NULL); + ASSERT(ve->ve_fill_io == NULL); + ASSERT(ve->ve_data != NULL); avl_remove(&vc->vc_lastused_tree, ve); avl_remove(&vc->vc_offset_tree, ve); - abd_free(ve->ve_abd); + zio_buf_free(ve->ve_data, VCBS); kmem_free(ve, sizeof (vdev_cache_entry_t)); } @@ -177,14 +176,14 @@ vdev_cache_allocate(zio_t *zio) ve = avl_first(&vc->vc_lastused_tree); if (ve->ve_fill_io != NULL) return (NULL); - ASSERT3U(ve->ve_hits, !=, 0); + ASSERT(ve->ve_hits != 0); vdev_cache_evict(vc, ve); } ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP); ve->ve_offset = offset; ve->ve_lastused = ddi_get_lbolt(); - ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE); + ve->ve_data = zio_buf_alloc(VCBS); avl_add(&vc->vc_offset_tree, ve); avl_add(&vc->vc_lastused_tree, ve); @@ -198,7 +197,7 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); ASSERT(MUTEX_HELD(&vc->vc_lock)); - ASSERT3P(ve->ve_fill_io, ==, NULL); + ASSERT(ve->ve_fill_io == NULL); if (ve->ve_lastused != ddi_get_lbolt()) { avl_remove(&vc->vc_lastused_tree, ve); @@ -207,7 +206,7 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) } ve->ve_hits++; - abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size); + bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size); } /* @@ -221,16 +220,16 @@ vdev_cache_fill(zio_t *fio) vdev_cache_entry_t *ve = fio->io_private; zio_t *pio; - ASSERT3U(fio->io_size, ==, VCBS); + ASSERT(fio->io_size == VCBS); /* * Add data to the cache. */ mutex_enter(&vc->vc_lock); - ASSERT3P(ve->ve_fill_io, ==, fio); - ASSERT3U(ve->ve_offset, ==, fio->io_offset); - ASSERT3P(ve->ve_abd, ==, fio->io_abd); + ASSERT(ve->ve_fill_io == fio); + ASSERT(ve->ve_offset == fio->io_offset); + ASSERT(ve->ve_data == fio->io_data); ve->ve_fill_io = NULL; @@ -261,7 +260,7 @@ vdev_cache_read(zio_t *zio) uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); zio_t *fio; - ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); + ASSERT(zio->io_type == ZIO_TYPE_READ); if (zio->io_flags & ZIO_FLAG_DONT_CACHE) return (B_FALSE); @@ -275,7 +274,7 @@ vdev_cache_read(zio_t *zio) if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS)) return (B_FALSE); - ASSERT3U(cache_phase + zio->io_size, <=, VCBS); + ASSERT(cache_phase + zio->io_size <= VCBS); mutex_enter(&vc->vc_lock); @@ -312,7 +311,7 @@ vdev_cache_read(zio_t *zio) } fio = zio_vdev_delegated_io(zio->io_vd, cache_offset, - ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW, + ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW, ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve); ve->ve_fill_io = fio; @@ -340,7 +339,7 @@ vdev_cache_write(zio_t *zio) uint64_t max_offset = P2ROUNDUP(io_end, VCBS); avl_index_t where; - ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); + ASSERT(zio->io_type == ZIO_TYPE_WRITE); mutex_enter(&vc->vc_lock); @@ -357,9 +356,8 @@ vdev_cache_write(zio_t *zio) if (ve->ve_fill_io != NULL) { ve->ve_missed_update = 1; } else { - abd_copy_off(ve->ve_abd, zio->io_abd, - start - ve->ve_offset, start - io_start, - end - start); + bcopy((char *)zio->io_data + start - io_start, + ve->ve_data + start - ve->ve_offset, end - start); } ve = AVL_NEXT(&vc->vc_offset_tree, ve); } diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c index 24083abbbc..24dcb890e2 100644 --- a/usr/src/uts/common/fs/zfs/vdev_disk.c +++ b/usr/src/uts/common/fs/zfs/vdev_disk.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -659,12 +658,6 @@ vdev_disk_io_intr(buf_t *bp) if (zio->io_error == 0 && bp->b_resid != 0) zio->io_error = SET_ERROR(EIO); - if (zio->io_type == ZIO_TYPE_READ) { - abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, zio->io_size); - } else { - abd_return_buf(zio->io_abd, bp->b_un.b_addr, zio->io_size); - } - kmem_free(vb, sizeof (vdev_buf_t)); zio_delay_interrupt(zio); @@ -776,15 +769,7 @@ vdev_disk_io_start(zio_t *zio) if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) bp->b_flags |= B_FAILFAST; bp->b_bcount = zio->io_size; - - if (zio->io_type == ZIO_TYPE_READ) { - bp->b_un.b_addr = - abd_borrow_buf(zio->io_abd, zio->io_size); - } else { - bp->b_un.b_addr = - abd_borrow_buf_copy(zio->io_abd, zio->io_size); - } - + bp->b_un.b_addr = zio->io_data; bp->b_lblkno = lbtodb(zio->io_offset); bp->b_bufsize = zio->io_size; bp->b_iodone = (int (*)())vdev_disk_io_intr; diff --git a/usr/src/uts/common/fs/zfs/vdev_file.c b/usr/src/uts/common/fs/zfs/vdev_file.c index 147e693967..633621b0dd 100644 --- a/usr/src/uts/common/fs/zfs/vdev_file.c +++ b/usr/src/uts/common/fs/zfs/vdev_file.c @@ -31,7 +31,6 @@ #include #include #include -#include /* * Virtual device vector for files. @@ -158,12 +157,6 @@ vdev_file_io_intr(buf_t *bp) if (zio->io_error == 0 && bp->b_resid != 0) zio->io_error = SET_ERROR(ENOSPC); - if (zio->io_type == ZIO_TYPE_READ) { - abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, zio->io_size); - } else { - abd_return_buf(zio->io_abd, bp->b_un.b_addr, zio->io_size); - } - kmem_free(vb, sizeof (vdev_buf_t)); zio_delay_interrupt(zio); } @@ -229,15 +222,7 @@ vdev_file_io_start(zio_t *zio) bioinit(bp); bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE); bp->b_bcount = zio->io_size; - - if (zio->io_type == ZIO_TYPE_READ) { - bp->b_un.b_addr = - abd_borrow_buf(zio->io_abd, zio->io_size); - } else { - bp->b_un.b_addr = - abd_borrow_buf_copy(zio->io_abd, zio->io_size); - } - + bp->b_un.b_addr = zio->io_data; bp->b_lblkno = lbtodb(zio->io_offset); bp->b_bufsize = zio->io_size; bp->b_private = vf->vf_vnode; diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index b76589f0f6..866046315c 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -145,7 +145,6 @@ #include #include #include -#include #include /* @@ -179,7 +178,7 @@ vdev_label_number(uint64_t psize, uint64_t offset) } static void -vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, +vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *private, int flags) { ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == @@ -193,7 +192,7 @@ vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, } static void -vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, +vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *private, int flags) { ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || @@ -445,7 +444,6 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg) spa_t *spa = vd->vdev_spa; nvlist_t *config = NULL; vdev_phys_t *vp; - abd_t *vp_abd; zio_t *zio; uint64_t best_txg = 0; int error = 0; @@ -457,8 +455,7 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg) if (!vdev_readable(vd)) return (NULL); - vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); - vp = abd_to_buf(vp_abd); + vp = zio_buf_alloc(sizeof (vdev_phys_t)); retry: for (int l = 0; l < VDEV_LABELS; l++) { @@ -466,7 +463,7 @@ retry: zio = zio_root(spa, NULL, NULL, flags); - vdev_label_read(zio, vd, l, vp_abd, + vdev_label_read(zio, vd, l, vp, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t), NULL, NULL, flags); @@ -505,7 +502,7 @@ retry: goto retry; } - abd_free(vp_abd); + zio_buf_free(vp, sizeof (vdev_phys_t)); return (config); } @@ -639,10 +636,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) spa_t *spa = vd->vdev_spa; nvlist_t *label; vdev_phys_t *vp; - abd_t *vp_abd; - abd_t *pad2; + char *pad2; uberblock_t *ub; - abd_t *ub_abd; zio_t *zio; char *buf; size_t buflen; @@ -724,9 +719,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) /* * Initialize its label. */ - vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); - abd_zero(vp_abd, sizeof (vdev_phys_t)); - vp = abd_to_buf(vp_abd); + vp = zio_buf_alloc(sizeof (vdev_phys_t)); + bzero(vp, sizeof (vdev_phys_t)); /* * Generate a label describing the pool and our top-level vdev. @@ -786,7 +780,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); if (error != 0) { nvlist_free(label); - abd_free(vp_abd); + zio_buf_free(vp, sizeof (vdev_phys_t)); /* EFAULT means nvlist_pack ran out of room */ return (error == EFAULT ? ENAMETOOLONG : EINVAL); } @@ -794,15 +788,14 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) /* * Initialize uberblock template. */ - ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_RING, B_TRUE); - abd_zero(ub_abd, VDEV_UBERBLOCK_RING); - abd_copy_from_buf(ub_abd, &spa->spa_uberblock, sizeof (uberblock_t)); - ub = abd_to_buf(ub_abd); + ub = zio_buf_alloc(VDEV_UBERBLOCK_RING); + bzero(ub, VDEV_UBERBLOCK_RING); + *ub = spa->spa_uberblock; ub->ub_txg = 0; /* Initialize the 2nd padding area. */ - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); + pad2 = zio_buf_alloc(VDEV_PAD_SIZE); + bzero(pad2, VDEV_PAD_SIZE); /* * Write everything in parallel. @@ -812,7 +805,7 @@ retry: for (int l = 0; l < VDEV_LABELS; l++) { - vdev_label_write(zio, vd, l, vp_abd, + vdev_label_write(zio, vd, l, vp, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t), NULL, NULL, flags); @@ -825,7 +818,7 @@ retry: offsetof(vdev_label_t, vl_pad2), VDEV_PAD_SIZE, NULL, NULL, flags); - vdev_label_write(zio, vd, l, ub_abd, + vdev_label_write(zio, vd, l, ub, offsetof(vdev_label_t, vl_uberblock), VDEV_UBERBLOCK_RING, NULL, NULL, flags); } @@ -838,9 +831,9 @@ retry: } nvlist_free(label); - abd_free(pad2); - abd_free(ub_abd); - abd_free(vp_abd); + zio_buf_free(pad2, VDEV_PAD_SIZE); + zio_buf_free(ub, VDEV_UBERBLOCK_RING); + zio_buf_free(vp, sizeof (vdev_phys_t)); /* * If this vdev hasn't been previously identified as a spare, then we @@ -904,7 +897,7 @@ vdev_uberblock_load_done(zio_t *zio) vdev_t *vd = zio->io_vd; spa_t *spa = zio->io_spa; zio_t *rio = zio->io_private; - uberblock_t *ub = abd_to_buf(zio->io_abd); + uberblock_t *ub = zio->io_data; struct ubl_cbdata *cbp = rio->io_private; ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd)); @@ -925,7 +918,7 @@ vdev_uberblock_load_done(zio_t *zio) mutex_exit(&rio->io_lock); } - abd_free(zio->io_abd); + zio_buf_free(zio->io_data, zio->io_size); } static void @@ -939,8 +932,8 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags, for (int l = 0; l < VDEV_LABELS; l++) { for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { vdev_label_read(zio, vd, l, - abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd), - B_TRUE), VDEV_UBERBLOCK_OFFSET(vd, n), + zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), + VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), vdev_uberblock_load_done, zio, flags); } @@ -1007,6 +1000,9 @@ vdev_uberblock_sync_done(zio_t *zio) static void vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) { + uberblock_t *ubbuf; + int n; + for (int c = 0; c < vd->vdev_children; c++) vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags); @@ -1016,20 +1012,19 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) if (!vdev_writeable(vd)) return; - int n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); + n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); - /* Copy the uberblock_t into the ABD */ - abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE); - abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd)); - abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t)); + ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)); + bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); + *ubbuf = *ub; for (int l = 0; l < VDEV_LABELS; l++) - vdev_label_write(zio, vd, l, ub_abd, + vdev_label_write(zio, vd, l, ubbuf, VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), vdev_uberblock_sync_done, zio->io_private, flags | ZIO_FLAG_DONT_PROPAGATE); - abd_free(ub_abd); + zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); } /* Sync the uberblocks to all vdevs in svd[] */ @@ -1105,7 +1100,6 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) { nvlist_t *label; vdev_phys_t *vp; - abd_t *vp_abd; char *buf; size_t buflen; @@ -1123,16 +1117,15 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) */ label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); - vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); - abd_zero(vp_abd, sizeof (vdev_phys_t)); - vp = abd_to_buf(vp_abd); + vp = zio_buf_alloc(sizeof (vdev_phys_t)); + bzero(vp, sizeof (vdev_phys_t)); buf = vp->vp_nvlist; buflen = sizeof (vp->vp_nvlist); if (nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP) == 0) { for (; l < VDEV_LABELS; l += 2) { - vdev_label_write(zio, vd, l, vp_abd, + vdev_label_write(zio, vd, l, vp, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t), vdev_label_sync_done, zio->io_private, @@ -1140,7 +1133,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) } } - abd_free(vp_abd); + zio_buf_free(vp, sizeof (vdev_phys_t)); nvlist_free(label); } diff --git a/usr/src/uts/common/fs/zfs/vdev_mirror.c b/usr/src/uts/common/fs/zfs/vdev_mirror.c index a57bd6c73b..b038ef6f67 100644 --- a/usr/src/uts/common/fs/zfs/vdev_mirror.c +++ b/usr/src/uts/common/fs/zfs/vdev_mirror.c @@ -31,7 +31,6 @@ #include #include #include -#include #include /* @@ -197,12 +196,13 @@ vdev_mirror_scrub_done(zio_t *zio) while ((pio = zio_walk_parents(zio, &zl)) != NULL) { mutex_enter(&pio->io_lock); ASSERT3U(zio->io_size, >=, pio->io_size); - abd_copy(pio->io_abd, zio->io_abd, pio->io_size); + bcopy(zio->io_data, pio->io_data, pio->io_size); mutex_exit(&pio->io_lock); } mutex_exit(&zio->io_lock); } - abd_free(zio->io_abd); + + zio_buf_free(zio->io_data, zio->io_size); mc->mc_error = zio->io_error; mc->mc_tried = 1; @@ -282,8 +282,7 @@ vdev_mirror_io_start(zio_t *zio) mc = &mm->mm_child[c]; zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, - abd_alloc_sametype(zio->io_abd, - zio->io_size), zio->io_size, + zio_buf_alloc(zio->io_size), zio->io_size, zio->io_type, zio->io_priority, 0, vdev_mirror_scrub_done, mc)); } @@ -308,7 +307,7 @@ vdev_mirror_io_start(zio_t *zio) while (children--) { mc = &mm->mm_child[c]; zio_nowait(zio_vdev_child_io(zio, zio->io_bp, - mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size, + mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, zio->io_type, zio->io_priority, 0, vdev_mirror_child_done, mc)); c++; @@ -393,7 +392,7 @@ vdev_mirror_io_done(zio_t *zio) mc = &mm->mm_child[c]; zio_vdev_io_redone(zio); zio_nowait(zio_vdev_child_io(zio, zio->io_bp, - mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size, + mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ZIO_TYPE_READ, zio->io_priority, 0, vdev_mirror_child_done, mc)); return; @@ -434,7 +433,7 @@ vdev_mirror_io_done(zio_t *zio) zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, - zio->io_abd, zio->io_size, + zio->io_data, zio->io_size, ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_IO_REPAIR | (unexpected_errors ? ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c index b4a84914d5..ac586c879f 100644 --- a/usr/src/uts/common/fs/zfs/vdev_queue.c +++ b/usr/src/uts/common/fs/zfs/vdev_queue.c @@ -35,7 +35,6 @@ #include #include #include -#include /* * ZFS I/O Scheduler @@ -372,12 +371,12 @@ vdev_queue_agg_io_done(zio_t *aio) zio_t *pio; zio_link_t *zl = NULL; while ((pio = zio_walk_parents(aio, &zl)) != NULL) { - abd_copy_off(pio->io_abd, aio->io_abd, - 0, pio->io_offset - aio->io_offset, pio->io_size); + bcopy((char *)aio->io_data + (pio->io_offset - + aio->io_offset), pio->io_data, pio->io_size); } } - abd_free(aio->io_abd); + zio_buf_free(aio->io_data, aio->io_size); } static int @@ -612,8 +611,8 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) ASSERT3U(size, <=, zfs_vdev_aggregation_limit); aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, - abd_alloc_for_io(size, B_TRUE), size, first->io_type, - zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, + zio_buf_alloc(size), size, first->io_type, zio->io_priority, + flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); aio->io_timestamp = first->io_timestamp; @@ -625,11 +624,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) if (dio->io_flags & ZIO_FLAG_NODATA) { ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE); - abd_zero_off(aio->io_abd, - dio->io_offset - aio->io_offset, dio->io_size); + bzero((char *)aio->io_data + (dio->io_offset - + aio->io_offset), dio->io_size); } else if (dio->io_type == ZIO_TYPE_WRITE) { - abd_copy_off(aio->io_abd, dio->io_abd, - dio->io_offset - aio->io_offset, 0, dio->io_size); + bcopy(dio->io_data, (char *)aio->io_data + + (dio->io_offset - aio->io_offset), + dio->io_size); } zio_add_child(dio, aio); diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c index 4b77438877..ff06896e8d 100644 --- a/usr/src/uts/common/fs/zfs/vdev_raidz.c +++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -109,7 +108,7 @@ typedef struct raidz_col { uint64_t rc_devidx; /* child device index for I/O */ uint64_t rc_offset; /* device offset */ uint64_t rc_size; /* I/O size */ - abd_t *rc_abd; /* I/O data */ + void *rc_data; /* I/O data */ void *rc_gdata; /* used to store the "good" version */ int rc_error; /* I/O error for this device */ uint8_t rc_tried; /* Did we attempt this I/O column? */ @@ -126,7 +125,7 @@ typedef struct raidz_map { uint64_t rm_firstdatacol; /* First data column/parity count */ uint64_t rm_nskip; /* Skipped sectors for padding */ uint64_t rm_skipstart; /* Column index of padding start */ - abd_t *rm_abd_copy; /* rm_asize-buffer of copied data */ + void *rm_datacopy; /* rm_asize-buffer of copied data */ uintptr_t rm_reports; /* # of referencing checksum reports */ uint8_t rm_freed; /* map no longer has referencing ZIO */ uint8_t rm_ecksuminjected; /* checksum error was injected */ @@ -266,7 +265,7 @@ vdev_raidz_map_free(raidz_map_t *rm) size_t size; for (c = 0; c < rm->rm_firstdatacol; c++) { - abd_free(rm->rm_col[c].rc_abd); + zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size); if (rm->rm_col[c].rc_gdata != NULL) zio_buf_free(rm->rm_col[c].rc_gdata, @@ -274,13 +273,11 @@ vdev_raidz_map_free(raidz_map_t *rm) } size = 0; - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - abd_put(rm->rm_col[c].rc_abd); + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) size += rm->rm_col[c].rc_size; - } - if (rm->rm_abd_copy != NULL) - abd_free(rm->rm_abd_copy); + if (rm->rm_datacopy != NULL) + zio_buf_free(rm->rm_datacopy, size); kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols])); } @@ -317,7 +314,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) size_t x; const char *good = NULL; - char *bad; + const char *bad = rm->rm_col[c].rc_data; if (good_data == NULL) { zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); @@ -331,9 +328,8 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) * data never changes for a given logical ZIO) */ if (rm->rm_col[0].rc_gdata == NULL) { - abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; + char *bad_parity[VDEV_RAIDZ_MAXPARITY]; char *buf; - int offset; /* * Set up the rm_col[]s to generate the parity for @@ -341,20 +337,15 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) * replacing them with buffers to hold the result. */ for (x = 0; x < rm->rm_firstdatacol; x++) { - bad_parity[x] = rm->rm_col[x].rc_abd; - rm->rm_col[x].rc_gdata = + bad_parity[x] = rm->rm_col[x].rc_data; + rm->rm_col[x].rc_data = rm->rm_col[x].rc_gdata = zio_buf_alloc(rm->rm_col[x].rc_size); - rm->rm_col[x].rc_abd = - abd_get_from_buf(rm->rm_col[x].rc_gdata, - rm->rm_col[x].rc_size); } /* fill in the data columns from good_data */ buf = (char *)good_data; for (; x < rm->rm_cols; x++) { - abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_from_buf(buf, - rm->rm_col[x].rc_size); + rm->rm_col[x].rc_data = buf; buf += rm->rm_col[x].rc_size; } @@ -364,17 +355,13 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) vdev_raidz_generate_parity(rm); /* restore everything back to its original state */ - for (x = 0; x < rm->rm_firstdatacol; x++) { - abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = bad_parity[x]; - } + for (x = 0; x < rm->rm_firstdatacol; x++) + rm->rm_col[x].rc_data = bad_parity[x]; - offset = 0; + buf = rm->rm_datacopy; for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { - abd_put(rm->rm_col[x].rc_abd); - rm->rm_col[x].rc_abd = abd_get_offset( - rm->rm_abd_copy, offset); - offset += rm->rm_col[x].rc_size; + rm->rm_col[x].rc_data = buf; + buf += rm->rm_col[x].rc_size; } } @@ -388,10 +375,8 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) good += rm->rm_col[x].rc_size; } - bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size); /* we drop the ereport if it ends up that the data was good */ zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); - abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size); } /* @@ -404,7 +389,7 @@ static void vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) { size_t c = (size_t)(uintptr_t)arg; - size_t offset; + caddr_t buf; raidz_map_t *rm = zio->io_vsd; size_t size; @@ -418,7 +403,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) rm->rm_reports++; ASSERT3U(rm->rm_reports, >, 0); - if (rm->rm_abd_copy != NULL) + if (rm->rm_datacopy != NULL) return; /* @@ -434,20 +419,17 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) size += rm->rm_col[c].rc_size; - rm->rm_abd_copy = - abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size); + buf = rm->rm_datacopy = zio_buf_alloc(size); - for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { + for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { raidz_col_t *col = &rm->rm_col[c]; - abd_t *tmp = abd_get_offset(rm->rm_abd_copy, offset); - abd_copy(tmp, col->rc_abd, col->rc_size); - abd_put(col->rc_abd); - col->rc_abd = tmp; + bcopy(col->rc_data, buf, col->rc_size); + col->rc_data = buf; - offset += col->rc_size; + buf += col->rc_size; } - ASSERT3U(offset, ==, size); + ASSERT3P(buf - (caddr_t)rm->rm_datacopy, ==, size); } static const zio_vsd_ops_t vdev_raidz_vsd_ops = { @@ -460,7 +442,7 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = { * the number of children in the target vdev. */ static raidz_map_t * -vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, +vdev_raidz_map_alloc(caddr_t data, uint64_t size, uint64_t offset, uint64_t unit_shift, uint64_t dcols, uint64_t nparity) { raidz_map_t *rm; @@ -473,7 +455,6 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, /* The starting byte offset on each child vdev. */ uint64_t o = (b / dcols) << unit_shift; uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; - uint64_t off = 0; /* * "Quotient": The number of data sectors for this stripe on all but @@ -518,7 +499,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, rm->rm_missingdata = 0; rm->rm_missingparity = 0; rm->rm_firstdatacol = nparity; - rm->rm_abd_copy = NULL; + rm->rm_datacopy = NULL; rm->rm_reports = 0; rm->rm_freed = 0; rm->rm_ecksuminjected = 0; @@ -534,7 +515,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, } rm->rm_col[c].rc_devidx = col; rm->rm_col[c].rc_offset = coff; - rm->rm_col[c].rc_abd = NULL; + rm->rm_col[c].rc_data = NULL; rm->rm_col[c].rc_gdata = NULL; rm->rm_col[c].rc_error = 0; rm->rm_col[c].rc_tried = 0; @@ -557,16 +538,13 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, ASSERT3U(rm->rm_nskip, <=, nparity); for (c = 0; c < rm->rm_firstdatacol; c++) - rm->rm_col[c].rc_abd = - abd_alloc_linear(rm->rm_col[c].rc_size, B_TRUE); + rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size); - rm->rm_col[c].rc_abd = abd_get_offset(abd, 0); - off = rm->rm_col[c].rc_size; + rm->rm_col[c].rc_data = data; - for (c = c + 1; c < acols; c++) { - rm->rm_col[c].rc_abd = abd_get_offset(abd, off); - off += rm->rm_col[c].rc_size; - } + for (c = c + 1; c < acols; c++) + rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data + + rm->rm_col[c - 1].rc_size; /* * If all data stored spans all columns, there's a danger that parity @@ -606,84 +584,29 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset, return (rm); } -struct pqr_struct { - uint64_t *p; - uint64_t *q; - uint64_t *r; -}; - -static int -vdev_raidz_p_func(void *buf, size_t size, void *private) -{ - struct pqr_struct *pqr = private; - const uint64_t *src = buf; - int i, cnt = size / sizeof (src[0]); - - ASSERT(pqr->p && !pqr->q && !pqr->r); - - for (i = 0; i < cnt; i++, src++, pqr->p++) - *pqr->p ^= *src; - - return (0); -} - -static int -vdev_raidz_pq_func(void *buf, size_t size, void *private) -{ - struct pqr_struct *pqr = private; - const uint64_t *src = buf; - uint64_t mask; - int i, cnt = size / sizeof (src[0]); - - ASSERT(pqr->p && pqr->q && !pqr->r); - - for (i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++) { - *pqr->p ^= *src; - VDEV_RAIDZ_64MUL_2(*pqr->q, mask); - *pqr->q ^= *src; - } - - return (0); -} - -static int -vdev_raidz_pqr_func(void *buf, size_t size, void *private) -{ - struct pqr_struct *pqr = private; - const uint64_t *src = buf; - uint64_t mask; - int i, cnt = size / sizeof (src[0]); - - ASSERT(pqr->p && pqr->q && pqr->r); - - for (i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++, pqr->r++) { - *pqr->p ^= *src; - VDEV_RAIDZ_64MUL_2(*pqr->q, mask); - *pqr->q ^= *src; - VDEV_RAIDZ_64MUL_4(*pqr->r, mask); - *pqr->r ^= *src; - } - - return (0); -} - static void vdev_raidz_generate_parity_p(raidz_map_t *rm) { - uint64_t *p; + uint64_t *p, *src, pcount, ccount, i; int c; - abd_t *src; + + pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - src = rm->rm_col[c].rc_abd; - p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); + ASSERT(ccount == pcount); + for (i = 0; i < ccount; i++, src++, p++) { + *p = *src; + } } else { - struct pqr_struct pqr = { p, NULL, NULL }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_p_func, &pqr); + ASSERT(ccount <= pcount); + for (i = 0; i < ccount; i++, src++, p++) { + *p ^= *src; + } } } } @@ -691,43 +614,50 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm) static void vdev_raidz_generate_parity_pq(raidz_map_t *rm) { - uint64_t *p, *q, pcnt, ccnt, mask, i; + uint64_t *p, *q, *src, pcnt, ccnt, mask, i; int c; - abd_t *src; - pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); + pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == rm->rm_col[VDEV_RAIDZ_Q].rc_size); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - src = rm->rm_col[c].rc_abd; - p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); - - ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); - (void) memcpy(q, p, rm->rm_col[c].rc_size); + ASSERT(ccnt == pcnt || ccnt == 0); + for (i = 0; i < ccnt; i++, src++, p++, q++) { + *p = *src; + *q = *src; + } + for (; i < pcnt; i++, src++, p++, q++) { + *p = 0; + *q = 0; + } } else { - struct pqr_struct pqr = { p, q, NULL }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_pq_func, &pqr); - } + ASSERT(ccnt <= pcnt); - if (c == rm->rm_firstdatacol) { - for (i = ccnt; i < pcnt; i++) { - p[i] = 0; - q[i] = 0; + /* + * Apply the algorithm described above by multiplying + * the previous result and adding in the new value. + */ + for (i = 0; i < ccnt; i++, src++, p++, q++) { + *p ^= *src; + + VDEV_RAIDZ_64MUL_2(*q, mask); + *q ^= *src; } - } else { + /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. */ - for (i = ccnt; i < pcnt; i++) { - VDEV_RAIDZ_64MUL_2(q[i], mask); + for (; i < pcnt; i++, q++) { + VDEV_RAIDZ_64MUL_2(*q, mask); } } } @@ -736,48 +666,59 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm) static void vdev_raidz_generate_parity_pqr(raidz_map_t *rm) { - uint64_t *p, *q, *r, pcnt, ccnt, mask, i; + uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i; int c; - abd_t *src; - pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); + pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == rm->rm_col[VDEV_RAIDZ_Q].rc_size); ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == rm->rm_col[VDEV_RAIDZ_R].rc_size); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - src = rm->rm_col[c].rc_abd; - p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); - r = abd_to_buf(rm->rm_col[VDEV_RAIDZ_R].rc_abd); + src = rm->rm_col[c].rc_data; + p = rm->rm_col[VDEV_RAIDZ_P].rc_data; + q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + r = rm->rm_col[VDEV_RAIDZ_R].rc_data; - ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); + ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); if (c == rm->rm_firstdatacol) { - abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); - (void) memcpy(q, p, rm->rm_col[c].rc_size); - (void) memcpy(r, p, rm->rm_col[c].rc_size); + ASSERT(ccnt == pcnt || ccnt == 0); + for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { + *p = *src; + *q = *src; + *r = *src; + } + for (; i < pcnt; i++, src++, p++, q++, r++) { + *p = 0; + *q = 0; + *r = 0; + } } else { - struct pqr_struct pqr = { p, q, r }; - (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, - vdev_raidz_pqr_func, &pqr); - } + ASSERT(ccnt <= pcnt); - if (c == rm->rm_firstdatacol) { - for (i = ccnt; i < pcnt; i++) { - p[i] = 0; - q[i] = 0; - r[i] = 0; + /* + * Apply the algorithm described above by multiplying + * the previous result and adding in the new value. + */ + for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { + *p ^= *src; + + VDEV_RAIDZ_64MUL_2(*q, mask); + *q ^= *src; + + VDEV_RAIDZ_64MUL_4(*r, mask); + *r ^= *src; } - } else { + /* * Treat short columns as though they are full of 0s. * Note that there's therefore nothing needed for P. */ - for (i = ccnt; i < pcnt; i++) { - VDEV_RAIDZ_64MUL_2(q[i], mask); - VDEV_RAIDZ_64MUL_4(r[i], mask); + for (; i < pcnt; i++, q++, r++) { + VDEV_RAIDZ_64MUL_2(*q, mask); + VDEV_RAIDZ_64MUL_4(*r, mask); } } } @@ -805,153 +746,40 @@ vdev_raidz_generate_parity(raidz_map_t *rm) } } -/* ARGSUSED */ -static int -vdev_raidz_reconst_p_func(void *dbuf, void *sbuf, size_t size, void *private) -{ - uint64_t *dst = dbuf; - uint64_t *src = sbuf; - int cnt = size / sizeof (src[0]); - - for (int i = 0; i < cnt; i++) { - dst[i] ^= src[i]; - } - - return (0); -} - -/* ARGSUSED */ -static int -vdev_raidz_reconst_q_pre_func(void *dbuf, void *sbuf, size_t size, - void *private) -{ - uint64_t *dst = dbuf; - uint64_t *src = sbuf; - uint64_t mask; - int cnt = size / sizeof (dst[0]); - - for (int i = 0; i < cnt; i++, dst++, src++) { - VDEV_RAIDZ_64MUL_2(*dst, mask); - *dst ^= *src; - } - - return (0); -} - -/* ARGSUSED */ -static int -vdev_raidz_reconst_q_pre_tail_func(void *buf, size_t size, void *private) -{ - uint64_t *dst = buf; - uint64_t mask; - int cnt = size / sizeof (dst[0]); - - for (int i = 0; i < cnt; i++, dst++) { - /* same operation as vdev_raidz_reconst_q_pre_func() on dst */ - VDEV_RAIDZ_64MUL_2(*dst, mask); - } - - return (0); -} - -struct reconst_q_struct { - uint64_t *q; - int exp; -}; - -static int -vdev_raidz_reconst_q_post_func(void *buf, size_t size, void *private) -{ - struct reconst_q_struct *rq = private; - uint64_t *dst = buf; - int cnt = size / sizeof (dst[0]); - - for (int i = 0; i < cnt; i++, dst++, rq->q++) { - *dst ^= *rq->q; - - int j; - uint8_t *b; - for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { - *b = vdev_raidz_exp2(*b, rq->exp); - } - } - - return (0); -} - -struct reconst_pq_struct { - uint8_t *p; - uint8_t *q; - uint8_t *pxy; - uint8_t *qxy; - int aexp; - int bexp; -}; - -static int -vdev_raidz_reconst_pq_func(void *xbuf, void *ybuf, size_t size, void *private) -{ - struct reconst_pq_struct *rpq = private; - uint8_t *xd = xbuf; - uint8_t *yd = ybuf; - - for (int i = 0; i < size; - i++, rpq->p++, rpq->q++, rpq->pxy++, rpq->qxy++, xd++, yd++) { - *xd = vdev_raidz_exp2(*rpq->p ^ *rpq->pxy, rpq->aexp) ^ - vdev_raidz_exp2(*rpq->q ^ *rpq->qxy, rpq->bexp); - *yd = *rpq->p ^ *rpq->pxy ^ *xd; - } - - return (0); -} - -static int -vdev_raidz_reconst_pq_tail_func(void *xbuf, size_t size, void *private) -{ - struct reconst_pq_struct *rpq = private; - uint8_t *xd = xbuf; - - for (int i = 0; i < size; - i++, rpq->p++, rpq->q++, rpq->pxy++, rpq->qxy++, xd++) { - /* same operation as vdev_raidz_reconst_pq_func() on xd */ - *xd = vdev_raidz_exp2(*rpq->p ^ *rpq->pxy, rpq->aexp) ^ - vdev_raidz_exp2(*rpq->q ^ *rpq->qxy, rpq->bexp); - } - - return (0); -} - static int vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) { + uint64_t *dst, *src, xcount, ccount, count, i; int x = tgts[0]; int c; - abd_t *dst, *src; ASSERT(ntgts == 1); ASSERT(x >= rm->rm_firstdatacol); ASSERT(x < rm->rm_cols); - ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_P].rc_size); - ASSERT(rm->rm_col[x].rc_size > 0); - - src = rm->rm_col[VDEV_RAIDZ_P].rc_abd; - dst = rm->rm_col[x].rc_abd; + xcount = rm->rm_col[x].rc_size / sizeof (src[0]); + ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0])); + ASSERT(xcount > 0); - abd_copy_from_buf(dst, abd_to_buf(src), rm->rm_col[x].rc_size); + src = rm->rm_col[VDEV_RAIDZ_P].rc_data; + dst = rm->rm_col[x].rc_data; + for (i = 0; i < xcount; i++, dst++, src++) { + *dst = *src; + } for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - uint64_t size = MIN(rm->rm_col[x].rc_size, - rm->rm_col[c].rc_size); - - src = rm->rm_col[c].rc_abd; - dst = rm->rm_col[x].rc_abd; + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; if (c == x) continue; - (void) abd_iterate_func2(dst, src, 0, 0, size, - vdev_raidz_reconst_p_func, NULL); + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); + count = MIN(ccount, xcount); + + for (i = 0; i < count; i++, dst++, src++) { + *dst ^= *src; + } } return (1 << VDEV_RAIDZ_P); @@ -960,43 +788,57 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) static int vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) { + uint64_t *dst, *src, xcount, ccount, count, mask, i; + uint8_t *b; int x = tgts[0]; - int c, exp; - abd_t *dst, *src; + int c, j, exp; ASSERT(ntgts == 1); - ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_Q].rc_size); + xcount = rm->rm_col[x].rc_size / sizeof (src[0]); + ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0])); for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - uint64_t size = (c == x) ? 0 : MIN(rm->rm_col[x].rc_size, - rm->rm_col[c].rc_size); + src = rm->rm_col[c].rc_data; + dst = rm->rm_col[x].rc_data; + + if (c == x) + ccount = 0; + else + ccount = rm->rm_col[c].rc_size / sizeof (src[0]); - src = rm->rm_col[c].rc_abd; - dst = rm->rm_col[x].rc_abd; + count = MIN(ccount, xcount); if (c == rm->rm_firstdatacol) { - abd_copy(dst, src, size); - if (rm->rm_col[x].rc_size > size) - abd_zero_off(dst, size, - rm->rm_col[x].rc_size - size); + for (i = 0; i < count; i++, dst++, src++) { + *dst = *src; + } + for (; i < xcount; i++, dst++) { + *dst = 0; + } + } else { - ASSERT3U(size, <=, rm->rm_col[x].rc_size); - (void) abd_iterate_func2(dst, src, 0, 0, size, - vdev_raidz_reconst_q_pre_func, NULL); - (void) abd_iterate_func(dst, - size, rm->rm_col[x].rc_size - size, - vdev_raidz_reconst_q_pre_tail_func, NULL); + for (i = 0; i < count; i++, dst++, src++) { + VDEV_RAIDZ_64MUL_2(*dst, mask); + *dst ^= *src; + } + + for (; i < xcount; i++, dst++) { + VDEV_RAIDZ_64MUL_2(*dst, mask); + } } } - src = rm->rm_col[VDEV_RAIDZ_Q].rc_abd; - dst = rm->rm_col[x].rc_abd; + src = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + dst = rm->rm_col[x].rc_data; exp = 255 - (rm->rm_cols - 1 - x); - struct reconst_q_struct rq = { abd_to_buf(src), exp }; - (void) abd_iterate_func(dst, 0, rm->rm_col[x].rc_size, - vdev_raidz_reconst_q_post_func, &rq); + for (i = 0; i < xcount; i++, dst++, src++) { + *dst ^= *src; + for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { + *b = vdev_raidz_exp2(*b, exp); + } + } return (1 << VDEV_RAIDZ_Q); } @@ -1004,12 +846,11 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) static int vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) { - uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp; - abd_t *pdata, *qdata; - uint64_t xsize, ysize; + uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; + void *pdata, *qdata; + uint64_t xsize, ysize, i; int x = tgts[0]; int y = tgts[1]; - abd_t *xd, *yd; ASSERT(ntgts == 2); ASSERT(x < y); @@ -1025,15 +866,15 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) * parity so we make those columns appear to be full of zeros by * setting their lengths to zero. */ - pdata = rm->rm_col[VDEV_RAIDZ_P].rc_abd; - qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_abd; + pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data; xsize = rm->rm_col[x].rc_size; ysize = rm->rm_col[y].rc_size; - rm->rm_col[VDEV_RAIDZ_P].rc_abd = - abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_P].rc_size, B_TRUE); - rm->rm_col[VDEV_RAIDZ_Q].rc_abd = - abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_Q].rc_size, B_TRUE); + rm->rm_col[VDEV_RAIDZ_P].rc_data = + zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size); + rm->rm_col[VDEV_RAIDZ_Q].rc_data = + zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size); rm->rm_col[x].rc_size = 0; rm->rm_col[y].rc_size = 0; @@ -1042,12 +883,12 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) rm->rm_col[x].rc_size = xsize; rm->rm_col[y].rc_size = ysize; - p = abd_to_buf(pdata); - q = abd_to_buf(qdata); - pxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - qxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); - xd = rm->rm_col[x].rc_abd; - yd = rm->rm_col[y].rc_abd; + p = pdata; + q = qdata; + pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data; + qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data; + xd = rm->rm_col[x].rc_data; + yd = rm->rm_col[y].rc_data; /* * We now have: @@ -1071,21 +912,24 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)]; - ASSERT3U(xsize, >=, ysize); - struct reconst_pq_struct rpq = { p, q, pxy, qxy, aexp, bexp }; - (void) abd_iterate_func2(xd, yd, 0, 0, ysize, - vdev_raidz_reconst_pq_func, &rpq); - (void) abd_iterate_func(xd, ysize, xsize - ysize, - vdev_raidz_reconst_pq_tail_func, &rpq); + for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) { + *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^ + vdev_raidz_exp2(*q ^ *qxy, bexp); - abd_free(rm->rm_col[VDEV_RAIDZ_P].rc_abd); - abd_free(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); + if (i < ysize) + *yd = *p ^ *pxy ^ *xd; + } + + zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data, + rm->rm_col[VDEV_RAIDZ_P].rc_size); + zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data, + rm->rm_col[VDEV_RAIDZ_Q].rc_size); /* * Restore the saved parity data. */ - rm->rm_col[VDEV_RAIDZ_P].rc_abd = pdata; - rm->rm_col[VDEV_RAIDZ_Q].rc_abd = qdata; + rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata; + rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata; return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q)); } @@ -1400,7 +1244,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, c = used[i]; ASSERT3U(c, <, rm->rm_cols); - src = abd_to_buf(rm->rm_col[c].rc_abd); + src = rm->rm_col[c].rc_data; ccount = rm->rm_col[c].rc_size; for (j = 0; j < nmissing; j++) { cc = missing[j] + rm->rm_firstdatacol; @@ -1408,7 +1252,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, ASSERT3U(cc, <, rm->rm_cols); ASSERT3U(cc, !=, c); - dst[j] = abd_to_buf(rm->rm_col[cc].rc_abd); + dst[j] = rm->rm_col[cc].rc_data; dcount[j] = rm->rm_col[cc].rc_size; } @@ -1456,25 +1300,8 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) uint8_t *invrows[VDEV_RAIDZ_MAXPARITY]; uint8_t *used; - abd_t **bufs = NULL; - int code = 0; - /* - * Matrix reconstruction can't use scatter ABDs yet, so we allocate - * temporary linear ABDs. - */ - if (!abd_is_linear(rm->rm_col[rm->rm_firstdatacol].rc_abd)) { - bufs = kmem_alloc(rm->rm_cols * sizeof (abd_t *), KM_PUSHPAGE); - - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - raidz_col_t *col = &rm->rm_col[c]; - - bufs[c] = col->rc_abd; - col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE); - abd_copy(col->rc_abd, bufs[c], col->rc_size); - } - } n = rm->rm_cols - rm->rm_firstdatacol; @@ -1561,20 +1388,6 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) kmem_free(p, psize); - /* - * copy back from temporary linear abds and free them - */ - if (bufs) { - for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { - raidz_col_t *col = &rm->rm_col[c]; - - abd_copy(bufs[c], col->rc_abd, col->rc_size); - abd_free(col->rc_abd); - col->rc_abd = bufs[c]; - } - kmem_free(bufs, rm->rm_cols * sizeof (abd_t *)); - } - return (code); } @@ -1806,9 +1619,7 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size, * treat the on-disk format as if the only blocks are the complete 128 * KB size. */ - abd_t *abd = abd_get_from_buf(data - (offset - origoffset), - SPA_OLD_MAXBLOCKSIZE); - rm = vdev_raidz_map_alloc(abd, + rm = vdev_raidz_map_alloc(data - (offset - origoffset), SPA_OLD_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift, vd->vdev_children, vd->vdev_nparity); @@ -1847,14 +1658,13 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size, * example of why this calculation is needed. */ if ((err = vdev_disk_physio(cvd, - ((char *)rc->rc_abd) + colskip, colsize, + ((char *)rc->rc_data) + colskip, colsize, VDEV_LABEL_OFFSET(rc->rc_offset) + colskip, flags, isdump)) != 0) break; } vdev_raidz_map_free(rm); - abd_put(abd); #endif /* KERNEL */ return (err); @@ -1912,7 +1722,7 @@ vdev_raidz_io_start(zio_t *zio) raidz_col_t *rc; int c, i; - rm = vdev_raidz_map_alloc(zio->io_abd, zio->io_size, zio->io_offset, + rm = vdev_raidz_map_alloc(zio->io_data, zio->io_size, zio->io_offset, tvd->vdev_ashift, vd->vdev_children, vd->vdev_nparity); @@ -1928,7 +1738,7 @@ vdev_raidz_io_start(zio_t *zio) rc = &rm->rm_col[c]; cvd = vd->vdev_child[rc->rc_devidx]; zio_nowait(zio_vdev_child_io(zio, NULL, cvd, - rc->rc_offset, rc->rc_abd, rc->rc_size, + rc->rc_offset, rc->rc_data, rc->rc_size, zio->io_type, zio->io_priority, 0, vdev_raidz_child_done, rc)); } @@ -1985,7 +1795,7 @@ vdev_raidz_io_start(zio_t *zio) if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 || (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) { zio_nowait(zio_vdev_child_io(zio, NULL, cvd, - rc->rc_offset, rc->rc_abd, rc->rc_size, + rc->rc_offset, rc->rc_data, rc->rc_size, zio->io_type, zio->io_priority, 0, vdev_raidz_child_done, rc)); } @@ -2001,7 +1811,6 @@ vdev_raidz_io_start(zio_t *zio) static void raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) { - void *buf; vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { @@ -2015,11 +1824,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size); zfs_ereport_post_checksum(zio->io_spa, vd, zio, - rc->rc_offset, rc->rc_size, buf, bad_data, + rc->rc_offset, rc->rc_size, rc->rc_data, bad_data, &zbc); - abd_return_buf(rc->rc_abd, buf, rc->rc_size); } } @@ -2065,7 +1872,7 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) if (!rc->rc_tried || rc->rc_error != 0) continue; orig[c] = zio_buf_alloc(rc->rc_size); - abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size); + bcopy(rc->rc_data, orig[c], rc->rc_size); } vdev_raidz_generate_parity(rm); @@ -2074,7 +1881,7 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) rc = &rm->rm_col[c]; if (!rc->rc_tried || rc->rc_error != 0) continue; - if (bcmp(orig[c], abd_to_buf(rc->rc_abd), rc->rc_size) != 0) { + if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { raidz_checksum_error(zio, rc, orig[c]); rc->rc_error = SET_ERROR(ECKSUM); ret++; @@ -2182,8 +1989,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) ASSERT3S(c, >=, 0); ASSERT3S(c, <, rm->rm_cols); rc = &rm->rm_col[c]; - abd_copy_to_buf(orig[i], rc->rc_abd, - rc->rc_size); + bcopy(rc->rc_data, orig[i], rc->rc_size); } /* @@ -2214,8 +2020,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) for (i = 0; i < n; i++) { c = tgts[i]; rc = &rm->rm_col[c]; - abd_copy_from_buf(rc->rc_abd, orig[i], - rc->rc_size); + bcopy(orig[i], rc->rc_data, rc->rc_size); } do { @@ -2456,7 +2261,7 @@ vdev_raidz_io_done(zio_t *zio) continue; zio_nowait(zio_vdev_child_io(zio, NULL, vd->vdev_child[rc->rc_devidx], - rc->rc_offset, rc->rc_abd, rc->rc_size, + rc->rc_offset, rc->rc_data, rc->rc_size, zio->io_type, zio->io_priority, 0, vdev_raidz_child_done, rc)); } while (++c < rm->rm_cols); @@ -2536,7 +2341,7 @@ done: continue; zio_nowait(zio_vdev_child_io(zio, NULL, cvd, - rc->rc_offset, rc->rc_abd, rc->rc_size, + rc->rc_offset, rc->rc_data, rc->rc_size, ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_IO_REPAIR | (unexpected_errors ? ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c index 1aa4900bc5..31bcbe3fe3 100644 --- a/usr/src/uts/common/fs/zfs/zil.c +++ b/usr/src/uts/common/fs/zfs/zil.c @@ -39,7 +39,6 @@ #include #include #include -#include /* * The zfs intent log (ZIL) saves transaction records of system calls @@ -878,7 +877,6 @@ zil_lwb_write_done(zio_t *zio) * one in zil_commit_writer(). zil_sync() will only remove * the lwb if lwb_buf is null. */ - abd_put(zio->io_abd); zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); mutex_enter(&zilog->zl_lock); lwb->lwb_buf = NULL; @@ -910,10 +908,8 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) ZIO_FLAG_CANFAIL); } if (lwb->lwb_zio == NULL) { - abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, - BP_GET_LSIZE(&lwb->lwb_blk)); lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, - 0, &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk), + 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb); } diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c index c7d2e0536e..92e28d821d 100644 --- a/usr/src/uts/common/fs/zfs/zio.c +++ b/usr/src/uts/common/fs/zfs/zio.c @@ -41,7 +41,6 @@ #include #include #include -#include /* * ========================================================================== @@ -273,18 +272,12 @@ zio_data_buf_free(void *buf, size_t size) * ========================================================================== */ void -zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize, +zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, zio_transform_func_t *transform) { zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); - /* - * Ensure that anyone expecting this zio to contain a linear ABD isn't - * going to get a nasty surprise when they try to access the data. - */ - IMPLY(abd_is_linear(zio->io_abd), abd_is_linear(data)); - - zt->zt_orig_abd = zio->io_abd; + zt->zt_orig_data = zio->io_data; zt->zt_orig_size = zio->io_size; zt->zt_bufsize = bufsize; zt->zt_transform = transform; @@ -292,7 +285,7 @@ zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize, zt->zt_next = zio->io_transform_stack; zio->io_transform_stack = zt; - zio->io_abd = data; + zio->io_data = data; zio->io_size = size; } @@ -304,12 +297,12 @@ zio_pop_transforms(zio_t *zio) while ((zt = zio->io_transform_stack) != NULL) { if (zt->zt_transform != NULL) zt->zt_transform(zio, - zt->zt_orig_abd, zt->zt_orig_size); + zt->zt_orig_data, zt->zt_orig_size); if (zt->zt_bufsize != 0) - abd_free(zio->io_abd); + zio_buf_free(zio->io_data, zt->zt_bufsize); - zio->io_abd = zt->zt_orig_abd; + zio->io_data = zt->zt_orig_data; zio->io_size = zt->zt_orig_size; zio->io_transform_stack = zt->zt_next; @@ -323,26 +316,21 @@ zio_pop_transforms(zio_t *zio) * ========================================================================== */ static void -zio_subblock(zio_t *zio, abd_t *data, uint64_t size) +zio_subblock(zio_t *zio, void *data, uint64_t size) { ASSERT(zio->io_size > size); if (zio->io_type == ZIO_TYPE_READ) - abd_copy(data, zio->io_abd, size); + bcopy(zio->io_data, data, size); } static void -zio_decompress(zio_t *zio, abd_t *data, uint64_t size) +zio_decompress(zio_t *zio, void *data, uint64_t size) { - if (zio->io_error == 0) { - void *tmp = abd_borrow_buf(data, size); - int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), - zio->io_abd, tmp, zio->io_size, size); - abd_return_buf_copy(data, tmp, size); - - if (ret != 0) - zio->io_error = SET_ERROR(EIO); - } + if (zio->io_error == 0 && + zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), + zio->io_data, data, zio->io_size, size) != 0) + zio->io_error = SET_ERROR(EIO); } /* @@ -540,7 +528,7 @@ zio_bookmark_compare(const void *x1, const void *x2) */ static zio_t * zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, - abd_t *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done, + void *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done, void *private, zio_type_t type, zio_priority_t priority, enum zio_flag flags, vdev_t *vd, uint64_t offset, const zbookmark_phys_t *zb, enum zio_stage stage, enum zio_stage pipeline) @@ -599,7 +587,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, zio->io_priority = priority; zio->io_vd = vd; zio->io_offset = offset; - zio->io_orig_abd = zio->io_abd = data; + zio->io_orig_data = zio->io_data = data; zio->io_orig_size = zio->io_size = psize; zio->io_lsize = lsize; zio->io_orig_flags = zio->io_flags = flags; @@ -738,7 +726,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) zio_t * zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, - abd_t *data, uint64_t size, zio_done_func_t *done, void *private, + void *data, uint64_t size, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb) { zio_t *zio; @@ -756,7 +744,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, zio_t * zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, - abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp, + void *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp, zio_done_func_t *ready, zio_done_func_t *children_ready, zio_done_func_t *physdone, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, @@ -797,7 +785,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, } zio_t * -zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, +zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, uint64_t size, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb) { @@ -950,7 +938,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, zio_t * zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, - abd_t *data, int checksum, zio_done_func_t *done, void *private, + void *data, int checksum, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, boolean_t labels) { zio_t *zio; @@ -971,7 +959,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, zio_t * zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, - abd_t *data, int checksum, zio_done_func_t *done, void *private, + void *data, int checksum, zio_done_func_t *done, void *private, zio_priority_t priority, enum zio_flag flags, boolean_t labels) { zio_t *zio; @@ -994,9 +982,8 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, * Therefore, we must make a local copy in case the data is * being written to multiple places in parallel. */ - abd_t *wbuf = abd_alloc_sametype(data, size); - abd_copy(wbuf, data, size); - + void *wbuf = zio_buf_alloc(size); + bcopy(data, wbuf, size); zio_push_transform(zio, wbuf, size, size, NULL); } @@ -1008,7 +995,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, */ zio_t * zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, - abd_t *data, uint64_t size, int type, zio_priority_t priority, + void *data, uint64_t size, int type, zio_priority_t priority, enum zio_flag flags, zio_done_func_t *done, void *private) { enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE; @@ -1073,7 +1060,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, } zio_t * -zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, +zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, int type, zio_priority_t priority, enum zio_flag flags, zio_done_func_t *done, void *private) { @@ -1134,17 +1121,14 @@ zio_read_bp_init(zio_t *zio) !(zio->io_flags & ZIO_FLAG_RAW)) { uint64_t psize = BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); - zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), - psize, psize, zio_decompress); + void *cbuf = zio_buf_alloc(psize); + + zio_push_transform(zio, cbuf, psize, psize, zio_decompress); } if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; - - int psize = BPE_GET_PSIZE(bp); - void *data = abd_borrow_buf(zio->io_abd, psize); - decode_embedded_bp_compressed(bp, data); - abd_return_buf_copy(zio->io_abd, data, psize); + decode_embedded_bp_compressed(bp, zio->io_data); } else { ASSERT(!BP_IS_EMBEDDED(bp)); } @@ -1284,7 +1268,7 @@ zio_write_compress(zio_t *zio) /* If it's a compressed write that is not raw, compress the buffer. */ if (compress != ZIO_COMPRESS_OFF && psize == lsize) { void *cbuf = zio_buf_alloc(lsize); - psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); + psize = zio_compress_data(compress, zio->io_data, cbuf, lsize); if (psize == 0 || psize == lsize) { compress = ZIO_COMPRESS_OFF; zio_buf_free(cbuf, lsize); @@ -1319,11 +1303,9 @@ zio_write_compress(zio_t *zio) zio_buf_free(cbuf, lsize); psize = lsize; } else { - abd_t *cdata = abd_get_from_buf(cbuf, lsize); - abd_take_ownership_of_buf(cdata, B_TRUE); - abd_zero_off(cdata, psize, rounded - psize); + bzero((char *)cbuf + psize, rounded - psize); psize = rounded; - zio_push_transform(zio, cdata, + zio_push_transform(zio, cbuf, psize, lsize, NULL); } } @@ -1843,38 +1825,26 @@ zio_resume_wait(spa_t *spa) * ========================================================================== */ -static void -zio_gang_issue_func_done(zio_t *zio) -{ - abd_put(zio->io_abd); -} - static zio_t * -zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, - uint64_t offset) +zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) { if (gn != NULL) return (pio); - return (zio_read(pio, pio->io_spa, bp, abd_get_offset(data, offset), - BP_GET_PSIZE(bp), zio_gang_issue_func_done, - NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), + return (zio_read(pio, pio->io_spa, bp, data, BP_GET_PSIZE(bp), + NULL, NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark)); } -static zio_t * -zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, - uint64_t offset) +zio_t * +zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) { zio_t *zio; if (gn != NULL) { - abd_t *gbh_abd = - abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE); zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, - gbh_abd, SPA_GANGBLOCKSIZE, zio_gang_issue_func_done, NULL, - pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), - &pio->io_bookmark); + gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, pio->io_priority, + ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); /* * As we rewrite each gang header, the pipeline will compute * a new gang block header checksum for it; but no one will @@ -1885,12 +1855,8 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, * this is just good hygiene.) */ if (gn != pio->io_gang_leader->io_gang_tree) { - abd_t *buf = abd_get_offset(data, offset); - zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), - buf, BP_GET_PSIZE(bp)); - - abd_put(buf); + data, BP_GET_PSIZE(bp)); } /* * If we are here to damage data for testing purposes, @@ -1900,8 +1866,7 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; } else { zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, - abd_get_offset(data, offset), BP_GET_PSIZE(bp), - zio_gang_issue_func_done, NULL, pio->io_priority, + data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); } @@ -1909,18 +1874,16 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, } /* ARGSUSED */ -static zio_t * -zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, - uint64_t offset) +zio_t * +zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) { return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp, ZIO_GANG_CHILD_FLAGS(pio))); } /* ARGSUSED */ -static zio_t * -zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, - uint64_t offset) +zio_t * +zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) { return (zio_claim(pio, pio->io_spa, pio->io_txg, bp, NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio))); @@ -1982,14 +1945,13 @@ static void zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp) { zio_gang_node_t *gn = zio_gang_node_alloc(gnpp); - abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE); ASSERT(gio->io_gang_leader == gio); ASSERT(BP_IS_GANG(bp)); - zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, SPA_GANGBLOCKSIZE, - zio_gang_tree_assemble_done, gn, gio->io_priority, - ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); + zio_nowait(zio_read(gio, gio->io_spa, bp, gn->gn_gbh, + SPA_GANGBLOCKSIZE, zio_gang_tree_assemble_done, gn, + gio->io_priority, ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); } static void @@ -2005,16 +1967,13 @@ zio_gang_tree_assemble_done(zio_t *zio) if (zio->io_error) return; - /* this ABD was created from a linear buf in zio_gang_tree_assemble */ if (BP_SHOULD_BYTESWAP(bp)) - byteswap_uint64_array(abd_to_buf(zio->io_abd), zio->io_size); + byteswap_uint64_array(zio->io_data, zio->io_size); - ASSERT3P(abd_to_buf(zio->io_abd), ==, gn->gn_gbh); + ASSERT(zio->io_data == gn->gn_gbh); ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); - abd_put(zio->io_abd); - for (int g = 0; g < SPA_GBH_NBLKPTRS; g++) { blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; if (!BP_IS_GANG(gbp)) @@ -2024,8 +1983,7 @@ zio_gang_tree_assemble_done(zio_t *zio) } static void -zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data, - uint64_t offset) +zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) { zio_t *gio = pio->io_gang_leader; zio_t *zio; @@ -2038,7 +1996,7 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data, * If you're a gang header, your data is in gn->gn_gbh. * If you're a gang member, your data is in 'data' and gn == NULL. */ - zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data, offset); + zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data); if (gn != NULL) { ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); @@ -2047,14 +2005,13 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data, blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; if (BP_IS_HOLE(gbp)) continue; - zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data, - offset); - offset += BP_GET_PSIZE(gbp); + zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data); + data = (char *)data + BP_GET_PSIZE(gbp); } } if (gn == gio->io_gang_tree) - ASSERT3U(gio->io_size, ==, offset); + ASSERT3P((char *)gio->io_data + gio->io_size, ==, data); if (zio != pio) zio_nowait(zio); @@ -2087,8 +2044,7 @@ zio_gang_issue(zio_t *zio) ASSERT(zio->io_child_type > ZIO_CHILD_GANG); if (zio->io_child_error[ZIO_CHILD_GANG] == 0) - zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_abd, - 0); + zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_data); else zio_gang_tree_free(&zio->io_gang_tree); @@ -2127,12 +2083,6 @@ zio_write_gang_member_ready(zio_t *zio) mutex_exit(&pio->io_lock); } -static void -zio_write_gang_done(zio_t *zio) -{ - abd_put(zio->io_abd); -} - static int zio_write_gang_block(zio_t *pio) { @@ -2143,7 +2093,6 @@ zio_write_gang_block(zio_t *pio) zio_t *zio; zio_gang_node_t *gn, **gnpp; zio_gbh_phys_t *gbh; - abd_t *gbh_abd; uint64_t txg = pio->io_txg; uint64_t resid = pio->io_size; uint64_t lsize; @@ -2204,14 +2153,12 @@ zio_write_gang_block(zio_t *pio) gn = zio_gang_node_alloc(gnpp); gbh = gn->gn_gbh; bzero(gbh, SPA_GANGBLOCKSIZE); - gbh_abd = abd_get_from_buf(gbh, SPA_GANGBLOCKSIZE); /* * Create the gang header. */ - zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, SPA_GANGBLOCKSIZE, - zio_write_gang_done, NULL, pio->io_priority, - ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); + zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL, + pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); /* * Create and nowait the gang children. @@ -2231,9 +2178,9 @@ zio_write_gang_block(zio_t *pio) zp.zp_nopwrite = B_FALSE; zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], - abd_get_offset(pio->io_abd, pio->io_size - resid), lsize, - lsize, &zp, zio_write_gang_member_ready, NULL, NULL, - zio_write_gang_done, &gn->gn_child[g], pio->io_priority, + (char *)pio->io_data + (pio->io_size - resid), lsize, lsize, + &zp, zio_write_gang_member_ready, NULL, NULL, NULL, + &gn->gn_child[g], pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { @@ -2346,11 +2293,10 @@ zio_ddt_child_read_done(zio_t *zio) ddp = ddt_phys_select(dde, bp); if (zio->io_error == 0) ddt_phys_clear(ddp); /* this ddp doesn't need repair */ - - if (zio->io_error == 0 && dde->dde_repair_abd == NULL) - dde->dde_repair_abd = zio->io_abd; + if (zio->io_error == 0 && dde->dde_repair_data == NULL) + dde->dde_repair_data = zio->io_data; else - abd_free(zio->io_abd); + zio_buf_free(zio->io_data, zio->io_size); mutex_exit(&pio->io_lock); } @@ -2382,16 +2328,16 @@ zio_ddt_read_start(zio_t *zio) ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp, &blk); zio_nowait(zio_read(zio, zio->io_spa, &blk, - abd_alloc_for_io(zio->io_size, B_TRUE), - zio->io_size, zio_ddt_child_read_done, dde, - zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio) | - ZIO_FLAG_DONT_PROPAGATE, &zio->io_bookmark)); + zio_buf_alloc(zio->io_size), zio->io_size, + zio_ddt_child_read_done, dde, zio->io_priority, + ZIO_DDT_CHILD_FLAGS(zio) | ZIO_FLAG_DONT_PROPAGATE, + &zio->io_bookmark)); } return (ZIO_PIPELINE_CONTINUE); } zio_nowait(zio_read(zio, zio->io_spa, bp, - zio->io_abd, zio->io_size, NULL, NULL, zio->io_priority, + zio->io_data, zio->io_size, NULL, NULL, zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark)); return (ZIO_PIPELINE_CONTINUE); @@ -2421,9 +2367,8 @@ zio_ddt_read_done(zio_t *zio) zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); return (ZIO_PIPELINE_STOP); } - if (dde->dde_repair_abd != NULL) { - abd_copy(zio->io_abd, dde->dde_repair_abd, - zio->io_size); + if (dde->dde_repair_data != NULL) { + bcopy(dde->dde_repair_data, zio->io_data, zio->io_size); zio->io_child_error[ZIO_CHILD_DDT] = 0; } ddt_repair_done(ddt, dde); @@ -2455,7 +2400,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) if (lio != NULL) { return (lio->io_orig_size != zio->io_orig_size || - abd_cmp(zio->io_orig_abd, lio->io_orig_abd, + bcmp(zio->io_orig_data, lio->io_orig_data, zio->io_orig_size) != 0); } } @@ -2476,17 +2421,17 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) /* * Intuitively, it would make more sense to compare - * io_abd than io_orig_abd in the raw case since you + * io_data than io_orig_data in the raw case since you * don't want to look at any transformations that have * happened to the data. However, for raw I/Os the - * data will actually be the same in io_abd and - * io_orig_abd, so all we have to do is issue this as + * data will actually be the same in io_data and + * io_orig_data, so all we have to do is issue this as * a raw ARC read. */ if (do_raw) { zio_flags |= ZIO_FLAG_RAW; ASSERT3U(zio->io_size, ==, zio->io_orig_size); - ASSERT0(abd_cmp(zio->io_abd, zio->io_orig_abd, + ASSERT0(bcmp(zio->io_data, zio->io_orig_data, zio->io_size)); ASSERT3P(zio->io_transform_stack, ==, NULL); } @@ -2497,7 +2442,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) if (error == 0) { if (arc_buf_size(abuf) != zio->io_orig_size || - abd_cmp_buf(zio->io_orig_abd, abuf->b_data, + bcmp(abuf->b_data, zio->io_orig_data, zio->io_orig_size) != 0) error = SET_ERROR(EEXIST); arc_buf_destroy(abuf, &abuf); @@ -2663,12 +2608,12 @@ zio_ddt_write(zio_t *zio) return (ZIO_PIPELINE_CONTINUE); } - dio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, + dio = zio_write(zio, spa, txg, bp, zio->io_orig_data, zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL, NULL, zio_ddt_ditto_write_done, dde, zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); - zio_push_transform(dio, zio->io_abd, zio->io_size, 0, NULL); + zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL); dde->dde_lead_zio[DDT_PHYS_DITTO] = dio; } @@ -2685,13 +2630,13 @@ zio_ddt_write(zio_t *zio) ddt_phys_fill(ddp, bp); ddt_phys_addref(ddp); } else { - cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, + cio = zio_write(zio, spa, txg, bp, zio->io_orig_data, zio->io_orig_size, zio->io_orig_size, zp, zio_ddt_child_write_ready, NULL, NULL, zio_ddt_child_write_done, dde, zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); - zio_push_transform(cio, zio->io_abd, zio->io_size, 0, NULL); + zio_push_transform(cio, zio->io_data, zio->io_size, 0, NULL); dde->dde_lead_zio[p] = cio; } @@ -3031,11 +2976,11 @@ zio_vdev_io_start(zio_t *zio) P2PHASE(zio->io_size, align) != 0) { /* Transform logical writes to be a full physical block size. */ uint64_t asize = P2ROUNDUP(zio->io_size, align); - abd_t *abuf = abd_alloc_sametype(zio->io_abd, asize); + char *abuf = zio_buf_alloc(asize); ASSERT(vd == vd->vdev_top); if (zio->io_type == ZIO_TYPE_WRITE) { - abd_copy(abuf, zio->io_abd, zio->io_size); - abd_zero_off(abuf, zio->io_size, asize - zio->io_size); + bcopy(zio->io_data, abuf, zio->io_size); + bzero(abuf + zio->io_size, asize - zio->io_size); } zio_push_transform(zio, abuf, asize, asize, zio_subblock); } @@ -3161,7 +3106,7 @@ zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) { void *buf = zio_buf_alloc(zio->io_size); - abd_copy_to_buf(buf, zio->io_abd, zio->io_size); + bcopy(zio->io_data, buf, zio->io_size); zcr->zcr_cbinfo = zio->io_size; zcr->zcr_cbdata = buf; @@ -3305,7 +3250,7 @@ zio_checksum_generate(zio_t *zio) } } - zio_checksum_compute(zio, checksum, zio->io_abd, zio->io_size); + zio_checksum_compute(zio, checksum, zio->io_data, zio->io_size); return (ZIO_PIPELINE_CONTINUE); } @@ -3444,7 +3389,7 @@ zio_ready(zio_t *zio) if (BP_IS_GANG(bp)) { zio->io_flags &= ~ZIO_FLAG_NODATA; } else { - ASSERT((uintptr_t)zio->io_abd < SPA_MAXBLOCKSIZE); + ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; } } @@ -3599,28 +3544,21 @@ zio_done(zio_t *zio) zio_cksum_report_t *zcr = zio->io_cksum_report; uint64_t align = zcr->zcr_align; uint64_t asize = P2ROUNDUP(psize, align); - char *abuf = NULL; - abd_t *adata = zio->io_abd; + char *abuf = zio->io_data; if (asize != psize) { - adata = abd_alloc_linear(asize, B_TRUE); - abd_copy(adata, zio->io_abd, psize); - abd_zero_off(adata, psize, asize - psize); + abuf = zio_buf_alloc(asize); + bcopy(zio->io_data, abuf, psize); + bzero(abuf + psize, asize - psize); } - if (adata != NULL) - abuf = abd_borrow_buf_copy(adata, asize); - zio->io_cksum_report = zcr->zcr_next; zcr->zcr_next = NULL; zcr->zcr_finish(zcr, abuf); zfs_ereport_free_checksum(zcr); - if (adata != NULL) - abd_return_buf(adata, abuf, asize); - if (asize != psize) - abd_free(adata); + zio_buf_free(abuf, asize); } } diff --git a/usr/src/uts/common/fs/zfs/zio_checksum.c b/usr/src/uts/common/fs/zfs/zio_checksum.c index e1c98b0b99..2bd9001456 100644 --- a/usr/src/uts/common/fs/zfs/zio_checksum.c +++ b/usr/src/uts/common/fs/zfs/zio_checksum.c @@ -31,7 +31,6 @@ #include #include #include -#include #include /* @@ -94,85 +93,45 @@ /*ARGSUSED*/ static void -abd_checksum_off(abd_t *abd, uint64_t size, +zio_checksum_off(const void *buf, uint64_t size, const void *ctx_template, zio_cksum_t *zcp) { ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); } -/*ARGSUSED*/ -void -abd_fletcher_2_native(abd_t *abd, uint64_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) abd_iterate_func(abd, 0, size, - fletcher_2_incremental_native, zcp); -} - -/*ARGSUSED*/ -void -abd_fletcher_2_byteswap(abd_t *abd, uint64_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) abd_iterate_func(abd, 0, size, - fletcher_2_incremental_byteswap, zcp); -} - -/*ARGSUSED*/ -void -abd_fletcher_4_native(abd_t *abd, uint64_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) abd_iterate_func(abd, 0, size, - fletcher_4_incremental_native, zcp); -} - -/*ARGSUSED*/ -void -abd_fletcher_4_byteswap(abd_t *abd, uint64_t size, - const void *ctx_template, zio_cksum_t *zcp) -{ - fletcher_init(zcp); - (void) abd_iterate_func(abd, 0, size, - fletcher_4_incremental_byteswap, zcp); -} - zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { {{NULL, NULL}, NULL, NULL, 0, "inherit"}, {{NULL, NULL}, NULL, NULL, 0, "on"}, - {{abd_checksum_off, abd_checksum_off}, + {{zio_checksum_off, zio_checksum_off}, NULL, NULL, 0, "off"}, - {{abd_checksum_SHA256, abd_checksum_SHA256}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, "label"}, - {{abd_checksum_SHA256, abd_checksum_SHA256}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, "gang_header"}, - {{abd_fletcher_2_native, abd_fletcher_2_byteswap}, + {{fletcher_2_native, fletcher_2_byteswap}, NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"}, - {{abd_fletcher_2_native, abd_fletcher_2_byteswap}, + {{fletcher_2_native, fletcher_2_byteswap}, NULL, NULL, 0, "fletcher2"}, - {{abd_fletcher_4_native, abd_fletcher_4_byteswap}, + {{fletcher_4_native, fletcher_4_byteswap}, NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"}, - {{abd_checksum_SHA256, abd_checksum_SHA256}, + {{zio_checksum_SHA256, zio_checksum_SHA256}, NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | ZCHECKSUM_FLAG_NOPWRITE, "sha256"}, - {{abd_fletcher_4_native, abd_fletcher_4_byteswap}, + {{fletcher_4_native, fletcher_4_byteswap}, NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"}, - {{abd_checksum_off, abd_checksum_off}, + {{zio_checksum_off, zio_checksum_off}, NULL, NULL, 0, "noparity"}, - {{abd_checksum_SHA512_native, abd_checksum_SHA512_byteswap}, + {{zio_checksum_SHA512_native, zio_checksum_SHA512_byteswap}, NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | ZCHECKSUM_FLAG_NOPWRITE, "sha512"}, - {{abd_checksum_skein_native, abd_checksum_skein_byteswap}, - abd_checksum_skein_tmpl_init, abd_checksum_skein_tmpl_free, + {{zio_checksum_skein_native, zio_checksum_skein_byteswap}, + zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"}, - {{abd_checksum_edonr_native, abd_checksum_edonr_byteswap}, - abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free, + {{zio_checksum_edonr_native, zio_checksum_edonr_byteswap}, + zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "edonr"}, }; @@ -292,7 +251,7 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) */ void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, - abd_t *abd, uint64_t size) + void *data, uint64_t size) { blkptr_t *bp = zio->io_bp; uint64_t offset = zio->io_offset; @@ -307,7 +266,6 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { zio_eck_t *eck; - void *data = abd_to_buf(abd); if (checksum == ZIO_CHECKSUM_ZILOG2) { zil_chain_t *zilc = data; @@ -325,18 +283,18 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, else bp->blk_cksum = eck->zec_cksum; eck->zec_magic = ZEC_MAGIC; - ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], + ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], &cksum); eck->zec_cksum = cksum; } else { - ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], + ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], &bp->blk_cksum); } } int zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, - abd_t *abd, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) + void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) { zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t actual_cksum, expected_cksum; @@ -350,31 +308,25 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { zio_eck_t *eck; zio_cksum_t verifier; - uint64_t data_size = size; - void *data = abd_borrow_buf_copy(abd, data_size); if (checksum == ZIO_CHECKSUM_ZILOG2) { zil_chain_t *zilc = data; uint64_t nused; eck = &zilc->zc_eck; - if (eck->zec_magic == ZEC_MAGIC) { + if (eck->zec_magic == ZEC_MAGIC) nused = zilc->zc_nused; - } else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) { + else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) nused = BSWAP_64(zilc->zc_nused); - } else { - abd_return_buf(abd, data, data_size); + else return (SET_ERROR(ECKSUM)); - } - if (nused > data_size) { - abd_return_buf(abd, data, data_size); + if (nused > size) return (SET_ERROR(ECKSUM)); - } size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); } else { - eck = (zio_eck_t *)((char *)data + data_size) - 1; + eck = (zio_eck_t *)((char *)data + size) - 1; } if (checksum == ZIO_CHECKSUM_GANG_HEADER) @@ -389,15 +341,11 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, if (byteswap) byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); - size_t eck_offset = (size_t)(&eck->zec_cksum) - (size_t)data; expected_cksum = eck->zec_cksum; eck->zec_cksum = verifier; - abd_return_buf_copy(abd, data, data_size); - - ci->ci_func[byteswap](abd, size, + ci->ci_func[byteswap](data, size, spa->spa_cksum_tmpls[checksum], &actual_cksum); - abd_copy_from_buf_off(abd, &expected_cksum, - eck_offset, sizeof (zio_cksum_t)); + eck->zec_cksum = expected_cksum; if (byteswap) { byteswap_uint64_array(&expected_cksum, @@ -406,7 +354,7 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, } else { byteswap = BP_SHOULD_BYTESWAP(bp); expected_cksum = bp->blk_cksum; - ci->ci_func[byteswap](abd, size, + ci->ci_func[byteswap](data, size, spa->spa_cksum_tmpls[checksum], &actual_cksum); } @@ -435,7 +383,7 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) uint64_t size = (bp == NULL ? zio->io_size : (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); uint64_t offset = zio->io_offset; - abd_t *data = zio->io_abd; + void *data = zio->io_data; spa_t *spa = zio->io_spa; error = zio_checksum_error_impl(spa, bp, checksum, data, size, diff --git a/usr/src/uts/common/fs/zfs/zio_compress.c b/usr/src/uts/common/fs/zfs/zio_compress.c index 8d0a33de69..4e2d645572 100644 --- a/usr/src/uts/common/fs/zfs/zio_compress.c +++ b/usr/src/uts/common/fs/zfs/zio_compress.c @@ -25,7 +25,10 @@ */ /* * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. - * Copyright (c) 2013, 2016 by Delphix. All rights reserved. + */ + +/* + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -38,23 +41,24 @@ /* * Compression vectors. */ + zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { - {"inherit", 0, NULL, NULL}, - {"on", 0, NULL, NULL}, - {"uncompressed", 0, NULL, NULL}, - {"lzjb", 0, lzjb_compress, lzjb_decompress}, - {"empty", 0, NULL, NULL}, - {"gzip-1", 1, gzip_compress, gzip_decompress}, - {"gzip-2", 2, gzip_compress, gzip_decompress}, - {"gzip-3", 3, gzip_compress, gzip_decompress}, - {"gzip-4", 4, gzip_compress, gzip_decompress}, - {"gzip-5", 5, gzip_compress, gzip_decompress}, - {"gzip-6", 6, gzip_compress, gzip_decompress}, - {"gzip-7", 7, gzip_compress, gzip_decompress}, - {"gzip-8", 8, gzip_compress, gzip_decompress}, - {"gzip-9", 9, gzip_compress, gzip_decompress}, - {"zle", 64, zle_compress, zle_decompress}, - {"lz4", 0, lz4_compress, lz4_decompress} + {NULL, NULL, 0, "inherit"}, + {NULL, NULL, 0, "on"}, + {NULL, NULL, 0, "uncompressed"}, + {lzjb_compress, lzjb_decompress, 0, "lzjb"}, + {NULL, NULL, 0, "empty"}, + {gzip_compress, gzip_decompress, 1, "gzip-1"}, + {gzip_compress, gzip_decompress, 2, "gzip-2"}, + {gzip_compress, gzip_decompress, 3, "gzip-3"}, + {gzip_compress, gzip_decompress, 4, "gzip-4"}, + {gzip_compress, gzip_decompress, 5, "gzip-5"}, + {gzip_compress, gzip_decompress, 6, "gzip-6"}, + {gzip_compress, gzip_decompress, 7, "gzip-7"}, + {gzip_compress, gzip_decompress, 8, "gzip-8"}, + {gzip_compress, gzip_decompress, 9, "gzip-9"}, + {zle_compress, zle_decompress, 64, "zle"}, + {lz4_compress, lz4_decompress, 0, "lz4"}, }; enum zio_compress @@ -81,21 +85,10 @@ zio_compress_select(spa_t *spa, enum zio_compress child, return (result); } -/*ARGSUSED*/ -static int -zio_compress_zeroed_cb(void *data, size_t len, void *private) -{ - uint64_t *end = (uint64_t *)((char *)data + len); - for (uint64_t *word = (uint64_t *)data; word < end; word++) - if (*word != 0) - return (1); - - return (0); -} - size_t -zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len) +zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) { + uint64_t *word, *word_end; size_t c_len, d_len; zio_compress_info_t *ci = &zio_compress_table[c]; @@ -106,7 +99,12 @@ zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len) * If the data is all zeroes, we don't even need to allocate * a block for it. We indicate this by returning zero size. */ - if (abd_iterate_func(src, 0, s_len, zio_compress_zeroed_cb, NULL) == 0) + word_end = (uint64_t *)((char *)src + s_len); + for (word = src; word < word_end; word++) + if (*word != 0) + break; + + if (word == word_end) return (0); if (c == ZIO_COMPRESS_EMPTY) @@ -114,11 +112,7 @@ zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len) /* Compress at least 12.5% */ d_len = s_len - (s_len >> 3); - - /* No compression algorithms can read from ABDs directly */ - void *tmp = abd_borrow_buf_copy(src, s_len); - c_len = ci->ci_compress(tmp, dst, s_len, d_len, ci->ci_level); - abd_return_buf(src, tmp, s_len); + c_len = ci->ci_compress(src, dst, s_len, d_len, ci->ci_level); if (c_len > d_len) return (s_len); @@ -128,23 +122,13 @@ zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len) } int -zio_decompress_data_buf(enum zio_compress c, void *src, void *dst, +zio_decompress_data(enum zio_compress c, void *src, void *dst, size_t s_len, size_t d_len) { zio_compress_info_t *ci = &zio_compress_table[c]; + if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL) return (SET_ERROR(EINVAL)); return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level)); } - -int -zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, - size_t s_len, size_t d_len) -{ - void *tmp = abd_borrow_buf_copy(src, s_len); - int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len); - abd_return_buf(src, tmp, s_len); - - return (ret); -} -- cgit v1.2.3