summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGvozden Neskovic <neskovic@gmail.com>2020-06-01 06:10:07 -0600
committerJerry Jelinek <jerry.jelinek@joyent.com>2020-06-01 15:05:58 -0600
commite86372a01d2d16a5dd4a64e144ed978ba17fe7dd (patch)
treebc899b99b0409baebbf09e9e392997a7a8596543
parent82049ff560eed6fbdf4cf222d894467f5809f9b3 (diff)
downloadillumos-joyent-e86372a01d2d16a5dd4a64e144ed978ba17fe7dd.tar.gz
12668 ZFS support for vectorized algorithms on x86 (initial support)
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed by: Jason King <jason.king@joyent.com> Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--exception_lists/check_rtime1
-rw-r--r--usr/src/cmd/Makefile3
-rw-r--r--usr/src/cmd/raidz_test/Makefile61
-rw-r--r--usr/src/cmd/raidz_test/raidz_bench.c228
-rw-r--r--usr/src/cmd/raidz_test/raidz_test.c761
-rw-r--r--usr/src/cmd/raidz_test/raidz_test.h117
-rw-r--r--usr/src/pkg/manifests/system-file-system-zfs-tests.mf2
-rw-r--r--usr/src/pkg/manifests/system-test-zfstest.mf5
-rw-r--r--usr/src/test/zfs-tests/include/commands.cfg3
-rw-r--r--usr/src/test/zfs-tests/runfiles/delphix.run3
-rw-r--r--usr/src/test/zfs-tests/runfiles/omnios.run3
-rw-r--r--usr/src/test/zfs-tests/runfiles/openindiana.run3
-rw-r--r--usr/src/test/zfs-tests/runfiles/smartos.run3
-rw-r--r--usr/src/test/zfs-tests/tests/functional/raidz/Makefile21
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh30
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh38
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh41
-rwxr-xr-xusr/src/test/zfs-tests/tests/functional/raidz/setup.ksh32
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/fs/zfs/abd.c181
-rw-r--r--usr/src/uts/common/fs/zfs/spa_misc.c3
-rw-r--r--usr/src/uts/common/fs/zfs/sys/abd.h9
-rw-r--r--usr/src/uts/common/fs/zfs/sys/simd.h40
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_raidz.h65
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h351
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz.c265
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz_math.c571
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h1477
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c337
29 files changed, 4464 insertions, 192 deletions
diff --git a/exception_lists/check_rtime b/exception_lists/check_rtime
index 42964957d4..43978cdbd6 100644
--- a/exception_lists/check_rtime
+++ b/exception_lists/check_rtime
@@ -231,6 +231,7 @@ FORBIDDEN libfakekernel\.so
FORBIDDEN_DEP usr/MACH(lib)/libzpool.so.1
FORBIDDEN_DEP usr/bin/amd64/ztest
FORBIDDEN_DEP usr/bin/i86/ztest
+FORBIDDEN_DEP usr/bin/raidz_test
FORBIDDEN_DEP usr/bin/sparcv7/ztest
FORBIDDEN_DEP usr/bin/sparcv9/ztest
FORBIDDEN_DEP usr/lib/MACH(smbfs)/libfknsmb.so.1
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 0f2cc306aa..7e0a681cd6 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -21,7 +21,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
# Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -346,6 +346,7 @@ COMMON_SUBDIRS= \
pwd \
pyzfs \
raidctl \
+ raidz_test \
ramdiskadm \
rcap \
rcm_daemon \
diff --git a/usr/src/cmd/raidz_test/Makefile b/usr/src/cmd/raidz_test/Makefile
new file mode 100644
index 0000000000..43e0c07829
--- /dev/null
+++ b/usr/src/cmd/raidz_test/Makefile
@@ -0,0 +1,61 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include ../Makefile.cmd
+include ../Makefile.cmd.64
+
+PROG= raidz_test
+OBJS= raidz_test.o raidz_bench.o
+SRCS= $(OBJS:%.o=%.c)
+POFILES= $(PROG:%=%.po)
+
+# No msg catalog here.
+POFILE=
+
+LDLIBS += -lzpool -lfakekernel -lumem
+
+INCS += -I../../lib/libzpool/common
+INCS += -I../../uts/common/fs/zfs
+
+CPPFLAGS.first = -I$(SRC)/lib/libfakekernel/common -D_FAKE_KERNEL
+CPPFLAGS += -D_LARGEFILE64_SOURCE=1
+CPPFLAGS += $(INCS)
+
+CSTD = $(CSTD_GNU99)
+
+CERRWARN += -_gcc=-Wno-type-limits
+
+SMATCH=off
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $(PROG) $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+install: all $(ROOTPROG)
+
+clean:
+ $(RM) $(OBJS)
+
+_msg: $(MSGDOMAIN) $(POFILES)
+ $(CP) $(POFILES) $(MSGDOMAIN)
+
+$(MSGDOMAIN):
+ $(INS.dir)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/raidz_test/raidz_bench.c b/usr/src/cmd/raidz_test/raidz_bench.c
new file mode 100644
index 0000000000..9dc22af6fd
--- /dev/null
+++ b/usr/src/cmd/raidz_test/raidz_bench.c
@@ -0,0 +1,228 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/zio.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+#include <stdio.h>
+#include <strings.h>
+
+#include <sys/time.h>
+
+#include "raidz_test.h"
+
+#define GEN_BENCH_MEMORY (((uint64_t)1ULL)<<32)
+#define REC_BENCH_MEMORY (((uint64_t)1ULL)<<29)
+#define BENCH_ASHIFT 12
+#define MIN_CS_SHIFT BENCH_ASHIFT
+#define MAX_CS_SHIFT SPA_MAXBLOCKSHIFT
+
+static zio_t zio_bench;
+static raidz_map_t *rm_bench;
+static size_t max_data_size = SPA_MAXBLOCKSIZE;
+
+static void
+bench_init_raidz_map(void)
+{
+ zio_bench.io_offset = 0;
+ zio_bench.io_size = max_data_size;
+
+ /*
+	 * To permit larger column sizes these have to be
+ * allocated using aligned alloc instead of zio_abd_buf_alloc
+ */
+ zio_bench.io_abd = raidz_alloc(max_data_size);
+
+ init_zio_abd(&zio_bench);
+}
+
+static void
+bench_fini_raidz_maps(void)
+{
+ /* tear down golden zio */
+ raidz_free(zio_bench.io_abd, max_data_size);
+ bzero(&zio_bench, sizeof (zio_t));
+}
+
+static inline void
+run_gen_bench_impl(const char *impl)
+{
+ int fn, ncols;
+ uint64_t ds, iter_cnt, iter, disksize;
+ hrtime_t start;
+ double elapsed, d_bw;
+
+ /* Benchmark generate functions */
+ for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+
+ for (ds = MIN_CS_SHIFT; ds <= MAX_CS_SHIFT; ds++) {
+ /* create suitable raidz_map */
+ ncols = rto_opts.rto_dcols + fn + 1;
+ zio_bench.io_size = 1ULL << ds;
+ rm_bench = vdev_raidz_map_alloc(&zio_bench,
+ BENCH_ASHIFT, ncols, fn+1);
+
+ /* estimate iteration count */
+ iter_cnt = GEN_BENCH_MEMORY;
+ iter_cnt /= zio_bench.io_size;
+
+ start = gethrtime();
+ for (iter = 0; iter < iter_cnt; iter++)
+ vdev_raidz_generate_parity(rm_bench);
+ elapsed = NSEC2SEC((double)(gethrtime() - start));
+
+ disksize = (1ULL << ds) / rto_opts.rto_dcols;
+ d_bw = (double)iter_cnt * (double)disksize;
+ d_bw /= (1024.0 * 1024.0 * elapsed);
+
+ LOG(D_ALL, "%10s, %8s, %zu, %10llu, %lf, %lf, %u\n",
+ impl,
+ raidz_gen_name[fn],
+ rto_opts.rto_dcols,
+ (1ULL<<ds),
+ d_bw,
+ d_bw * (double)(ncols),
+ (unsigned)iter_cnt);
+
+ vdev_raidz_map_free(rm_bench);
+ }
+ }
+}
+
+void
+run_gen_bench(void)
+{
+ char **impl_name;
+
+ LOG(D_INFO, DBLSEP "\nBenchmarking parity generation...\n\n");
+ LOG(D_ALL, "impl, math, dcols, iosize, disk_bw, total_bw, iter\n");
+
+ for (impl_name = (char **)raidz_impl_names; *impl_name != NULL;
+ impl_name++) {
+
+ if (vdev_raidz_impl_set(*impl_name) != 0)
+ continue;
+
+ run_gen_bench_impl(*impl_name);
+ }
+}
+
+static void
+run_rec_bench_impl(const char *impl)
+{
+ int fn, ncols, nbad;
+ uint64_t ds, iter_cnt, iter, disksize;
+ hrtime_t start;
+ double elapsed, d_bw;
+ static const int tgt[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
+ for (ds = MIN_CS_SHIFT; ds <= MAX_CS_SHIFT; ds++) {
+
+ /* create suitable raidz_map */
+ ncols = rto_opts.rto_dcols + PARITY_PQR;
+ zio_bench.io_size = 1ULL << ds;
+
+ /*
+ * raidz block is too short to test
+ * the requested method
+ */
+ if (zio_bench.io_size / rto_opts.rto_dcols <
+ (1ULL << BENCH_ASHIFT))
+ continue;
+
+ rm_bench = vdev_raidz_map_alloc(&zio_bench,
+ BENCH_ASHIFT, ncols, PARITY_PQR);
+
+ /* estimate iteration count */
+ iter_cnt = (REC_BENCH_MEMORY);
+ iter_cnt /= zio_bench.io_size;
+
+ /* calculate how many bad columns there are */
+ nbad = MIN(3, raidz_ncols(rm_bench) -
+ raidz_parity(rm_bench));
+
+ start = gethrtime();
+ for (iter = 0; iter < iter_cnt; iter++)
+ vdev_raidz_reconstruct(rm_bench, tgt[fn], nbad);
+ elapsed = NSEC2SEC((double)(gethrtime() - start));
+
+ disksize = (1ULL << ds) / rto_opts.rto_dcols;
+ d_bw = (double)iter_cnt * (double)(disksize);
+ d_bw /= (1024.0 * 1024.0 * elapsed);
+
+ LOG(D_ALL, "%10s, %8s, %zu, %10llu, %lf, %lf, %u\n",
+ impl,
+ raidz_rec_name[fn],
+ rto_opts.rto_dcols,
+ (1ULL<<ds),
+ d_bw,
+ d_bw * (double)ncols,
+ (unsigned)iter_cnt);
+
+ vdev_raidz_map_free(rm_bench);
+ }
+ }
+}
+
+void
+run_rec_bench(void)
+{
+ char **impl_name;
+
+ LOG(D_INFO, DBLSEP "\nBenchmarking data reconstruction...\n\n");
+ LOG(D_ALL, "impl, math, dcols, iosize, disk_bw, total_bw, iter\n");
+
+ for (impl_name = (char **)raidz_impl_names; *impl_name != NULL;
+ impl_name++) {
+
+ if (vdev_raidz_impl_set(*impl_name) != 0)
+ continue;
+
+ run_rec_bench_impl(*impl_name);
+ }
+}
+
+void
+run_raidz_benchmark(void)
+{
+ bench_init_raidz_map();
+
+ run_gen_bench();
+ run_rec_bench();
+
+ bench_fini_raidz_maps();
+}
diff --git a/usr/src/cmd/raidz_test/raidz_test.c b/usr/src/cmd/raidz_test/raidz_test.c
new file mode 100644
index 0000000000..8d025b479d
--- /dev/null
+++ b/usr/src/cmd/raidz_test/raidz_test.c
@@ -0,0 +1,761 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/zio.h>
+#include <umem.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include "raidz_test.h"
+
+static int *rand_data;
+raidz_test_opts_t rto_opts;
+
+static char gdb[256];
+static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d";
+
+#define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
+
+static void print_opts(raidz_test_opts_t *opts, boolean_t force)
+{
+ char *verbose;
+ switch (opts->rto_v) {
+ case 0:
+ verbose = "no";
+ break;
+ case 1:
+ verbose = "info";
+ break;
+ default:
+ verbose = "debug";
+ break;
+ }
+
+ if (force || opts->rto_v >= D_INFO) {
+ (void) fprintf(stdout, DBLSEP "Running with options:\n"
+ " (-a) zio ashift : %zu\n"
+ " (-o) zio offset : 1 << %zu\n"
+ " (-d) number of raidz data columns : %zu\n"
+ " (-s) size of DATA : 1 << %zu\n"
+ " (-S) sweep parameters : %s \n"
+ " (-v) verbose : %s \n\n",
+ opts->rto_ashift, /* -a */
+ ilog2(opts->rto_offset), /* -o */
+ opts->rto_dcols, /* -d */
+ ilog2(opts->rto_dsize), /* -s */
+ opts->rto_sweep ? "yes" : "no", /* -S */
+ verbose); /* -v */
+ }
+}
+
+static void usage(boolean_t requested)
+{
+ const raidz_test_opts_t *o = &rto_opts_defaults;
+
+ FILE *fp = requested ? stdout : stderr;
+
+ (void) fprintf(fp, "Usage:\n"
+ "\t[-a zio ashift (default: %zu)]\n"
+ "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
+ "\t[-d number of raidz data columns (default: %zu)]\n"
+ "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
+ "\t[-S parameter sweep (default: %s)]\n"
+ "\t[-t timeout for parameter sweep test]\n"
+ "\t[-B benchmark all raidz implementations]\n"
+ "\t[-v increase verbosity (default: %zu)]\n"
+ "\t[-h (print help)]\n"
+ "\t[-T test the test, see if failure would be detected]\n"
+ "\t[-D debug (attach gdb on SIGSEGV)]\n"
+ "",
+ o->rto_ashift, /* -a */
+ ilog2(o->rto_offset), /* -o */
+ o->rto_dcols, /* -d */
+ ilog2(o->rto_dsize), /* -s */
+ rto_opts.rto_sweep ? "yes" : "no", /* -S */
+	    o->rto_v);				/* -v */
+
+ exit(requested ? 0 : 1);
+}
+
+static void process_options(int argc, char **argv)
+{
+ size_t value;
+ int opt;
+
+ raidz_test_opts_t *o = &rto_opts;
+
+ bcopy(&rto_opts_defaults, o, sizeof (*o));
+
+ while ((opt = getopt(argc, argv, "TDBSvha:o:d:s:t:")) != -1) {
+ value = 0;
+
+ switch (opt) {
+ case 'a':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_ashift = MIN(13, MAX(9, value));
+ break;
+ case 'o':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
+ break;
+ case 'd':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_dcols = MIN(255, MAX(1, value));
+ break;
+ case 's':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
+ MAX(SPA_MINBLOCKSHIFT, value));
+ break;
+ case 't':
+ value = strtoull(optarg, NULL, 0);
+ o->rto_sweep_timeout = value;
+ break;
+ case 'v':
+ o->rto_v++;
+ break;
+ case 'S':
+ o->rto_sweep = 1;
+ break;
+ case 'B':
+ o->rto_benchmark = 1;
+ break;
+ case 'D':
+ o->rto_gdb = 1;
+ break;
+ case 'T':
+ o->rto_sanity = 1;
+ break;
+ case 'h':
+ usage(B_TRUE);
+ break;
+ case '?':
+ default:
+ usage(B_FALSE);
+ break;
+ }
+ }
+}
+
+#define DATA_COL(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_abd)
+#define DATA_COL_SIZE(rm, i) ((rm)->rm_col[raidz_parity(rm) + (i)].rc_size)
+
+#define CODE_COL(rm, i) ((rm)->rm_col[(i)].rc_abd)
+#define CODE_COL_SIZE(rm, i) ((rm)->rm_col[(i)].rc_size)
+
+static int
+cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
+{
+ int i, ret = 0;
+
+ VERIFY(parity >= 1 && parity <= 3);
+
+ for (i = 0; i < parity; i++) {
+ if (abd_cmp(CODE_COL(rm, i), CODE_COL(opts->rm_golden, i),
+ CODE_COL(rm, i)->abd_size) != 0) {
+ ret++;
+ LOG_OPT(D_DEBUG, opts,
+ "\nParity block [%d] different!\n", i);
+ }
+ }
+ return (ret);
+}
+
+static int
+cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
+{
+ int i, ret = 0;
+ int dcols = opts->rm_golden->rm_cols - raidz_parity(opts->rm_golden);
+
+ for (i = 0; i < dcols; i++) {
+ if (abd_cmp(DATA_COL(opts->rm_golden, i), DATA_COL(rm, i),
+ DATA_COL(opts->rm_golden, i)->abd_size) != 0) {
+ ret++;
+
+ LOG_OPT(D_DEBUG, opts,
+ "\nData block [%d] different!\n", i);
+ }
+ }
+ return (ret);
+}
+
+static int
+init_rand(void *data, size_t size, void *private)
+{
+ int i;
+ int *dst = (int *)data;
+
+ for (i = 0; i < size / sizeof (int); i++)
+ dst[i] = rand_data[i];
+
+ return (0);
+}
+
+static void
+corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
+{
+ int i;
+ raidz_col_t *col;
+
+ for (i = 0; i < cnt; i++) {
+ col = &rm->rm_col[tgts[i]];
+ (void) abd_iterate_func(col->rc_abd, 0, col->rc_size,
+ init_rand, NULL);
+ }
+}
+
+void
+init_zio_abd(zio_t *zio)
+{
+ (void) abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
+}
+
+static void
+fini_raidz_map(zio_t **zio, raidz_map_t **rm)
+{
+ vdev_raidz_map_free(*rm);
+ raidz_free((*zio)->io_abd, (*zio)->io_size);
+ umem_free(*zio, sizeof (zio_t));
+
+ *zio = NULL;
+ *rm = NULL;
+}
+
+static int
+init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
+{
+ int err = 0;
+ zio_t *zio_test;
+ raidz_map_t *rm_test;
+ const size_t total_ncols = opts->rto_dcols + parity;
+
+ if (opts->rm_golden) {
+ fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
+ }
+
+ opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
+ zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
+
+ opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
+ opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
+
+ opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
+ zio_test->io_abd = raidz_alloc(opts->rto_dsize);
+
+ init_zio_abd(opts->zio_golden);
+ init_zio_abd(zio_test);
+
+ VERIFY0(vdev_raidz_impl_set("original"));
+
+ opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
+ opts->rto_ashift, total_ncols, parity);
+ rm_test = vdev_raidz_map_alloc(zio_test,
+ opts->rto_ashift, total_ncols, parity);
+
+ VERIFY(opts->zio_golden);
+ VERIFY(opts->rm_golden);
+
+ vdev_raidz_generate_parity(opts->rm_golden);
+ vdev_raidz_generate_parity(rm_test);
+
+ /* sanity check */
+ err |= cmp_data(opts, rm_test);
+ err |= cmp_code(opts, rm_test, parity);
+
+ if (err)
+ ERRMSG("initializing the golden copy ... [FAIL]!\n");
+
+ /* tear down raidz_map of test zio */
+ fini_raidz_map(&zio_test, &rm_test);
+
+ return (err);
+}
+
+static raidz_map_t *
+init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
+{
+ raidz_map_t *rm = NULL;
+ const size_t alloc_dsize = opts->rto_dsize;
+ const size_t total_ncols = opts->rto_dcols + parity;
+ const int ccols[] = { 0, 1, 2 };
+
+ VERIFY(zio);
+ VERIFY(parity <= 3 && parity >= 1);
+
+ *zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
+
+ (*zio)->io_offset = 0;
+ (*zio)->io_size = alloc_dsize;
+ (*zio)->io_abd = raidz_alloc(alloc_dsize);
+ init_zio_abd(*zio);
+
+ rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
+ total_ncols, parity);
+ VERIFY(rm);
+
+ /* Make sure code columns are destroyed */
+ corrupt_colums(rm, ccols, parity);
+
+ return (rm);
+}
+
+static int
+run_gen_check(raidz_test_opts_t *opts)
+{
+ char **impl_name;
+ int fn, err = 0;
+ zio_t *zio_test;
+ raidz_map_t *rm_test;
+
+ err = init_raidz_golden_map(opts, PARITY_PQR);
+ if (0 != err)
+ return (err);
+
+ LOG(D_INFO, DBLSEP);
+ LOG(D_INFO, "Testing parity generation...\n");
+
+ for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
+ impl_name++) {
+
+ LOG(D_INFO, SEP);
+ LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
+
+ if (0 != vdev_raidz_impl_set(*impl_name)) {
+ LOG(D_INFO, "[SKIP]\n");
+ continue;
+ } else {
+ LOG(D_INFO, "[SUPPORTED]\n");
+ }
+
+ for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ /* create suitable raidz_map */
+ rm_test = init_raidz_map(opts, &zio_test, fn+1);
+ VERIFY(rm_test);
+
+ LOG(D_INFO, "\t\tTesting method [%s] ...",
+ raidz_gen_name[fn]);
+
+ if (!opts->rto_sanity)
+ vdev_raidz_generate_parity(rm_test);
+
+ if (cmp_code(opts, rm_test, fn+1) != 0) {
+ LOG(D_INFO, "[FAIL]\n");
+ err++;
+ } else
+ LOG(D_INFO, "[PASS]\n");
+
+ fini_raidz_map(&zio_test, &rm_test);
+ }
+ }
+
+ fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
+
+ return (err);
+}
+
+static int
+run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
+{
+ int x0, x1, x2;
+ int tgtidx[3];
+ int err = 0;
+ static const int rec_tgts[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
+
+ if (fn < RAIDZ_REC_PQ) {
+ /* can reconstruct 1 failed data disk */
+ for (x0 = 0; x0 < opts->rto_dcols; x0++) {
+ if (x0 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ LOG(D_DEBUG, "[%d] ", x0);
+
+ tgtidx[2] = x0 + raidz_parity(rm);
+
+ corrupt_colums(rm, tgtidx+2, 1);
+
+ if (!opts->rto_sanity)
+ (void) vdev_raidz_reconstruct(rm, tgtidx, 3);
+
+ if (cmp_data(opts, rm) != 0) {
+ err++;
+ LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
+ }
+ }
+
+ } else if (fn < RAIDZ_REC_PQR) {
+ /* can reconstruct 2 failed data disk */
+ for (x0 = 0; x0 < opts->rto_dcols; x0++) {
+ if (x0 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+ for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
+ if (x1 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ LOG(D_DEBUG, "[%d %d] ", x0, x1);
+
+ tgtidx[1] = x0 + raidz_parity(rm);
+ tgtidx[2] = x1 + raidz_parity(rm);
+
+ corrupt_colums(rm, tgtidx+1, 2);
+
+ if (!opts->rto_sanity)
+ (void) vdev_raidz_reconstruct(rm,
+ tgtidx, 3);
+
+ if (cmp_data(opts, rm) != 0) {
+ err++;
+ LOG(D_DEBUG, "\nREC D[%d %d]... "
+ "[FAIL]\n", x0, x1);
+ }
+ }
+ }
+ } else {
+ /* can reconstruct 3 failed data disk */
+ for (x0 = 0; x0 < opts->rto_dcols; x0++) {
+ if (x0 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+ for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
+ if (x1 >= rm->rm_cols - raidz_parity(rm))
+ continue;
+ for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
+ if (x2 >=
+ rm->rm_cols - raidz_parity(rm))
+ continue;
+
+ /* Check if should stop */
+ if (rto_opts.rto_should_stop)
+ return (err);
+
+ LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
+
+ tgtidx[0] = x0 + raidz_parity(rm);
+ tgtidx[1] = x1 + raidz_parity(rm);
+ tgtidx[2] = x2 + raidz_parity(rm);
+
+ corrupt_colums(rm, tgtidx, 3);
+
+ if (!opts->rto_sanity)
+ (void) vdev_raidz_reconstruct(
+ rm, tgtidx, 3);
+
+ if (cmp_data(opts, rm) != 0) {
+ err++;
+ LOG(D_DEBUG,
+ "\nREC D[%d %d %d]... "
+ "[FAIL]\n", x0, x1, x2);
+ }
+ }
+ }
+ }
+ }
+ return (err);
+}
+
+static int
+run_rec_check(raidz_test_opts_t *opts)
+{
+ char **impl_name;
+ unsigned fn, err = 0;
+ zio_t *zio_test;
+ raidz_map_t *rm_test;
+
+ err = init_raidz_golden_map(opts, PARITY_PQR);
+ if (0 != err)
+ return (err);
+
+ LOG(D_INFO, DBLSEP);
+ LOG(D_INFO, "Testing data reconstruction...\n");
+
+ for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
+ impl_name++) {
+
+ LOG(D_INFO, SEP);
+ LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
+
+ if (vdev_raidz_impl_set(*impl_name) != 0) {
+ LOG(D_INFO, "[SKIP]\n");
+ continue;
+ } else
+ LOG(D_INFO, "[SUPPORTED]\n");
+
+
+ /* create suitable raidz_map */
+ rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
+ /* generate parity */
+ vdev_raidz_generate_parity(rm_test);
+
+ for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
+
+ LOG(D_INFO, "\t\tTesting method [%s] ...",
+ raidz_rec_name[fn]);
+
+ if (run_rec_check_impl(opts, rm_test, fn) != 0) {
+ LOG(D_INFO, "[FAIL]\n");
+ err++;
+
+ } else
+ LOG(D_INFO, "[PASS]\n");
+
+ }
+ /* tear down test raidz_map */
+ fini_raidz_map(&zio_test, &rm_test);
+ }
+
+ fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
+
+ return (err);
+}
+
+static int
+run_test(raidz_test_opts_t *opts)
+{
+ int err = 0;
+
+ if (opts == NULL)
+ opts = &rto_opts;
+
+ print_opts(opts, B_FALSE);
+
+ err |= run_gen_check(opts);
+ err |= run_rec_check(opts);
+
+ return (err);
+}
+
+#define SWEEP_RUNNING 0
+#define SWEEP_FINISHED 1
+#define SWEEP_ERROR 2
+#define SWEEP_TIMEOUT 3
+
+static int sweep_state = 0;
+static raidz_test_opts_t failed_opts;
+
+static kmutex_t sem_mtx;
+static kcondvar_t sem_cv;
+static int max_free_slots;
+static int free_slots;
+
+static void
+sweep_thread(void *arg)
+{
+ int err = 0;
+ raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
+ VERIFY(opts != NULL);
+
+ err = run_test(opts);
+
+ if (rto_opts.rto_sanity) {
+ /* 25% chance that a sweep test fails */
+ if (rand() < (RAND_MAX/4))
+ err = 1;
+ }
+
+ if (0 != err) {
+ mutex_enter(&sem_mtx);
+ memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
+ sweep_state = SWEEP_ERROR;
+ mutex_exit(&sem_mtx);
+ }
+
+ umem_free(opts, sizeof (raidz_test_opts_t));
+
+ /* signal the next thread */
+ mutex_enter(&sem_mtx);
+ free_slots++;
+ cv_signal(&sem_cv);
+ mutex_exit(&sem_mtx);
+
+ thread_exit();
+}
+
+static int
+run_sweep(void)
+{
+ static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
+ static const size_t ashift_v[] = { 9, 12, 14 };
+ static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
+ 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
+
+ (void) setvbuf(stdout, NULL, _IONBF, 0);
+
+ ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
+ ARRAY_SIZE(dcols_v);
+ ulong_t tried_comb = 0;
+ hrtime_t time_diff, start_time = gethrtime();
+ raidz_test_opts_t *opts;
+ int a, d, s;
+
+ max_free_slots = free_slots = MAX(2, boot_ncpus);
+
+ mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
+
+ for (s = 0; s < ARRAY_SIZE(size_v); s++)
+ for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
+ for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
+
+ if (size_v[s] < (1 << ashift_v[a])) {
+ total_comb--;
+ continue;
+ }
+
+ if (++tried_comb % 20 == 0)
+ LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
+
+ /* wait for signal to start new thread */
+ mutex_enter(&sem_mtx);
+ while (cv_timedwait_sig(&sem_cv, &sem_mtx,
+ ddi_get_lbolt() + hz)) {
+
+ /* check if should stop the test (timeout) */
+ time_diff = (gethrtime() - start_time) / NANOSEC;
+ if (rto_opts.rto_sweep_timeout > 0 &&
+ time_diff >= rto_opts.rto_sweep_timeout) {
+ sweep_state = SWEEP_TIMEOUT;
+ rto_opts.rto_should_stop = B_TRUE;
+ mutex_exit(&sem_mtx);
+ goto exit;
+ }
+
+ /* check if should stop the test (error) */
+ if (sweep_state != SWEEP_RUNNING) {
+ mutex_exit(&sem_mtx);
+ goto exit;
+ }
+
+ /* exit loop if a slot is available */
+ if (free_slots > 0) {
+ break;
+ }
+ }
+
+ free_slots--;
+ mutex_exit(&sem_mtx);
+
+ opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
+ opts->rto_ashift = ashift_v[a];
+ opts->rto_dcols = dcols_v[d];
+ opts->rto_offset = (1 << ashift_v[a]) * rand();
+ opts->rto_dsize = size_v[s];
+ opts->rto_v = 0; /* be quiet */
+
+ VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
+ 0, NULL, TS_RUN, maxclsyspri), !=, NULL);
+ }
+
+exit:
+ LOG(D_ALL, "\nWaiting for test threads to finish...\n");
+ mutex_enter(&sem_mtx);
+ VERIFY(free_slots <= max_free_slots);
+ while (free_slots < max_free_slots) {
+ (void) cv_wait(&sem_cv, &sem_mtx);
+ }
+ mutex_exit(&sem_mtx);
+
+ if (sweep_state == SWEEP_ERROR) {
+ ERRMSG("Sweep test failed! Failed option: \n");
+ print_opts(&failed_opts, B_TRUE);
+ } else {
+ if (sweep_state == SWEEP_TIMEOUT)
+ LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
+ (ulong_t)rto_opts.rto_sweep_timeout);
+
+ LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
+ (ulong_t)tried_comb);
+ }
+
+ mutex_destroy(&sem_mtx);
+
+ return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
+}
+
+int
+main(int argc, char **argv)
+{
+ size_t i;
+ int err = 0;
+
+ /* init gdb string early */
+ (void) sprintf(gdb, gdb_tmpl, getpid());
+
+ (void) setvbuf(stdout, NULL, _IOLBF, 0);
+
+ dprintf_setup(&argc, argv);
+
+ process_options(argc, argv);
+
+ kernel_init(FREAD);
+
+ /* setup random data because rand() is not reentrant */
+ rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
+ srand((unsigned)time(NULL) * getpid());
+ for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
+ rand_data[i] = rand();
+
+ mprotect((void *)rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
+
+ if (rto_opts.rto_benchmark) {
+ run_raidz_benchmark();
+ } else if (rto_opts.rto_sweep) {
+ err = run_sweep();
+ } else {
+ err = run_test(NULL);
+ }
+
+ umem_free(rand_data, SPA_MAXBLOCKSIZE);
+ kernel_fini();
+
+ return (err);
+}
diff --git a/usr/src/cmd/raidz_test/raidz_test.h b/usr/src/cmd/raidz_test/raidz_test.h
new file mode 100644
index 0000000000..c91e521436
--- /dev/null
+++ b/usr/src/cmd/raidz_test/raidz_test.h
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef RAIDZ_TEST_H
+#define RAIDZ_TEST_H
+
+#include <sys/spa.h>
+
+static const char *raidz_impl_names[] = {
+ "original",
+ "scalar",
+ "sse2",
+ "ssse3",
+ "avx2",
+ "avx512f",
+ "avx512bw",
+ "aarch64_neon",
+ "aarch64_neonx2",
+ "powerpc_altivec",
+ NULL
+};
+
+typedef struct raidz_test_opts {
+ size_t rto_ashift;
+ size_t rto_offset;
+ size_t rto_dcols;
+ size_t rto_dsize;
+ size_t rto_v;
+ size_t rto_sweep;
+ size_t rto_sweep_timeout;
+ size_t rto_benchmark;
+ size_t rto_sanity;
+ size_t rto_gdb;
+
+ /* non-user options */
+ boolean_t rto_should_stop;
+
+ zio_t *zio_golden;
+ raidz_map_t *rm_golden;
+} raidz_test_opts_t;
+
+static const raidz_test_opts_t rto_opts_defaults = {
+ .rto_ashift = 9,
+ .rto_offset = 1ULL << 0,
+ .rto_dcols = 8,
+ .rto_dsize = 1<<19,
+ .rto_v = 0,
+ .rto_sweep = 0,
+ .rto_benchmark = 0,
+ .rto_sanity = 0,
+ .rto_gdb = 0,
+ .rto_should_stop = B_FALSE
+};
+
+extern raidz_test_opts_t rto_opts;
+
+static inline size_t ilog2(size_t a)
+{
+ return (a > 1 ? 1 + ilog2(a >> 1) : 0);
+}
+
+
+#define D_ALL 0
+#define D_INFO 1
+#define D_DEBUG 2
+
+#define LOG(lvl, a...) \
+{ \
+ if (rto_opts.rto_v >= lvl) \
+ (void) fprintf(stdout, a); \
+} \
+
+#define LOG_OPT(lvl, opt, a...) \
+{ \
+ if (opt->rto_v >= lvl) \
+ (void) fprintf(stdout, a); \
+} \
+
+#define ERRMSG(a...) (void) fprintf(stderr, a)
+
+
+#define DBLSEP "================\n"
+#define SEP "----------------\n"
+
+
+#define raidz_alloc(size) abd_alloc(size, B_FALSE)
+#define raidz_free(p, size) abd_free(p)
+
+
+void init_zio_abd(zio_t *zio);
+
+void run_raidz_benchmark(void);
+
+#endif /* RAIDZ_TEST_H */
diff --git a/usr/src/pkg/manifests/system-file-system-zfs-tests.mf b/usr/src/pkg/manifests/system-file-system-zfs-tests.mf
index 4e2b5f1add..d39248a2e4 100644
--- a/usr/src/pkg/manifests/system-file-system-zfs-tests.mf
+++ b/usr/src/pkg/manifests/system-file-system-zfs-tests.mf
@@ -22,6 +22,7 @@
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+# Copyright 2020 Joyent, Inc.
#
set name=pkg.fmri value=pkg:/system/file-system/zfs/tests@$(PKGVERS)
@@ -53,6 +54,7 @@ $(i386_ONLY)file path=usr/bin/$(ARCH32)/zlook mode=0555
$(i386_ONLY)file path=usr/bin/$(ARCH32)/ztest mode=0555
file path=usr/bin/$(ARCH64)/zlook mode=0555
file path=usr/bin/$(ARCH64)/ztest mode=0555
+file path=usr/bin/raidz_test mode=0555
file path=usr/bin/zloop mode=0555
file path=usr/include/sys/fs/zut.h
file path=usr/lib/devfsadm/linkmod/SUNW_zut_link.so group=sys
diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf
index 3a4d20d9bf..40629ff671 100644
--- a/usr/src/pkg/manifests/system-test-zfstest.mf
+++ b/usr/src/pkg/manifests/system-test-zfstest.mf
@@ -143,6 +143,7 @@ dir path=opt/zfs-tests/tests/functional/poolversion
dir path=opt/zfs-tests/tests/functional/privilege
dir path=opt/zfs-tests/tests/functional/projectquota
dir path=opt/zfs-tests/tests/functional/quota
+dir path=opt/zfs-tests/tests/functional/raidz
dir path=opt/zfs-tests/tests/functional/redundancy
dir path=opt/zfs-tests/tests/functional/refquota
dir path=opt/zfs-tests/tests/functional/refreserv
@@ -2798,6 +2799,10 @@ file path=opt/zfs-tests/tests/functional/quota/quota_004_pos mode=0555
file path=opt/zfs-tests/tests/functional/quota/quota_005_pos mode=0555
file path=opt/zfs-tests/tests/functional/quota/quota_006_neg mode=0555
file path=opt/zfs-tests/tests/functional/quota/setup mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/cleanup mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/raidz_001_neg mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/raidz_002_pos mode=0555
+file path=opt/zfs-tests/tests/functional/raidz/setup mode=0555
file path=opt/zfs-tests/tests/functional/redundancy/cleanup mode=0555
file path=opt/zfs-tests/tests/functional/redundancy/redundancy.cfg mode=0444
file path=opt/zfs-tests/tests/functional/redundancy/redundancy.kshlib \
diff --git a/usr/src/test/zfs-tests/include/commands.cfg b/usr/src/test/zfs-tests/include/commands.cfg
index 050d6caba7..f9b0bdf7ac 100644
--- a/usr/src/test/zfs-tests/include/commands.cfg
+++ b/usr/src/test/zfs-tests/include/commands.cfg
@@ -11,7 +11,7 @@
#
# Copyright (c) 2016, 2018 by Delphix. All rights reserved.
-# Copyright 2019 Joyent, Inc.
+# Copyright 2020 Joyent, Inc.
#
#
@@ -89,6 +89,7 @@ export USR_BIN_FILES='awk
ps
pwd
python
+ raidz_test
readlink
rm
rmdir
diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run b/usr/src/test/zfs-tests/runfiles/delphix.run
index 8acd2710bf..3d1e71cb36 100644
--- a/usr/src/test/zfs-tests/runfiles/delphix.run
+++ b/usr/src/test/zfs-tests/runfiles/delphix.run
@@ -582,6 +582,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/redundancy]
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
'redundancy_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run
index f287933c2f..35b87d44e6 100644
--- a/usr/src/test/zfs-tests/runfiles/omnios.run
+++ b/usr/src/test/zfs-tests/runfiles/omnios.run
@@ -581,6 +581,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/redundancy]
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
'redundancy_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run
index 21d2055a8c..be80f55301 100644
--- a/usr/src/test/zfs-tests/runfiles/openindiana.run
+++ b/usr/src/test/zfs-tests/runfiles/openindiana.run
@@ -581,6 +581,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/redundancy]
tests = ['redundancy_001_pos', 'redundancy_002_pos', 'redundancy_003_pos',
'redundancy_004_neg']
diff --git a/usr/src/test/zfs-tests/runfiles/smartos.run b/usr/src/test/zfs-tests/runfiles/smartos.run
index 92b9b18c57..2fe72661c5 100644
--- a/usr/src/test/zfs-tests/runfiles/smartos.run
+++ b/usr/src/test/zfs-tests/runfiles/smartos.run
@@ -509,6 +509,9 @@ tests = ['projectid_001_pos', 'projectid_002_pos', 'projectid_003_pos',
tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos', 'quota_004_pos',
'quota_005_pos', 'quota_006_neg']
+[/opt/zfs-tests/tests/functional/raidz]
+tests = ['raidz_001_neg', 'raidz_002_pos']
+
[/opt/zfs-tests/tests/functional/refquota]
tests = ['refquota_001_pos', 'refquota_002_pos', 'refquota_003_pos',
'refquota_004_pos', 'refquota_005_pos', 'refquota_006_neg']
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/Makefile b/usr/src/test/zfs-tests/tests/functional/raidz/Makefile
new file mode 100644
index 0000000000..5d0bf4506a
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/Makefile
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2020 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/zfs-tests
+TARGETDIR = $(ROOTOPTPKG)/tests/functional/raidz
+
+include $(SRC)/test/zfs-tests/Makefile.com
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh
new file mode 100755
index 0000000000..c92c54c270
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/cleanup.ksh
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+# default_cleanup
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh
new file mode 100755
index 0000000000..4c105b9411
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_001_neg.ksh
@@ -0,0 +1,38 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Call the raidz_test tool with the -T option to test the infrastructure.
+# This option should make raidz_test return non-zero.
+#
+
+log_mustnot raidz_test -T
+
+log_pass "raidz_test detects errors as expected."
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh
new file mode 100755
index 0000000000..e238a881b0
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/raidz_002_pos.ksh
@@ -0,0 +1,41 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Call the raidz_test tool with -S to test all supported raidz
+# implementations. This option will test several raidz block geometries
+# and several zio parameters that affect raidz block layout. Data
+# reconstruction performs all combinations of failed disks. Wall time
+# is set to 5min, but actual runtime might be longer.
+#
+
+log_must raidz_test -S -t 300
+
+log_pass "raidz_test parameter sweep test succeeded."
diff --git a/usr/src/test/zfs-tests/tests/functional/raidz/setup.ksh b/usr/src/test/zfs-tests/tests/functional/raidz/setup.ksh
new file mode 100755
index 0000000000..4e155d24d5
--- /dev/null
+++ b/usr/src/test/zfs-tests/tests/functional/raidz/setup.ksh
@@ -0,0 +1,32 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Gvozden Neskovic. All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+log_pass
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 63f314ca93..783b436bf4 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -1403,6 +1403,8 @@ ZFS_COMMON_OBJS += \
vdev_missing.o \
vdev_queue.o \
vdev_raidz.o \
+ vdev_raidz_math.o \
+ vdev_raidz_math_scalar.o \
vdev_removal.o \
vdev_root.o \
vdev_trim.o \
diff --git a/usr/src/uts/common/fs/zfs/abd.c b/usr/src/uts/common/fs/zfs/abd.c
index 596545afd9..3aabaf3f01 100644
--- a/usr/src/uts/common/fs/zfs/abd.c
+++ b/usr/src/uts/common/fs/zfs/abd.c
@@ -12,6 +12,7 @@
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 by Delphix. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -761,7 +762,8 @@ abd_iter_map(struct abd_iter *aiter)
} else {
size_t index = abd_iter_scatter_chunk_index(aiter);
offset = abd_iter_scatter_chunk_offset(aiter);
- aiter->iter_mapsize = zfs_abd_chunk_size - offset;
+ aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
+ aiter->iter_abd->abd_size - aiter->iter_pos);
paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index];
}
aiter->iter_mapaddr = (char *)paddr + offset;
@@ -990,3 +992,180 @@ abd_cmp(abd_t *dabd, abd_t *sabd, size_t size)
{
return (abd_iterate_func2(dabd, sabd, 0, 0, size, abd_cmp_cb, NULL));
}
+
+/*
+ * Iterate over code ABDs and a data ABD and call @func_raidz_gen.
+ *
+ * @cabds parity ABDs, must have equal size
+ * @dabd data ABD. Can be NULL (in this case @dsize = 0)
+ * @func_raidz_gen should be implemented so that its behaviour
+ * is the same when taking linear and when taking scatter
+ */
+void
+abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
+ ssize_t csize, ssize_t dsize, const unsigned parity,
+ void (*func_raidz_gen)(void **, const void *, size_t, size_t))
+{
+ int i;
+ ssize_t len, dlen;
+ struct abd_iter caiters[3];
+ struct abd_iter daiter = {0};
+ void *caddrs[3];
+
+ ASSERT3U(parity, <=, 3);
+
+ for (i = 0; i < parity; i++)
+ abd_iter_init(&caiters[i], cabds[i]);
+
+ if (dabd)
+ abd_iter_init(&daiter, dabd);
+
+ ASSERT3S(dsize, >=, 0);
+
+#ifdef _KERNEL
+ kpreempt_disable();
+#endif
+ while (csize > 0) {
+ len = csize;
+
+ if (dabd && dsize > 0)
+ abd_iter_map(&daiter);
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_map(&caiters[i]);
+ caddrs[i] = caiters[i].iter_mapaddr;
+ }
+
+ switch (parity) {
+ case 3:
+ len = MIN(caiters[2].iter_mapsize, len);
+ /* falls through */
+ case 2:
+ len = MIN(caiters[1].iter_mapsize, len);
+ /* falls through */
+ case 1:
+ len = MIN(caiters[0].iter_mapsize, len);
+ }
+
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+
+ if (dabd && dsize > 0) {
+ /* this needs precise iter.length */
+ len = MIN(daiter.iter_mapsize, len);
+ len = MIN(dsize, len);
+ dlen = len;
+ } else
+ dlen = 0;
+
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+ /*
+ * The iterated function likely will not do well if each
+ * segment except the last one is not multiple of 512 (raidz).
+ */
+ ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
+
+ func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);
+
+ for (i = parity-1; i >= 0; i--) {
+ abd_iter_unmap(&caiters[i]);
+ abd_iter_advance(&caiters[i], len);
+ }
+
+ if (dabd && dsize > 0) {
+ abd_iter_unmap(&daiter);
+ abd_iter_advance(&daiter, dlen);
+ dsize -= dlen;
+ }
+
+ csize -= len;
+
+ ASSERT3S(dsize, >=, 0);
+ ASSERT3S(csize, >=, 0);
+ }
+#ifdef _KERNEL
+ kpreempt_enable();
+#endif
+}
+
+/*
+ * Iterate over code ABDs and data reconstruction target ABDs and call
+ * @func_raidz_rec. Function maps at most 6 pages atomically.
+ *
+ * @cabds parity ABDs, must have equal size
+ * @tabds rec target ABDs, at most 3
+ * @tsize size of data target columns
+ * @func_raidz_rec expects syndrome data in target columns. Function
+ * reconstructs data and overwrites target columns.
+ */
+void
+abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
+ ssize_t tsize, const unsigned parity,
+ void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
+ const unsigned *mul),
+ const unsigned *mul)
+{
+ int i;
+ ssize_t len;
+ struct abd_iter citers[3];
+ struct abd_iter xiters[3];
+ void *caddrs[3], *xaddrs[3];
+
+ ASSERT3U(parity, <=, 3);
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_init(&citers[i], cabds[i]);
+ abd_iter_init(&xiters[i], tabds[i]);
+ }
+
+#ifdef _KERNEL
+ kpreempt_disable();
+#endif
+ while (tsize > 0) {
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_map(&citers[i]);
+ abd_iter_map(&xiters[i]);
+ caddrs[i] = citers[i].iter_mapaddr;
+ xaddrs[i] = xiters[i].iter_mapaddr;
+ }
+
+ len = tsize;
+ switch (parity) {
+ case 3:
+ len = MIN(xiters[2].iter_mapsize, len);
+ len = MIN(citers[2].iter_mapsize, len);
+ /* falls through */
+ case 2:
+ len = MIN(xiters[1].iter_mapsize, len);
+ len = MIN(citers[1].iter_mapsize, len);
+ /* falls through */
+ case 1:
+ len = MIN(xiters[0].iter_mapsize, len);
+ len = MIN(citers[0].iter_mapsize, len);
+ }
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+ /*
+ * The iterated function likely will not do well if each
+ * segment except the last one is not multiple of 512 (raidz).
+ */
+ ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
+
+ func_raidz_rec(xaddrs, len, caddrs, mul);
+
+ for (i = parity-1; i >= 0; i--) {
+ abd_iter_unmap(&xiters[i]);
+ abd_iter_unmap(&citers[i]);
+ abd_iter_advance(&xiters[i], len);
+ abd_iter_advance(&citers[i], len);
+ }
+
+ tsize -= len;
+ ASSERT3S(tsize, >=, 0);
+ }
+#ifdef _KERNEL
+ kpreempt_enable();
+#endif
+}
diff --git a/usr/src/uts/common/fs/zfs/spa_misc.c b/usr/src/uts/common/fs/zfs/spa_misc.c
index 9dac4e2ddc..c9ceeb6873 100644
--- a/usr/src/uts/common/fs/zfs/spa_misc.c
+++ b/usr/src/uts/common/fs/zfs/spa_misc.c
@@ -44,6 +44,7 @@
#include <sys/vdev_impl.h>
#include <sys/vdev_initialize.h>
#include <sys/vdev_trim.h>
+#include <sys/vdev_raidz.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
@@ -2253,6 +2254,7 @@ spa_init(int mode)
zil_init();
vdev_cache_stat_init();
vdev_mirror_stat_init();
+ vdev_raidz_math_init();
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
@@ -2271,6 +2273,7 @@ spa_fini(void)
vdev_cache_stat_fini();
vdev_mirror_stat_fini();
+ vdev_raidz_math_fini();
zil_fini();
dmu_fini();
zio_fini();
diff --git a/usr/src/uts/common/fs/zfs/sys/abd.h b/usr/src/uts/common/fs/zfs/sys/abd.h
index 621635933e..23699c0420 100644
--- a/usr/src/uts/common/fs/zfs/sys/abd.h
+++ b/usr/src/uts/common/fs/zfs/sys/abd.h
@@ -103,6 +103,15 @@ int abd_cmp(abd_t *, abd_t *, size_t);
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
+void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
+ ssize_t csize, ssize_t dsize, const unsigned parity,
+ void (*func_raidz_gen)(void **, const void *, size_t, size_t));
+void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
+ ssize_t tsize, const unsigned parity,
+ void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
+ const unsigned *mul),
+ const unsigned *mul);
+
/*
* Wrappers for calls with offsets of 0
*/
diff --git a/usr/src/uts/common/fs/zfs/sys/simd.h b/usr/src/uts/common/fs/zfs/sys/simd.h
new file mode 100644
index 0000000000..4494c7d02a
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/simd.h
@@ -0,0 +1,40 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef _SIMD_H
+#define _SIMD_H
+
+#if defined(__amd64__) || defined(__i386__)
+
+/* Temporarily disabled until subsequent work to turn this on. */
+#define kfpu_allowed() 0
+#define kfpu_initialize(tsk) do {} while (0)
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+#define kfpu_init() (0)
+#define kfpu_fini() do {} while (0)
+
+#else
+
+/* Non-x86 CPUs currently always disallow kernel FPU support */
+#define kfpu_allowed() 0
+#define kfpu_initialize(tsk) do {} while (0)
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+#define kfpu_init() (0)
+#define kfpu_fini() do {} while (0)
+#endif
+
+#endif /* _SIMD_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
new file mode 100644
index 0000000000..bf5c840139
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_raidz.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
+ * Copyright 2020 Joyent, Inc.
+ */
+
+#ifndef _SYS_VDEV_RAIDZ_H
+#define _SYS_VDEV_RAIDZ_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct zio;
+struct raidz_map;
+#if !defined(_KERNEL)
+struct kernel_param {};
+#endif
+
+/*
+ * vdev_raidz interface
+ */
+struct raidz_map * vdev_raidz_map_alloc(struct zio *, uint64_t,
+ uint64_t, uint64_t);
+void vdev_raidz_map_free(struct raidz_map *);
+void vdev_raidz_generate_parity(struct raidz_map *);
+int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
+
+/*
+ * vdev_raidz_math interface
+ */
+void vdev_raidz_math_init(void);
+void vdev_raidz_math_fini(void);
+const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
+int vdev_raidz_math_generate(struct raidz_map *);
+int vdev_raidz_math_reconstruct(struct raidz_map *, const int *,
+ const int *, const int);
+int vdev_raidz_impl_set(const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_RAIDZ_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h
new file mode 100644
index 0000000000..d8defc04ea
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_raidz_impl.h
@@ -0,0 +1,351 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#ifndef _VDEV_RAIDZ_H
+#define _VDEV_RAIDZ_H
+
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <sys/kstat.h>
+#include <sys/abd.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CODE_P (0U)
+#define CODE_Q (1U)
+#define CODE_R (2U)
+
+#define PARITY_P (1U)
+#define PARITY_PQ (2U)
+#define PARITY_PQR (3U)
+
+#define TARGET_X (0U)
+#define TARGET_Y (1U)
+#define TARGET_Z (2U)
+
+/*
+ * Parity generation methods indexes
+ */
+enum raidz_math_gen_op {
+ RAIDZ_GEN_P = 0,
+ RAIDZ_GEN_PQ,
+ RAIDZ_GEN_PQR,
+ RAIDZ_GEN_NUM = 3
+};
+/*
+ * Data reconstruction methods indexes
+ */
+enum raidz_rec_op {
+ RAIDZ_REC_P = 0,
+ RAIDZ_REC_Q,
+ RAIDZ_REC_R,
+ RAIDZ_REC_PQ,
+ RAIDZ_REC_PR,
+ RAIDZ_REC_QR,
+ RAIDZ_REC_PQR,
+ RAIDZ_REC_NUM = 7
+};
+
+extern const char *raidz_gen_name[RAIDZ_GEN_NUM];
+extern const char *raidz_rec_name[RAIDZ_REC_NUM];
+
+/*
+ * Methods used to define raidz implementation
+ *
+ * @raidz_gen_f Parity generation function
+ * @par1 pointer to raidz_map
+ * @raidz_rec_f Data reconstruction function
+ * @par1 pointer to raidz_map
+ * @par2 array of reconstruction targets
+ * @will_work_f Function returns TRUE if impl. is supported on the system
+ * @init_impl_f Function is called once on init
+ * @fini_impl_f Function is called once on fini
+ */
+typedef void (*raidz_gen_f)(void *);
+typedef int (*raidz_rec_f)(void *, const int *);
+typedef boolean_t (*will_work_f)(void);
+typedef void (*init_impl_f)(void);
+typedef void (*fini_impl_f)(void);
+
+#define RAIDZ_IMPL_NAME_MAX (20)
+
+typedef struct raidz_impl_ops {
+ init_impl_f init;
+ fini_impl_f fini;
+ raidz_gen_f gen[RAIDZ_GEN_NUM]; /* Parity generate functions */
+ raidz_rec_f rec[RAIDZ_REC_NUM]; /* Data reconstruction functions */
+ will_work_f is_supported; /* Support check function */
+ char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */
+} raidz_impl_ops_t;
+
+typedef struct raidz_col {
+ size_t rc_devidx; /* child device index for I/O */
+ size_t rc_offset; /* device offset */
+ size_t rc_size; /* I/O size */
+ abd_t *rc_abd; /* I/O data */
+ void *rc_gdata; /* used to store the "good" version */
+ int rc_error; /* I/O error for this device */
+ unsigned int rc_tried; /* Did we attempt this I/O column? */
+ unsigned int rc_skipped; /* Did we skip this I/O column? */
+} raidz_col_t;
+
+typedef struct raidz_map {
+ size_t rm_cols; /* Regular column count */
+ size_t rm_scols; /* Count including skipped columns */
+ size_t rm_bigcols; /* Number of oversized columns */
+ size_t rm_asize; /* Actual total I/O size */
+ size_t rm_missingdata; /* Count of missing data devices */
+ size_t rm_missingparity; /* Count of missing parity devices */
+ size_t rm_firstdatacol; /* First data column/parity count */
+ size_t rm_nskip; /* Skipped sectors for padding */
+ size_t rm_skipstart; /* Column index of padding start */
+ void *rm_abd_copy; /* rm_asize-buffer of copied data */
+ size_t rm_reports; /* # of referencing checksum reports */
+ unsigned int rm_freed; /* map no longer has referencing ZIO */
+ unsigned int rm_ecksuminjected; /* checksum error was injected */
+ const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
+ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
+} raidz_map_t;
+
+#define RAIDZ_ORIGINAL_IMPL (INT_MAX)
+
+extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
+
+/*
+ * Commonly used raidz_map helpers
+ *
+ * raidz_parity Returns parity of the RAIDZ block
+ * raidz_ncols Returns number of columns the block spans
+ * raidz_nbigcols Returns number of big columns
+ * raidz_col_p Returns pointer to a column
+ * raidz_col_size Returns size of a column
+ * raidz_big_size Returns size of big columns
+ * raidz_short_size Returns size of short columns
+ */
+#define raidz_parity(rm) ((rm)->rm_firstdatacol)
+#define raidz_ncols(rm) ((rm)->rm_cols)
+#define raidz_nbigcols(rm) ((rm)->rm_bigcols)
+#define raidz_col_p(rm, c) ((rm)->rm_col + (c))
+#define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size)
+#define raidz_big_size(rm) (raidz_col_size(rm, CODE_P))
+#define raidz_short_size(rm) (raidz_col_size(rm, raidz_ncols(rm)-1))
+
+/*
+ * Macro defines an RAIDZ parity generation method
+ *
+ * @code parity the function produce
+ * @impl name of the implementation
+ */
+#define _RAIDZ_GEN_WRAP(code, impl) \
+static void \
+impl ## _gen_ ## code(void *rmp) \
+{ \
+ raidz_map_t *rm = (raidz_map_t *) rmp; \
+ raidz_generate_## code ## _impl(rm); \
+}
+
+/*
+ * Macro defines an RAIDZ data reconstruction method
+ *
+ * @code parity the function produce
+ * @impl name of the implementation
+ */
+#define _RAIDZ_REC_WRAP(code, impl) \
+static int \
+impl ## _rec_ ## code(void *rmp, const int *tgtidx) \
+{ \
+ raidz_map_t *rm = (raidz_map_t *) rmp; \
+ return (raidz_reconstruct_## code ## _impl(rm, tgtidx)); \
+}
+
+/*
+ * Define all gen methods for an implementation
+ *
+ * @impl name of the implementation
+ */
+#define DEFINE_GEN_METHODS(impl) \
+ _RAIDZ_GEN_WRAP(p, impl); \
+ _RAIDZ_GEN_WRAP(pq, impl); \
+ _RAIDZ_GEN_WRAP(pqr, impl)
+
+/*
+ * Define all rec functions for an implementation
+ *
+ * @impl name of the implementation
+ */
+#define DEFINE_REC_METHODS(impl) \
+ _RAIDZ_REC_WRAP(p, impl); \
+ _RAIDZ_REC_WRAP(q, impl); \
+ _RAIDZ_REC_WRAP(r, impl); \
+ _RAIDZ_REC_WRAP(pq, impl); \
+ _RAIDZ_REC_WRAP(pr, impl); \
+ _RAIDZ_REC_WRAP(qr, impl); \
+ _RAIDZ_REC_WRAP(pqr, impl)
+
+#define RAIDZ_GEN_METHODS(impl) \
+{ \
+ [RAIDZ_GEN_P] = & impl ## _gen_p, \
+ [RAIDZ_GEN_PQ] = & impl ## _gen_pq, \
+ [RAIDZ_GEN_PQR] = & impl ## _gen_pqr \
+}
+
+#define RAIDZ_REC_METHODS(impl) \
+{ \
+ [RAIDZ_REC_P] = & impl ## _rec_p, \
+ [RAIDZ_REC_Q] = & impl ## _rec_q, \
+ [RAIDZ_REC_R] = & impl ## _rec_r, \
+ [RAIDZ_REC_PQ] = & impl ## _rec_pq, \
+ [RAIDZ_REC_PR] = & impl ## _rec_pr, \
+ [RAIDZ_REC_QR] = & impl ## _rec_qr, \
+ [RAIDZ_REC_PQR] = & impl ## _rec_pqr \
+}
+
+
+typedef struct raidz_impl_kstat {
+ uint64_t gen[RAIDZ_GEN_NUM]; /* gen method speed kiB/s */
+ uint64_t rec[RAIDZ_REC_NUM]; /* rec method speed kiB/s */
+} raidz_impl_kstat_t;
+
+/*
+ * Enumerate various multiplication constants
+ * used in reconstruction methods
+ */
+typedef enum raidz_mul_info {
+ /* Reconstruct Q */
+ MUL_Q_X = 0,
+ /* Reconstruct R */
+ MUL_R_X = 0,
+ /* Reconstruct PQ */
+ MUL_PQ_X = 0,
+ MUL_PQ_Y = 1,
+ /* Reconstruct PR */
+ MUL_PR_X = 0,
+ MUL_PR_Y = 1,
+ /* Reconstruct QR */
+ MUL_QR_XQ = 0,
+ MUL_QR_X = 1,
+ MUL_QR_YQ = 2,
+ MUL_QR_Y = 3,
+ /* Reconstruct PQR */
+ MUL_PQR_XP = 0,
+ MUL_PQR_XQ = 1,
+ MUL_PQR_XR = 2,
+ MUL_PQR_YU = 3,
+ MUL_PQR_YP = 4,
+ MUL_PQR_YQ = 5,
+
+ MUL_CNT = 6
+} raidz_mul_info_t;
+
+/*
+ * Powers of 2 in the Galois field.
+ */
+extern const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256)));
+/* Logs of 2 in the Galois field defined above. */
+extern const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256)));
+
+/*
+ * Multiply a given number by 2 raised to the given power.
+ */
+static inline uint8_t
+vdev_raidz_exp2(const uint8_t a, const unsigned exp)
+{
+ if (a == 0)
+ return (0);
+
+ return (vdev_raidz_pow2[(exp + (unsigned) vdev_raidz_log2[a]) % 255]);
+}
+
+/*
+ * Galois Field operations.
+ *
+ * gf_exp2 - computes 2 raised to the given power
+ * gf_exp4 - computes 4 raised to the given power
+ * gf_mul - multiplication
+ * gf_div - division
+ * gf_inv - multiplicative inverse
+ */
+typedef unsigned gf_t;
+typedef unsigned gf_log_t;
+
+static inline gf_t
+gf_mul(const gf_t a, const gf_t b)
+{
+ gf_log_t logsum;
+
+ if (a == 0 || b == 0)
+ return (0);
+
+ logsum = (gf_log_t) vdev_raidz_log2[a] + (gf_log_t) vdev_raidz_log2[b];
+
+ return ((gf_t) vdev_raidz_pow2[logsum % 255]);
+}
+
+static inline gf_t
+gf_div(const gf_t a, const gf_t b)
+{
+ gf_log_t logsum;
+
+ ASSERT3U(b, >, 0);
+ if (a == 0)
+ return (0);
+
+ logsum = (gf_log_t) 255 + (gf_log_t) vdev_raidz_log2[a] -
+ (gf_log_t) vdev_raidz_log2[b];
+
+ return ((gf_t) vdev_raidz_pow2[logsum % 255]);
+}
+
+static inline gf_t
+gf_inv(const gf_t a)
+{
+ gf_log_t logsum;
+
+ ASSERT3U(a, >, 0);
+
+ logsum = (gf_log_t) 255 - (gf_log_t) vdev_raidz_log2[a];
+
+ return ((gf_t) vdev_raidz_pow2[logsum]);
+}
+
+static inline gf_t
+gf_exp2(gf_log_t exp)
+{
+ return (vdev_raidz_pow2[exp % 255]);
+}
+
+static inline gf_t
+gf_exp4(gf_log_t exp)
+{
+ ASSERT3U(exp, <=, 255);
+ return ((gf_t) vdev_raidz_pow2[(2 * exp) % 255]);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDEV_RAIDZ_H */
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index 10772d5265..e4db03ce89 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2016 Gvozden Nešković. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -35,6 +36,8 @@
#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
#ifdef ZFS_DEBUG
#include <sys/vdev.h> /* For vdev_xlate() in vdev_raidz_io_verify() */
@@ -98,7 +101,7 @@
* R = 4^n-1 * D_0 + 4^n-2 * D_1 + ... + 4^1 * D_n-2 + 4^0 * D_n-1
* = ((...((D_0) * 4 + D_1) * 4 + ...) * 4 + D_n-2) * 4 + D_n-1
*
- * We chose 1, 2, and 4 as our generators because 1 corresponds to the trival
+ * We chose 1, 2, and 4 as our generators because 1 corresponds to the trivial
* XOR operation, and 2 and 4 can be computed quickly and generate linearly-
* independent coefficients. (There are no additional coefficients that have
* this property which is why the uncorrected Plank method breaks down.)
@@ -107,34 +110,6 @@
* or in concert to recover missing data columns.
*/
-typedef struct raidz_col {
- uint64_t rc_devidx; /* child device index for I/O */
- uint64_t rc_offset; /* device offset */
- uint64_t rc_size; /* I/O size */
- abd_t *rc_abd; /* I/O data */
- void *rc_gdata; /* used to store the "good" version */
- int rc_error; /* I/O error for this device */
- uint8_t rc_tried; /* Did we attempt this I/O column? */
- uint8_t rc_skipped; /* Did we skip this I/O column? */
-} raidz_col_t;
-
-typedef struct raidz_map {
- uint64_t rm_cols; /* Regular column count */
- uint64_t rm_scols; /* Count including skipped columns */
- uint64_t rm_bigcols; /* Number of oversized columns */
- uint64_t rm_asize; /* Actual total I/O size */
- uint64_t rm_missingdata; /* Count of missing data devices */
- uint64_t rm_missingparity; /* Count of missing parity devices */
- uint64_t rm_firstdatacol; /* First data column/parity count */
- uint64_t rm_nskip; /* Skipped sectors for padding */
- uint64_t rm_skipstart; /* Column index of padding start */
- abd_t *rm_abd_copy; /* rm_asize-buffer of copied data */
- uintptr_t rm_reports; /* # of referencing checksum reports */
- uint8_t rm_freed; /* map no longer has referencing ZIO */
- uint8_t rm_ecksuminjected; /* checksum error was injected */
- raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
-} raidz_map_t;
-
#define VDEV_RAIDZ_P 0
#define VDEV_RAIDZ_Q 1
#define VDEV_RAIDZ_R 2
@@ -153,7 +128,7 @@ typedef struct raidz_map {
(mask) = (x) & 0x8080808080808080ULL; \
(mask) = ((mask) << 1) - ((mask) >> 7); \
(x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \
- ((mask) & 0x1d1d1d1d1d1d1d1d); \
+ ((mask) & 0x1d1d1d1d1d1d1d1dULL); \
}
#define VDEV_RAIDZ_64MUL_4(x, mask) \
@@ -164,104 +139,7 @@ typedef struct raidz_map {
#define VDEV_LABEL_OFFSET(x) (x + VDEV_LABEL_START_SIZE)
-/*
- * Force reconstruction to use the general purpose method.
- */
-int vdev_raidz_default_to_general;
-
-/* Powers of 2 in the Galois field defined above. */
-static const uint8_t vdev_raidz_pow2[256] = {
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
- 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
- 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
- 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
- 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
- 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
- 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
- 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
- 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
- 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
- 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
- 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
- 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
- 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
- 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
- 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
- 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
- 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
- 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
- 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
- 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
- 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
- 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
- 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
- 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
- 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
- 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
- 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
- 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
- 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
- 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
- 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
-};
-/* Logs of 2 in the Galois field defined above. */
-static const uint8_t vdev_raidz_log2[256] = {
- 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
- 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
- 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
- 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
- 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
- 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
- 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
- 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
- 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
- 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
- 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
- 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
- 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
- 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
- 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
- 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
- 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
- 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
- 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
- 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
- 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
- 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
- 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
- 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
- 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
- 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
- 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
- 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
- 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
- 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
- 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
- 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
-};
-
-static void vdev_raidz_generate_parity(raidz_map_t *rm);
-
-/*
- * Multiply a given number by 2 raised to the given power.
- */
-static uint8_t
-vdev_raidz_exp2(uint_t a, int exp)
-{
- if (a == 0)
- return (0);
-
- ASSERT(exp >= 0);
- ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
-
- exp += vdev_raidz_log2[a];
- if (exp > 255)
- exp -= 255;
-
- return (vdev_raidz_pow2[exp]);
-}
-
-static void
+void
vdev_raidz_map_free(raidz_map_t *rm)
{
int c;
@@ -271,7 +149,6 @@ vdev_raidz_map_free(raidz_map_t *rm)
if (rm->rm_col[c].rc_gdata != NULL)
abd_free(rm->rm_col[c].rc_gdata);
-
}
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
@@ -311,7 +188,7 @@ static void
vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
- size_t c = zcr->zcr_cbinfo;
+ const size_t c = zcr->zcr_cbinfo;
size_t x, offset;
const abd_t *good = NULL;
@@ -459,19 +336,19 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
* Divides the IO evenly across all child vdevs; usually, dcols is
* the number of children in the target vdev.
*/
-static raidz_map_t *
-vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
- uint64_t unit_shift, uint64_t dcols, uint64_t nparity)
+raidz_map_t *
+vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
+ uint64_t nparity)
{
raidz_map_t *rm;
/* The starting RAIDZ (parent) vdev sector of the block. */
- uint64_t b = offset >> unit_shift;
+ uint64_t b = zio->io_offset >> ashift;
/* The zio's size in units of the vdev's minimum sector size. */
- uint64_t s = size >> unit_shift;
+ uint64_t s = zio->io_size >> ashift;
/* The first column for this stripe. */
uint64_t f = b % dcols;
/* The starting byte offset on each child vdev. */
- uint64_t o = (b / dcols) << unit_shift;
+ uint64_t o = (b / dcols) << ashift;
uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
uint64_t off = 0;
@@ -530,7 +407,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
coff = o;
if (col >= dcols) {
col -= dcols;
- coff += 1ULL << unit_shift;
+ coff += 1ULL << ashift;
}
rm->rm_col[c].rc_devidx = col;
rm->rm_col[c].rc_offset = coff;
@@ -543,29 +420,29 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
if (c >= acols)
rm->rm_col[c].rc_size = 0;
else if (c < bc)
- rm->rm_col[c].rc_size = (q + 1) << unit_shift;
+ rm->rm_col[c].rc_size = (q + 1) << ashift;
else
- rm->rm_col[c].rc_size = q << unit_shift;
+ rm->rm_col[c].rc_size = q << ashift;
asize += rm->rm_col[c].rc_size;
}
- ASSERT3U(asize, ==, tot << unit_shift);
- rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift);
+ ASSERT3U(asize, ==, tot << ashift);
+ rm->rm_asize = roundup(asize, (nparity + 1) << ashift);
rm->rm_nskip = roundup(tot, nparity + 1) - tot;
- ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift);
+ ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << ashift);
ASSERT3U(rm->rm_nskip, <=, nparity);
for (c = 0; c < rm->rm_firstdatacol; c++)
rm->rm_col[c].rc_abd =
abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE);
- rm->rm_col[c].rc_abd = abd_get_offset_size(abd, 0,
+ rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0,
rm->rm_col[c].rc_size);
off = rm->rm_col[c].rc_size;
for (c = c + 1; c < acols; c++) {
- rm->rm_col[c].rc_abd = abd_get_offset_size(abd, off,
+ rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, off,
rm->rm_col[c].rc_size);
off += rm->rm_col[c].rc_size;
}
@@ -573,7 +450,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
/*
* If all data stored spans all columns, there's a danger that parity
* will always be on the same device and, since parity isn't read
- * during normal operation, that that device's I/O bandwidth won't be
+ * during normal operation, that device's I/O bandwidth won't be
* used effectively. We therefore switch the parity every 1MB.
*
* ... at least that was, ostensibly, the theory. As a practical
@@ -593,7 +470,7 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
ASSERT(rm->rm_cols >= 2);
ASSERT(rm->rm_col[0].rc_size == rm->rm_col[1].rc_size);
- if (rm->rm_firstdatacol == 1 && (offset & (1ULL << 20))) {
+ if (rm->rm_firstdatacol == 1 && (zio->io_offset & (1ULL << 20))) {
devidx = rm->rm_col[0].rc_devidx;
o = rm->rm_col[0].rc_offset;
rm->rm_col[0].rc_devidx = rm->rm_col[1].rc_devidx;
@@ -605,6 +482,9 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
rm->rm_skipstart = 1;
}
+ /* init RAIDZ parity ops */
+ rm->rm_ops = vdev_raidz_math_get_ops();
+
return (rm);
}
@@ -681,7 +561,6 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm)
p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
if (c == rm->rm_firstdatacol) {
- ASSERT3U(src->abd_size, >=, rm->rm_col[c].rc_size);
abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
} else {
struct pqr_struct pqr = { p, NULL, NULL };
@@ -793,9 +672,13 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
* Generate RAID parity in the first virtual columns according to the number of
* parity columns available.
*/
-static void
+void
vdev_raidz_generate_parity(raidz_map_t *rm)
{
+ /* Generate using the new math implementation */
+ if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL)
+ return;
+
switch (rm->rm_firstdatacol) {
case 1:
vdev_raidz_generate_parity_p(rm);
@@ -873,8 +756,8 @@ vdev_raidz_reconst_q_post_func(void *buf, size_t size, void *private)
int cnt = size / sizeof (dst[0]);
for (int i = 0; i < cnt; i++, dst++, rq->q++) {
- *dst ^= *rq->q;
+ *dst ^= *rq->q;
int j;
uint8_t *b;
for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
@@ -1159,9 +1042,12 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
* ~~ ~~
* __ __
* | 1 1 1 1 1 1 1 1 |
+ * | 128 64 32 16 8 4 2 1 |
* | 19 205 116 29 64 16 4 1 |
* | 1 0 0 0 0 0 0 0 |
- * (V|I)' = | 0 0 0 1 0 0 0 0 |
+ * | 0 1 0 0 0 0 0 0 |
+ * (V|I)' = | 0 0 1 0 0 0 0 0 |
+ * | 0 0 0 1 0 0 0 0 |
* | 0 0 0 0 1 0 0 0 |
* | 0 0 0 0 0 1 0 0 |
* | 0 0 0 0 0 0 1 0 |
@@ -1385,8 +1271,8 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
int i, j, x, cc, c;
uint8_t *src;
uint64_t ccount;
- uint8_t *dst[VDEV_RAIDZ_MAXPARITY];
- uint64_t dcount[VDEV_RAIDZ_MAXPARITY];
+ uint8_t *dst[VDEV_RAIDZ_MAXPARITY] = { NULL };
+ uint64_t dcount[VDEV_RAIDZ_MAXPARITY] = { 0 };
uint8_t log = 0;
uint8_t val;
int ll;
@@ -1595,12 +1481,12 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
return (code);
}
-static int
-vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
+int
+vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
{
int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
int ntgts;
- int i, c;
+ int i, c, ret;
int code;
int nbadparity, nbaddata;
int parity_valid[VDEV_RAIDZ_MAXPARITY];
@@ -1638,34 +1524,37 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
dt = &tgts[nbadparity];
+ /* Reconstruct using the new math implementation */
+ ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata);
+ if (ret != RAIDZ_ORIGINAL_IMPL)
+ return (ret);
+
/*
* See if we can use any of our optimized reconstruction routines.
*/
- if (!vdev_raidz_default_to_general) {
- switch (nbaddata) {
- case 1:
- if (parity_valid[VDEV_RAIDZ_P])
- return (vdev_raidz_reconstruct_p(rm, dt, 1));
+ switch (nbaddata) {
+ case 1:
+ if (parity_valid[VDEV_RAIDZ_P])
+ return (vdev_raidz_reconstruct_p(rm, dt, 1));
- ASSERT(rm->rm_firstdatacol > 1);
+ ASSERT(rm->rm_firstdatacol > 1);
- if (parity_valid[VDEV_RAIDZ_Q])
- return (vdev_raidz_reconstruct_q(rm, dt, 1));
+ if (parity_valid[VDEV_RAIDZ_Q])
+ return (vdev_raidz_reconstruct_q(rm, dt, 1));
- ASSERT(rm->rm_firstdatacol > 2);
- break;
+ ASSERT(rm->rm_firstdatacol > 2);
+ break;
- case 2:
- ASSERT(rm->rm_firstdatacol > 1);
+ case 2:
+ ASSERT(rm->rm_firstdatacol > 1);
- if (parity_valid[VDEV_RAIDZ_P] &&
- parity_valid[VDEV_RAIDZ_Q])
- return (vdev_raidz_reconstruct_pq(rm, dt, 2));
+ if (parity_valid[VDEV_RAIDZ_P] &&
+ parity_valid[VDEV_RAIDZ_Q])
+ return (vdev_raidz_reconstruct_pq(rm, dt, 2));
- ASSERT(rm->rm_firstdatacol > 2);
+ ASSERT(rm->rm_firstdatacol > 2);
- break;
- }
+ break;
}
code = vdev_raidz_reconstruct_general(rm, tgts, ntgts);
@@ -1821,11 +1710,16 @@ vdev_raidz_dumpio(vdev_t *vd, caddr_t data, size_t size,
* treat the on-disk format as if the only blocks are the complete 128
* KB size.
*/
- abd_t *abd = abd_get_from_buf(data - (offset - origoffset),
+
+ /* First, fake a zio for vdev_raidz_map_alloc. */
+ zio_t *zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
+ zio->io_offset = origoffset;
+ zio->io_size = SPA_OLD_MAXBLOCKSIZE;
+ zio->io_abd = abd_get_from_buf(data - (offset - origoffset),
SPA_OLD_MAXBLOCKSIZE);
- rm = vdev_raidz_map_alloc(abd,
- SPA_OLD_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift,
- vd->vdev_children, vd->vdev_nparity);
+
+ rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
+ vd->vdev_nparity);
coloffset = origoffset;
@@ -1874,7 +1768,9 @@ vdev_raidz_dumpio(vdev_t *vd, caddr_t data, size_t size,
}
vdev_raidz_map_free(rm);
- abd_put(abd);
+ abd_put(zio->io_abd);
+ kmem_free(zio, sizeof (zio_t));
+
#endif /* KERNEL */
return (err);
@@ -1965,8 +1861,7 @@ vdev_raidz_io_start(zio_t *zio)
raidz_col_t *rc;
int c, i;
- rm = vdev_raidz_map_alloc(zio->io_abd, zio->io_size, zio->io_offset,
- tvd->vdev_ashift, vd->vdev_children,
+ rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift, vd->vdev_children,
vd->vdev_nparity);
zio->io_vsd = rm;
@@ -2141,11 +2036,6 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
return (ret);
}
-/*
- * Keep statistics on all the ways that we used parity to correct data.
- */
-static uint64_t raidz_corrected[1 << VDEV_RAIDZ_MAXPARITY];
-
static int
vdev_raidz_worst_error(raidz_map_t *rm)
{
@@ -2251,7 +2141,6 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
*/
code = vdev_raidz_reconstruct(rm, tgts, n);
if (raidz_checksum_verify(zio) == 0) {
- atomic_inc_64(&raidz_corrected[code]);
for (i = 0; i < n; i++) {
c = tgts[i];
@@ -2466,8 +2355,6 @@ vdev_raidz_io_done(zio_t *zio)
code = vdev_raidz_reconstruct(rm, tgts, n);
if (raidz_checksum_verify(zio) == 0) {
- atomic_inc_64(&raidz_corrected[code]);
-
/*
* If we read more parity disks than were used
* for reconstruction, confirm that the other
@@ -2620,7 +2507,7 @@ vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
/*
* Determine if any portion of the provided block resides on a child vdev
* with a dirty DTL and therefore needs to be resilvered. The function
- * assumes that at least one DTL is dirty which imples that full stripe
+ * assumes that at least one DTL is dirty which implies that full stripe
* width blocks must be resilvered.
*/
static boolean_t
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz_math.c b/usr/src/uts/common/fs/zfs/vdev_raidz_math.c
new file mode 100644
index 0000000000..2a1dac33c5
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz_math.c
@@ -0,0 +1,571 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/types.h>
+#include <sys/zio.h>
+#include <sys/debug.h>
+#include <sys/zfs_debug.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+#include <sys/simd.h>
+
+#ifndef isspace
+#define isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || \
+ (c) == '\r' || (c) == '\f' || (c) == '\013')
+#endif
+
+extern boolean_t raidz_will_scalar_work(void);
+
+/* Opaque implementation with NULL methods to represent original methods */
+static const raidz_impl_ops_t vdev_raidz_original_impl = {
+ .name = "original",
+ .is_supported = raidz_will_scalar_work,
+};
+
+/* RAIDZ parity op that contain the fastest methods */
+static raidz_impl_ops_t vdev_raidz_fastest_impl = {
+ .name = "fastest"
+};
+
+/* All compiled in implementations */
+const raidz_impl_ops_t *raidz_all_maths[] = {
+ &vdev_raidz_original_impl,
+ &vdev_raidz_scalar_impl,
+};
+
+/* Indicate that benchmark has been completed */
+static boolean_t raidz_math_initialized = B_FALSE;
+
+/* Select raidz implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX - 1)
+#define IMPL_ORIGINAL (0)
+#define IMPL_SCALAR (1)
+
+#define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i))
+
+static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
+static uint32_t user_sel_impl = IMPL_FASTEST;
+
+/* Hold all supported implementations */
+static size_t raidz_supp_impl_cnt = 0;
+static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
+
+#if defined(_KERNEL)
+/*
+ * kstats values for supported implementations
+ * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
+ *
+ * PORTING NOTE:
+ * On illumos this is not a kstat. OpenZFS uses their home-grown kstat code
+ * which implements a free-form kstat using additional functionality that does
+ * not exist in illumos. Because there are no software consumers of this
+ * information, we omit a kstat API. If an administrator needs to see this
+ * data for some reason, they can use mdb.
+ *
+ * The format of the kstat data on OpenZFS would be a "header" that looks like
+ * this (a column for each entry in the "raidz_gen_name" and "raidz_rec_name"
+ * arrays, starting with the parity function "implementation" name):
+ * impl gen_p gen_pq gen_pqr rec_p rec_q rec_r rec_pq rec_pr rec_qr rec_pqr
+ * This is followed by a row for each parity function implementation, showing
+ * the "speed" values calculated for that implementation for each of the
+ * parity generation and reconstruction functions in the "raidz_all_maths"
+ * array.
+ */
+static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
+
+#endif
+
+/*
+ * Returns the RAIDZ operations for raidz_map() parity calculations. When
+ * a SIMD implementation is not allowed in the current context, then fallback
+ * to the fastest generic implementation.
+ */
+const raidz_impl_ops_t *
+vdev_raidz_math_get_ops(void)
+{
+ /*
+ * illumos porting note:
+ * The following check from OpenZFS is disabled since we don't have
+ * this compiled in yet and we need to be able to change the
+ * implementation for the user-level test suite.
+ *
+ * if (!kfpu_allowed())
+ * return (&vdev_raidz_scalar_impl);
+ */
+
+ raidz_impl_ops_t *ops = NULL;
+ const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(raidz_math_initialized);
+ ops = &vdev_raidz_fastest_impl;
+ break;
+ case IMPL_CYCLE:
+ /* Cycle through all supported implementations */
+ ASSERT(raidz_math_initialized);
+ ASSERT3U(raidz_supp_impl_cnt, >, 0);
+ static size_t cycle_impl_idx = 0;
+ size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
+ ops = raidz_supp_impl[idx];
+ break;
+ case IMPL_ORIGINAL:
+ ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
+ break;
+ case IMPL_SCALAR:
+ ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
+ break;
+ default:
+ ASSERT3U(impl, <, raidz_supp_impl_cnt);
+ ASSERT3U(raidz_supp_impl_cnt, >, 0);
+ if (impl < ARRAY_SIZE(raidz_all_maths))
+ ops = raidz_supp_impl[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+/*
+ * Select parity generation method for raidz_map
+ */
+int
+vdev_raidz_math_generate(raidz_map_t *rm)
+{
+ raidz_gen_f gen_parity = NULL;
+
+ switch (raidz_parity(rm)) {
+ case 1:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
+ break;
+ case 2:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
+ break;
+ case 3:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
+ break;
+ default:
+ gen_parity = NULL;
+ cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
+ (uint_t)raidz_parity(rm));
+ break;
+ }
+
+ /* if method is NULL execute the original implementation */
+ if (gen_parity == NULL)
+ return (RAIDZ_ORIGINAL_IMPL);
+
+ gen_parity(rm);
+
+ return (0);
+}
+
+static raidz_rec_f
+reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1 && parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+static raidz_rec_f
+reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1) {
+ if (parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ } else if (parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_Q]);
+ }
+ } else if (nbaddata == 2 &&
+ parity_valid[CODE_P] && parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+static raidz_rec_f
+reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1) {
+ if (parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ } else if (parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_Q]);
+ } else if (parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_R]);
+ }
+ } else if (nbaddata == 2) {
+ if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
+ } else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PR]);
+ } else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_QR]);
+ }
+ } else if (nbaddata == 3 &&
+ parity_valid[CODE_P] && parity_valid[CODE_Q] &&
+ parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+/*
+ * Select data reconstruction method for raidz_map
+ * @parity_valid - Parity validity flag
+ * @dt - Failed data index array
+ * @nbaddata - Number of failed data columns
+ */
+int
+vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
+ const int *dt, const int nbaddata)
+{
+ raidz_rec_f rec_fn = NULL;
+
+ switch (raidz_parity(rm)) {
+ case PARITY_P:
+ rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
+ break;
+ case PARITY_PQ:
+ rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
+ break;
+ case PARITY_PQR:
+ rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
+ break;
+ default:
+ cmn_err(CE_PANIC, "invalid RAID-Z configuration %u",
+ (uint_t)raidz_parity(rm));
+ break;
+ }
+
+ if (rec_fn == NULL)
+ return (RAIDZ_ORIGINAL_IMPL);
+ else
+ return (rec_fn(rm, dt));
+}
+
+const char *raidz_gen_name[] = {
+ "gen_p", "gen_pq", "gen_pqr"
+};
+const char *raidz_rec_name[] = {
+ "rec_p", "rec_q", "rec_r",
+ "rec_pq", "rec_pr", "rec_qr", "rec_pqr"
+};
+
+#if defined(_KERNEL)
+
+#define BENCH_D_COLS (8ULL)
+#define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
+#define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
+#define BENCH_NS MSEC2NSEC(25) /* 25ms */
+
+typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
+
+static void
+benchmark_gen_impl(raidz_map_t *rm, const int fn)
+{
+ (void) fn;
+ vdev_raidz_generate_parity(rm);
+}
+
+static void
+benchmark_rec_impl(raidz_map_t *rm, const int fn)
+{
+ static const int rec_tgt[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
+}
+
+/*
+ * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
+ * is performed by setting the rm_ops pointer and calling the top level
+ * generate/reconstruct methods of bench_rm.
+ */
+static void
+benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
+{
+ uint64_t run_cnt, speed, best_speed = 0;
+ hrtime_t t_start, t_diff;
+ raidz_impl_ops_t *curr_impl;
+ raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
+ int impl, i;
+
+ for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
+ /* set an implementation to benchmark */
+ curr_impl = raidz_supp_impl[impl];
+ bench_rm->rm_ops = curr_impl;
+
+ run_cnt = 0;
+ t_start = gethrtime();
+
+ do {
+ for (i = 0; i < 25; i++, run_cnt++)
+ bench_fn(bench_rm, fn);
+
+ t_diff = gethrtime() - t_start;
+ } while (t_diff < BENCH_NS);
+
+ speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
+ speed /= (t_diff * BENCH_COLS);
+
+ if (bench_fn == benchmark_gen_impl)
+ raidz_impl_kstats[impl].gen[fn] = speed;
+ else
+ raidz_impl_kstats[impl].rec[fn] = speed;
+
+ /* Update fastest implementation method */
+ if (speed > best_speed) {
+ best_speed = speed;
+
+ if (bench_fn == benchmark_gen_impl) {
+ fstat->gen[fn] = impl;
+ vdev_raidz_fastest_impl.gen[fn] =
+ curr_impl->gen[fn];
+ } else {
+ fstat->rec[fn] = impl;
+ vdev_raidz_fastest_impl.rec[fn] =
+ curr_impl->rec[fn];
+ }
+ }
+ }
+}
+#endif
+
+/*
+ * Initialize and benchmark all supported implementations.
+ */
+static void
+benchmark_raidz(void)
+{
+ raidz_impl_ops_t *curr_impl;
+ int i, c;
+
+ /* Move supported impl into raidz_supp_impl */
+ for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
+ curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
+
+ if (curr_impl->init)
+ curr_impl->init();
+
+ if (curr_impl->is_supported())
+ raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
+ }
+ membar_producer(); /* complete raidz_supp_impl[] init */
+ raidz_supp_impl_cnt = c; /* number of supported impl */
+
+#if defined(_KERNEL)
+ zio_t *bench_zio = NULL;
+ raidz_map_t *bench_rm = NULL;
+ uint64_t bench_parity;
+
+ /* Fake a zio and run the benchmark on a warmed up buffer */
+ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
+ bench_zio->io_offset = 0;
+ bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
+ bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
+ memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
+
+ /* Benchmark parity generation methods */
+ for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+ bench_parity = fn + 1;
+ /* New raidz_map is needed for each generate_p/q/r */
+ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+ BENCH_D_COLS + bench_parity, bench_parity);
+
+ benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
+
+ vdev_raidz_map_free(bench_rm);
+ }
+
+ /* Benchmark data reconstruction methods */
+ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+ BENCH_COLS, PARITY_PQR);
+
+ for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
+ benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
+
+ vdev_raidz_map_free(bench_rm);
+
+ /* cleanup the bench zio */
+ abd_free(bench_zio->io_abd);
+ kmem_free(bench_zio, sizeof (zio_t));
+#else
+ /*
+ * Skip the benchmark in user space to avoid impacting libzpool
+ * consumers (zdb, zhack, zinject, ztest). The last implementation
+ * is assumed to be the fastest and used by default.
+ */
+ memcpy(&vdev_raidz_fastest_impl,
+ raidz_supp_impl[raidz_supp_impl_cnt - 1],
+ sizeof (vdev_raidz_fastest_impl));
+ strcpy(vdev_raidz_fastest_impl.name, "fastest");
+#endif /* _KERNEL */
+}
+
+void
+vdev_raidz_math_init(void)
+{
+ /* Determine the fastest available implementation. */
+ benchmark_raidz();
+
+ /* Finish initialization */
+ atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
+ raidz_math_initialized = B_TRUE;
+}
+
+void
+vdev_raidz_math_fini(void)
+{
+ raidz_impl_ops_t const *curr_impl;
+
+ for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
+ curr_impl = raidz_all_maths[i];
+ if (curr_impl->fini)
+ curr_impl->fini();
+ }
+}
+
+static const struct {
+ char *name;
+ uint32_t sel;
+} math_impl_opts[] = {
+ { "cycle", IMPL_CYCLE },
+ { "fastest", IMPL_FASTEST },
+ { "original", IMPL_ORIGINAL },
+ { "scalar", IMPL_SCALAR }
+};
+
+/*
+ * Function sets desired raidz implementation.
+ *
+ * If we are called before init(), user preference will be saved in
+ * user_sel_impl, and applied in later init() call. This occurs when module
+ * parameter is specified on module load. Otherwise, directly update
+ * zfs_vdev_raidz_impl.
+ *
+ * @val Name of raidz implementation to use
+ * @param Unused.
+ */
+int
+vdev_raidz_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ char req_name[RAIDZ_IMPL_NAME_MAX];
+ uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
+ size_t i;
+
+ /* sanitize input */
+ i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
+ if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
+ return (err);
+
+ strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
+ while (i > 0 && !!isspace(req_name[i-1]))
+ i--;
+ req_name[i] = '\0';
+
+ /* Check mandatory options */
+ for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
+ if (strcmp(req_name, math_impl_opts[i].name) == 0) {
+ impl = math_impl_opts[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported impl if init() was already called */
+ if (err != 0 && raidz_math_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < raidz_supp_impl_cnt; i++) {
+ if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ if (err == 0) {
+ if (raidz_math_initialized)
+ atomic_swap_32(&zfs_vdev_raidz_impl, impl);
+ else
+ atomic_swap_32(&user_sel_impl, impl);
+ }
+
+ return (err);
+}
+
+#if defined(_KERNEL) && defined(__linux__)
+
+static int
+zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
+{
+ return (vdev_raidz_impl_set(val));
+}
+
+static int
+zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+ int i, cnt = 0;
+ char *fmt;
+ const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+ ASSERT(raidz_math_initialized);
+
+ /* list mandatory options */
+ for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
+ fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
+ }
+
+ /* list all supported implementations */
+ for (i = 0; i < raidz_supp_impl_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
+ }
+
+ return (cnt);
+}
+
+module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
+ zfs_vdev_raidz_impl_get, NULL, 0644);
+MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
+#endif
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h b/usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h
new file mode 100644
index 0000000000..89c2082c4a
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz_math_impl.h
@@ -0,0 +1,1477 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#ifndef _VDEV_RAIDZ_MATH_IMPL_H
+#define _VDEV_RAIDZ_MATH_IMPL_H
+
+#include <sys/types.h>
+
+#define raidz_inline inline __attribute__((always_inline))
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/*
+ * Functions calculate multiplication constants for data reconstruction.
+ * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
+ * used parity columns for reconstruction.
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ * @coeff output array of coefficients. Array must be provided by
+ * user and must hold minimum MUL_CNT values.
+ */
+static noinline void
+raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+
+ coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
+}
+
+static noinline void
+raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+
+ coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
+}
+
+static noinline void
+raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+ gf_t a, b, e;
+
+ a = gf_exp2(x + 255 - y);
+ b = gf_exp2(255 - (ncols - x - 1));
+ e = a ^ 0x01;
+
+ coeff[MUL_PQ_X] = gf_div(a, e);
+ coeff[MUL_PQ_Y] = gf_div(b, e);
+}
+
+static noinline void
+raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+
+ gf_t a, b, e;
+
+ a = gf_exp4(x + 255 - y);
+ b = gf_exp4(255 - (ncols - x - 1));
+ e = a ^ 0x01;
+
+ coeff[MUL_PR_X] = gf_div(a, e);
+ coeff[MUL_PR_Y] = gf_div(b, e);
+}
+
+static noinline void
+raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+
+ gf_t nx, ny, nxxy, nxyy, d;
+
+ nx = gf_exp2(ncols - x - 1);
+ ny = gf_exp2(ncols - y - 1);
+ nxxy = gf_mul(gf_mul(nx, nx), ny);
+ nxyy = gf_mul(gf_mul(nx, ny), ny);
+ d = nxxy ^ nxyy;
+
+ coeff[MUL_QR_XQ] = ny;
+ coeff[MUL_QR_X] = gf_div(ny, d);
+ coeff[MUL_QR_YQ] = nx;
+ coeff[MUL_QR_Y] = gf_div(nx, d);
+}
+
+static noinline void
+raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+ const unsigned z = tgtidx[TARGET_Z];
+
+ gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
+
+ nx = gf_exp2(ncols - x - 1);
+ ny = gf_exp2(ncols - y - 1);
+ nz = gf_exp2(ncols - z - 1);
+
+ nxx = gf_exp4(ncols - x - 1);
+ nyy = gf_exp4(ncols - y - 1);
+ nzz = gf_exp4(ncols - z - 1);
+
+ nyyz = gf_mul(gf_mul(ny, nz), ny);
+ nyzz = gf_mul(nzz, ny);
+
+ xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
+ gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^ nyzz;
+
+ yd = gf_inv(ny ^ nz);
+
+ coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
+ coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
+ coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
+ coeff[MUL_PQR_YU] = nx;
+ coeff[MUL_PQR_YP] = gf_mul(nz, yd);
+ coeff[MUL_PQR_YQ] = yd;
+}
+
+/*
+ * Method for zeroing a buffer (can be implemented using SIMD).
+ * This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @dsize Destination buffer size
+ * @private Unused
+ */
+static int
+raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ size_t i;
+
+ ZERO_DEFINE();
+
+ (void) private; /* unused */
+
+ ZERO(ZERO_D);
+
+ for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
+ STORE(dst + i, ZERO_D);
+ STORE(dst + i + ZERO_STRIDE, ZERO_D);
+ }
+
+ return (0);
+}
+
+#define raidz_zero(dabd, size) \
+{ \
+ abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL); \
+}
+
+/*
+ * Method for copying two buffers (can be implemented using SIMD).
+ * This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @dsize Destination buffer size
+ * @ssize Source buffer size
+ * @private Unused
+ */
+static int
+raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ const v_t *src = (v_t *)sc;
+ size_t i;
+
+ COPY_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
+ LOAD(src + i, COPY_D);
+ STORE(dst + i, COPY_D);
+
+ LOAD(src + i + COPY_STRIDE, COPY_D);
+ STORE(dst + i + COPY_STRIDE, COPY_D);
+ }
+
+ return (0);
+}
+
+
+#define raidz_copy(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
+}
+
+/*
+ * Method for adding (XORing) two buffers.
+ * Source and destination are XORed together and result is stored in
+ * destination buffer. This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @dsize Destination buffer size
+ * @ssize Source buffer size
+ * @private Unused
+ */
+static int
+raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ const v_t *src = (v_t *)sc;
+ size_t i;
+
+ ADD_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
+ LOAD(dst + i, ADD_D);
+ XOR_ACC(src + i, ADD_D);
+ STORE(dst + i, ADD_D);
+
+ LOAD(dst + i + ADD_STRIDE, ADD_D);
+ XOR_ACC(src + i + ADD_STRIDE, ADD_D);
+ STORE(dst + i + ADD_STRIDE, ADD_D);
+ }
+
+ return (0);
+}
+
+#define raidz_add(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
+}
+
+/*
+ * Method for multiplying a buffer with a constant in GF(2^8).
+ * Symbols from buffer are multiplied by a constant and result is stored
+ * back in the same buffer.
+ *
+ * @dc In/Out data buffer.
+ * @size Size of the buffer
+ * @private pointer to the multiplication constant (unsigned)
+ */
+static int
+raidz_mul_abd_cb(void *dc, size_t size, void *private)
+{
+ const unsigned mul = *((unsigned *)private);
+ v_t *d = (v_t *)dc;
+ size_t i;
+
+ MUL_DEFINE();
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
+ LOAD(d + i, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i, MUL_D);
+
+ LOAD(d + i + MUL_STRIDE, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i + MUL_STRIDE, MUL_D);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Syndrome generation/update macros
+ *
+ * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
+ */
+#define P_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ STORE((t), T); \
+}
+
+#define R_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define R_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ STORE((t), T); \
+}
+
+
+/*
+ * PARITY CALCULATION
+ *
+ * Macros *_SYNDROME are used for parity/syndrome calculation.
+ * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
+ * length of data column, and *_SYNDROME() macros are only for updating
+ * the parity/syndrome if data column is shorter.
+ *
+ * P parity is calculated using raidz_add_abd().
+ */
+
+/*
+ * Generate P parity (RAIDZ1)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_p_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t psize = rm->rm_col[CODE_P].rc_size;
+ abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
+ size_t size;
+ abd_t *dabd;
+
+ raidz_math_begin();
+
+ /* start with first data column */
+ raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
+
+ for (c = 2; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ size = rm->rm_col[c].rc_size;
+
+ /* add data column */
+ raidz_add(pabd, dabd, size);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * Generate PQ parity (RAIDZ2)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *)c[0];
+ v_t *q = (v_t *)c[1];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
+ q += GEN_PQ_STRIDE) {
+ LOAD(d, GEN_PQ_D);
+ P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
+ Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
+ }
+ for (; q < qend; q += GEN_PQ_STRIDE) {
+ Q_SYNDROME(GEN_PQ_C, q);
+ }
+}
+
+
+/*
+ * Generate PQ parity (RAIDZ2)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pq_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
+
+ for (c = 3; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
+ raidz_gen_pq_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * Generate PQR parity (RAIDZ3)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *)c[0];
+ v_t *q = (v_t *)c[1];
+ v_t *r = (v_t *)c[CODE_R];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
+ q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ LOAD(d, GEN_PQR_D);
+ P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
+ Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
+ R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
+ }
+ for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ Q_SYNDROME(GEN_PQR_C, q);
+ R_SYNDROME(GEN_PQR_C, r);
+ }
+}
+
+
+/*
+ * Generate PQR parity (RAIDZ3)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pqr_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
+
+ for (c = 4; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
+ raidz_gen_pqr_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * DATA RECONSTRUCTION
+ *
+ * Data reconstruction process consists of two phases:
+ * - Syndrome calculation
+ * - Data reconstruction
+ *
+ * Syndrome is calculated by generating parity using available data columns
+ * and zeros in places of erasure. Existing parity is added to corresponding
+ * syndrome value to obtain the [P|Q|R]syn values from equation:
+ * P = Psyn + Dx + Dy + Dz
+ * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
+ * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
+ *
+ * For data reconstruction phase, the corresponding equations are solved
+ * for missing data (Dx, Dy, Dz). This generally involves multiplying known
+ * symbols by a coefficient and adding them together. The multiplication
+ * constant coefficients are calculated ahead of the operation in
+ * raidz_rec_[q|r|pq|pq|qr|pqr]_coeff() functions.
+ *
+ * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
+ * and "short" columns.
+ * For this reason, reconstruction is performed in minimum of
+ * two steps. First, from offset 0 to short_size, then from short_size to
+ * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
+ * over both ranges. The split also enables removal of conditional expressions
+ * from loop bodies, improving throughput of SIMD implementations.
+ * For the best performance, all functions marked with raidz_inline attribute
+ * must be inlined by compiler.
+ *
+ * parity data
+ * columns columns
+ * <----------> <------------------>
+ * x y <----+ missing columns (x, y)
+ * | |
+ * +---+---+---+---+-v-+---+-v-+---+ ^ 0
+ * | | | | | | | | | |
+ * | | | | | | | | | |
+ * | P | Q | R | D | D | D | D | D | |
+ * | | | | 0 | 1 | 2 | 3 | 4 | |
+ * | | | | | | | | | v
+ * | | | | | +---+---+---+ ^ short_size
+ * | | | | | | |
+ * +---+---+---+---+---+ v big_size
+ * <------------------> <---------->
+ * big columns short columns
+ *
+ */
+
+
+
+
+/*
+ * Reconstruct single data column using P parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method not applicable
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ size_t size;
+ abd_t *dabd;
+
+ raidz_math_begin();
+
+ /* copy P into target */
+ raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
+
+ /* generate p_syndrome */
+ for (c = firstdc; c < ncols; c++) {
+ if (c == x)
+ continue;
+
+ dabd = rm->rm_col[c].rc_abd;
+ size = MIN(rm->rm_col[c].rc_size, xsize);
+
+ raidz_add(xabd, dabd, size);
+ }
+
+ raidz_math_end();
+
+ return (1 << CODE_P);
+}
+
+
+/*
+ * Generate Q syndrome (Qsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @xsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)xc[TARGET_X];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (xsize / sizeof (v_t));
+
+ SYN_Q_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_Q_D);
+ Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ Q_SYNDROME(SYN_Q_X, x);
+ }
+}
+
+
+/*
+ * Reconstruct single data column using Q parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method raidz_mul_abd_cb()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *tabds[] = { xabd };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_q_coeff(rm, tgtidx, coeff);
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_q_abd);
+ }
+
+ /* add Q to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_Q);
+}
+
+
+/*
+ * Generate R syndrome (Rsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)xc[TARGET_X];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+
+ SYN_R_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_R_D);
+ R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ R_SYNDROME(SYN_R_X, x);
+ }
+}
+
+
+/*
+ * Reconstruct single data column using R parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method raidz_mul_abd_cb()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *tabds[] = { xabd };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_r_coeff(rm, tgtidx, coeff);
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+
+	/* generate r_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_r_abd);
+ }
+
+ /* add R to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_R);
+}
+
+
+/*
+ * Generate P and Q syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)tc[TARGET_X];
+ v_t *y = (v_t *)tc[TARGET_Y];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PQ_D);
+ P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
+ Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQ_X, y);
+ }
+}
+
+/*
+ * Reconstruct data using PQ parity and PQ syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)tc[TARGET_X];
+ v_t *y = (v_t *)tc[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_PQ_DEFINE();
+
+ for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
+ p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
+ LOAD(x, REC_PQ_X);
+ LOAD(y, REC_PQ_Y);
+
+ XOR_ACC(p, REC_PQ_X);
+ XOR_ACC(q, REC_PQ_Y);
+
+ /* Save Pxy */
+ COPY(REC_PQ_X, REC_PQ_T);
+
+ /* Calc X */
+ MUL(mul[MUL_PQ_X], REC_PQ_X);
+ MUL(mul[MUL_PQ_Y], REC_PQ_Y);
+ XOR(REC_PQ_Y, REC_PQ_X);
+ STORE(x, REC_PQ_X);
+
+ /* Calc Y */
+ XOR(REC_PQ_T, REC_PQ_X);
+ STORE(y, REC_PQ_X);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using PQ parity
+ *
+ * @syn_method raidz_syn_pq_abd()
+ * @rec_method raidz_rec_pq_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pq_coeff(rm, tgtidx, coeff);
+
+ /*
+	 * Check if any of the targets is shorter than the others
+ * In this case, shorter target needs to be replaced with
+ * new buffer so that syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+	/* generate p and q syndromes */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pq_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
+
+ /* Copy shorter targets back to the original abd buffer */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q));
+}
+
+
+/*
+ * Generate P and R syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ const v_t *d = (const v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PR_D);
+ P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
+ R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ R_SYNDROME(SYN_PR_X, y);
+ }
+}
+
+/*
+ * Reconstruct data using PR parity and PR syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_PR_DEFINE();
+
+ for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
+ p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
+ LOAD(x, REC_PR_X);
+ LOAD(y, REC_PR_Y);
+ XOR_ACC(p, REC_PR_X);
+ XOR_ACC(q, REC_PR_Y);
+
+ /* Save Pxy */
+ COPY(REC_PR_X, REC_PR_T);
+
+ /* Calc X */
+ MUL(mul[MUL_PR_X], REC_PR_X);
+ MUL(mul[MUL_PR_Y], REC_PR_Y);
+ XOR(REC_PR_Y, REC_PR_X);
+ STORE(x, REC_PR_X);
+
+ /* Calc Y */
+ XOR(REC_PR_T, REC_PR_X);
+ STORE(y, REC_PR_X);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using PR parity
+ *
+ * @syn_method raidz_syn_pr_abd()
+ * @rec_method raidz_rec_pr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[0];
+ const size_t y = tgtidx[1];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pr_coeff(rm, tgtidx, coeff);
+
+ /*
+	 * Check if some of the targets are shorter than the others.
+ * They need to be replaced with a new buffer so that syndrome can
+ * be calculated on full length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+	/* generate p and r syndromes */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q));
+}
+
+
+/*
+ * Generate Q and R syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
+    const size_t dsize)
+{
+	v_t *x = (v_t *)c[TARGET_X];
+	v_t *y = (v_t *)c[TARGET_Y];
+	const v_t * const xend = x + (tsize / sizeof (v_t));
+	const v_t *d = (const v_t *)dc;
+	const v_t * const dend = d + (dsize / sizeof (v_t));
+
+	SYN_QR_DEFINE();
+
+	MUL2_SETUP();
+
+	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+		/*
+		 * Load into the QR temp declared by SYN_QR_DEFINE();
+		 * the original loaded into SYN_PQ_D (copy-paste from the
+		 * PQ variant), which the Q/R syndrome macros never read.
+		 */
+		LOAD(d, SYN_QR_D);
+		Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
+		R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
+	}
+	for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
+		Q_SYNDROME(SYN_QR_X, x);
+		R_SYNDROME(SYN_QR_X, y);
+	}
+}
+
+
+/*
+ * Reconstruct data using QR parity and QR syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_QR_DEFINE();
+
+ for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
+ p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
+ LOAD(x, REC_QR_X);
+ LOAD(y, REC_QR_Y);
+
+ XOR_ACC(p, REC_QR_X);
+ XOR_ACC(q, REC_QR_Y);
+
+		/* Save Qxy */
+ COPY(REC_QR_X, REC_QR_T);
+
+ /* Calc X */
+ MUL(mul[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
+ XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
+ MUL(mul[MUL_QR_X], REC_QR_X); /* X = X * xm */
+ STORE(x, REC_QR_X);
+
+ /* Calc Y */
+ MUL(mul[MUL_QR_YQ], REC_QR_T); /* X = Q * xqm */
+ XOR(REC_QR_Y, REC_QR_T); /* X = R ^ X */
+ MUL(mul[MUL_QR_Y], REC_QR_T); /* X = X * xm */
+ STORE(y, REC_QR_T);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using QR parity
+ *
+ * @syn_method raidz_syn_qr_abd()
+ * @rec_method raidz_rec_qr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_qr_coeff(rm, tgtidx, coeff);
+
+ /*
+	 * Check if any of the targets is shorter than the others
+ * In this case, shorter target needs to be replaced with
+ * new buffer so that syndrome can be calculated.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+	/* generate q and r syndromes */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_qr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+
+ return ((1 << CODE_Q) | (1 << CODE_R));
+}
+
+
+/*
+ * Generate P, Q, and R syndromes
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
+    const size_t dsize)
+{
+	v_t *x = (v_t *)c[TARGET_X];
+	v_t *y = (v_t *)c[TARGET_Y];
+	v_t *z = (v_t *)c[TARGET_Z];
+	const v_t * const yend = y + (tsize / sizeof (v_t));
+	const v_t *d = (const v_t *)dc;
+	const v_t * const dend = d + (dsize / sizeof (v_t));
+
+	SYN_PQR_DEFINE();
+
+	MUL2_SETUP();
+
+	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
+	    z += SYN_STRIDE) {
+		LOAD(d, SYN_PQR_D);
+		P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x);
+		Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
+		R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
+	}
+	for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
+		Q_SYNDROME(SYN_PQR_X, y);
+		R_SYNDROME(SYN_PQR_X, z);
+	}
+}
+
+
+/*
+ * Reconstruct data using PQR parity and PQR syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
+ const unsigned * const mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ v_t *z = (v_t *)t[TARGET_Z];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+ const v_t *r = (v_t *)c[CODE_R];
+
+ REC_PQR_DEFINE();
+
+ for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
+ z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
+ r += REC_PQR_STRIDE) {
+ LOAD(x, REC_PQR_X);
+ LOAD(y, REC_PQR_Y);
+ LOAD(z, REC_PQR_Z);
+
+ XOR_ACC(p, REC_PQR_X);
+ XOR_ACC(q, REC_PQR_Y);
+ XOR_ACC(r, REC_PQR_Z);
+
+ /* Save Pxyz and Qxyz */
+ COPY(REC_PQR_X, REC_PQR_XS);
+ COPY(REC_PQR_Y, REC_PQR_YS);
+
+ /* Calc X */
+ MUL(mul[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
+ MUL(mul[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
+ XOR(REC_PQR_Y, REC_PQR_X);
+ MUL(mul[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
+ XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */
+ STORE(x, REC_PQR_X);
+
+ /* Calc Y */
+ XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
+ MUL(mul[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
+ COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
+ MUL(mul[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
+ MUL(mul[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
+ STORE(y, REC_PQR_YS);
+
+ /* Calc Z */
+ XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
+ STORE(z, REC_PQR_YS);
+ }
+}
+
+
+/*
+ * Reconstruct three data columns using PQR parity
+ *
+ * @syn_method	raidz_syn_pqr_abd()
+ * @rec_method	raidz_rec_pqr_abd()
+ *
+ * @rm	RAIDZ map
+ * @tgtidx	array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+	size_t c;
+	size_t dsize;
+	abd_t *dabd;
+	const size_t firstdc = raidz_parity(rm);
+	const size_t ncols = raidz_ncols(rm);
+	const size_t x = tgtidx[TARGET_X];
+	const size_t y = tgtidx[TARGET_Y];
+	const size_t z = tgtidx[TARGET_Z];
+	const size_t xsize = rm->rm_col[x].rc_size;
+	const size_t ysize = rm->rm_col[y].rc_size;
+	const size_t zsize = rm->rm_col[z].rc_size;
+	abd_t *xabd = rm->rm_col[x].rc_abd;
+	abd_t *yabd = rm->rm_col[y].rc_abd;
+	abd_t *zabd = rm->rm_col[z].rc_abd;
+	abd_t *tabds[] = { xabd, yabd, zabd };
+	abd_t *cabds[] = {
+		rm->rm_col[CODE_P].rc_abd,
+		rm->rm_col[CODE_Q].rc_abd,
+		rm->rm_col[CODE_R].rc_abd
+	};
+	unsigned coeff[MUL_CNT];
+	raidz_rec_pqr_coeff(rm, tgtidx, coeff);
+
+	/*
+	 * Check if some of the targets are shorter than others.
+	 * In this case, a shorter target needs to be replaced with a
+	 * new (xsize-long) buffer so that the syndrome can be calculated.
+	 * Only y and z are ever replaced: x is treated as the longest
+	 * of the three target columns.
+	 */
+	if (ysize < xsize) {
+		yabd = abd_alloc(xsize, B_FALSE);
+		tabds[1] = yabd;
+	}
+	if (zsize < xsize) {
+		zabd = abd_alloc(xsize, B_FALSE);
+		tabds[2] = zabd;
+	}
+
+	raidz_math_begin();
+
+	/* Start with first data column if present */
+	if (firstdc != x) {
+		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+		raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
+	} else {
+		raidz_zero(xabd, xsize);
+		raidz_zero(yabd, xsize);
+		raidz_zero(zabd, xsize);
+	}
+
+	/* generate p, q, r syndromes over the remaining data columns */
+	for (c = firstdc+1; c < ncols; c++) {
+		if (c == x || c == y || c == z) {
+			/* missing columns contribute no data */
+			dabd = NULL;
+			dsize = 0;
+		} else {
+			dabd = rm->rm_col[c].rc_abd;
+			dsize = rm->rm_col[c].rc_size;
+		}
+
+		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
+		    raidz_syn_pqr_abd);
+	}
+
+	/* Solve for the missing columns; results are stored into tabds */
+	abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
+
+	/*
+	 * Copy shorter targets back to the original abd buffer
+	 */
+	if (ysize < xsize)
+		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+	if (zsize < xsize)
+		raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
+
+	raidz_math_end();
+
+	/* Free scratch buffers, if any were allocated above */
+	if (ysize < xsize)
+		abd_free(yabd);
+	if (zsize < xsize)
+		abd_free(zabd);
+
+	return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
+}
+
+#endif /* _VDEV_RAIDZ_MATH_IMPL_H */
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c b/usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c
new file mode 100644
index 0000000000..cd742e146c
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz_math_scalar.c
@@ -0,0 +1,337 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/vdev_raidz_impl.h>
+
+/*
+ * Provide native CPU scalar routines.
+ * Support 32bit and 64bit CPUs.
+ */
+#if ((~(0x0ULL)) >> 24) == 0xffULL
+#define ELEM_SIZE 4
+typedef uint32_t iv_t;
+#elif ((~(0x0ULL)) >> 56) == 0xffULL
+#define ELEM_SIZE 8
+typedef uint64_t iv_t;
+#endif
+
+/*
+ * Vector type used in scalar implementation
+ *
+ * The union is expected to be of native CPU register size. Since addition
+ * uses the XOR operation, it can be performed on all byte elements at once.
+ * Multiplication requires per byte access.
+ */
+typedef union {
+	iv_t e;			/* whole-register view: XOR all bytes at once */
+	uint8_t b[ELEM_SIZE];	/* per-byte view: table-lookup multiplication */
+} v_t;
+
+/*
+ * Precomputed lookup tables for multiplication by a constant
+ *
+ * The reconstruction path requires multiplication by constant factors. Instead of
+ * performing two step lookup (log & exp tables), a direct lookup can be used
+ * instead. Multiplication of element 'a' by a constant 'c' is obtained as:
+ *
+ * r = vdev_raidz_mul_lt[c_log][a];
+ *
+ * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
+ * they are faster to obtain while solving the syndrome equations.
+ *
+ * PERFORMANCE NOTE:
+ * Even though the complete lookup table uses 64kiB, only relatively small
+ * portion of it is used at the same time. Following shows number of accessed
+ * bytes for different cases:
+ * - 1 failed disk: 256B (1 mul. coefficient)
+ * - 2 failed disks: 512B (2 mul. coefficients)
+ * - 3 failed disks: 1536B (6 mul. coefficients)
+ *
+ * Size of actually accessed lookup table regions is only larger for
+ * reconstruction of 3 failed disks, when compared to traditional log/exp
+ * method. But since the result is obtained in one lookup step performance is
+ * doubled.
+ */
+static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
+
+/*
+ * Populate vdev_raidz_mul_lt: entry [c][i] caches the GF(2^8) product
+ * gf_mul(c, i), so constant multiplication becomes a single lookup.
+ */
+static void
+raidz_init_scalar(void)
+{
+	int c, i;
+	for (c = 0; c < 256; c++)
+		for (i = 0; i < 256; i++)
+			vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
+
+}
+
+/* Prefetching is a no-op in the scalar implementation */
+#define PREFETCHNTA(ptr, offset) {}
+#define PREFETCH(ptr, offset) {}
+
+/* Element primitives: GF addition is XOR, performed register-wide */
+#define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e
+#define XOR(src, acc) acc.e ^= src.e
+#define ZERO(acc) acc.e = 0
+#define COPY(src, dst) dst = src
+#define LOAD(src, val) val = ((v_t *)src)[0]
+#define STORE(dst, val) ((v_t *)dst)[0] = val
+
+/*
+ * Constants used for optimized multiplication by 2.
+ */
+static const struct {
+	iv_t mod;	/* GF(2^8) reduction byte (0x1d), replicated per byte */
+	iv_t mask;	/* clears each byte's low bit after the shift */
+	iv_t msb;	/* selects each byte's high (carry-out) bit */
+} scalar_mul2_consts = {
+#if ELEM_SIZE == 8
+	.mod = 0x1d1d1d1d1d1d1d1dULL,
+	.mask = 0xfefefefefefefefeULL,
+	.msb = 0x8080808080808080ULL,
+#else
+	.mod = 0x1d1d1d1dULL,
+	.mask = 0xfefefefeULL,
+	.msb = 0x80808080ULL,
+#endif
+};
+
+#define MUL2_SETUP() {}
+
+/*
+ * Multiply every byte of (a) by 2 in GF(2^8) in parallel: shift each
+ * byte left, then XOR the reduction constant into the bytes whose high
+ * bit carried out (_mask expands each carried 0x80 into a 0xff byte).
+ */
+#define MUL2(a) \
+{ \
+	iv_t _mask; \
+ \
+	_mask = (a).e & scalar_mul2_consts.msb; \
+	_mask = (_mask << 1) - (_mask >> 7); \
+	(a).e = ((a).e << 1) & scalar_mul2_consts.mask; \
+	(a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \
+}
+
+#define MUL4(a) \
+{ \
+	MUL2(a); \
+	MUL2(a); \
+}
+
+/*
+ * Multiply every byte of (a) by the constant (c), one lookup per byte
+ * in the precomputed vdev_raidz_mul_lt table.
+ */
+#define MUL(c, a) \
+{ \
+	const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \
+	switch (ELEM_SIZE) { \
+	case 8: \
+		a.b[7] = mul_lt[a.b[7]]; \
+		a.b[6] = mul_lt[a.b[6]]; \
+		a.b[5] = mul_lt[a.b[5]]; \
+		a.b[4] = mul_lt[a.b[4]]; \
+		/* falls through */ \
+	case 4: \
+		a.b[3] = mul_lt[a.b[3]]; \
+		a.b[2] = mul_lt[a.b[2]]; \
+		a.b[1] = mul_lt[a.b[1]]; \
+		a.b[0] = mul_lt[a.b[0]]; \
+		break; \
+	} \
+}
+
+/* No per-call setup or teardown is required by the scalar code */
+#define raidz_math_begin() {}
+#define raidz_math_end() {}
+
+/*
+ * Temporaries and strides for the templated methods instantiated from
+ * vdev_raidz_math_impl.h. The scalar implementation processes a single
+ * v_t element per iteration, hence every stride is 1.
+ */
+#define SYN_STRIDE 1
+
+#define ZERO_DEFINE() v_t d0
+#define ZERO_STRIDE 1
+#define ZERO_D d0
+
+#define COPY_DEFINE() v_t d0
+#define COPY_STRIDE 1
+#define COPY_D d0
+
+#define ADD_DEFINE() v_t d0
+#define ADD_STRIDE 1
+#define ADD_D d0
+
+#define MUL_DEFINE() v_t d0
+#define MUL_STRIDE 1
+#define MUL_D d0
+
+#define GEN_P_STRIDE 1
+#define GEN_P_DEFINE() v_t p0
+#define GEN_P_P p0
+
+#define GEN_PQ_STRIDE 1
+#define GEN_PQ_DEFINE() v_t d0, c0
+#define GEN_PQ_D d0
+#define GEN_PQ_C c0
+
+#define GEN_PQR_STRIDE 1
+#define GEN_PQR_DEFINE() v_t d0, c0
+#define GEN_PQR_D d0
+#define GEN_PQR_C c0
+
+#define SYN_Q_DEFINE() v_t d0, x0
+#define SYN_Q_D d0
+#define SYN_Q_X x0
+
+
+#define SYN_R_DEFINE() v_t d0, x0
+#define SYN_R_D d0
+#define SYN_R_X x0
+
+
+#define SYN_PQ_DEFINE() v_t d0, x0
+#define SYN_PQ_D d0
+#define SYN_PQ_X x0
+
+
+#define REC_PQ_STRIDE 1
+#define REC_PQ_DEFINE() v_t x0, y0, t0
+#define REC_PQ_X x0
+#define REC_PQ_Y y0
+#define REC_PQ_T t0
+
+
+#define SYN_PR_DEFINE() v_t d0, x0
+#define SYN_PR_D d0
+#define SYN_PR_X x0
+
+#define REC_PR_STRIDE 1
+#define REC_PR_DEFINE() v_t x0, y0, t0
+#define REC_PR_X x0
+#define REC_PR_Y y0
+#define REC_PR_T t0
+
+
+#define SYN_QR_DEFINE() v_t d0, x0
+#define SYN_QR_D d0
+#define SYN_QR_X x0
+
+
+#define REC_QR_STRIDE 1
+#define REC_QR_DEFINE() v_t x0, y0, t0
+#define REC_QR_X x0
+#define REC_QR_Y y0
+#define REC_QR_T t0
+
+
+#define SYN_PQR_DEFINE() v_t d0, x0
+#define SYN_PQR_D d0
+#define SYN_PQR_X x0
+
+#define REC_PQR_STRIDE 1
+#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
+#define REC_PQR_X x0
+#define REC_PQR_Y y0
+#define REC_PQR_Z z0
+#define REC_PQR_XS xs0
+#define REC_PQR_YS ys0
+
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(scalar);
+DEFINE_REC_METHODS(scalar);
+
+/*
+ * The scalar implementation has no CPU-feature requirements, so it is
+ * usable on every platform.
+ */
+boolean_t
+raidz_will_scalar_work(void)
+{
+	return (B_TRUE); /* always */
+}
+
+/* Method table exporting the scalar implementation */
+const raidz_impl_ops_t vdev_raidz_scalar_impl = {
+	.init = raidz_init_scalar,	/* precompute the GF(2^8) mul table */
+	.fini = NULL,			/* no teardown needed */
+	.gen = RAIDZ_GEN_METHODS(scalar),
+	.rec = RAIDZ_REC_METHODS(scalar),
+	.is_supported = &raidz_will_scalar_work,
+	.name = "scalar"
+};
+
+/* Powers of 2 in the RAID-Z Galois field. */
+const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
+};
+
+/* Logs of 2 in the RAID-Z Galois field. */
+const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
+ 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
+ 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+ 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
+ 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+ 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
+ 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+ 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
+ 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+ 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
+ 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
+ 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+ 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
+ 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+ 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
+ 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+ 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
+ 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+ 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
+ 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
+ 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+ 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
+ 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+ 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
+ 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+ 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
+ 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+ 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
+ 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
+ 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
+};