summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/fs/zfs/vdev_raidz.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/fs/zfs/vdev_raidz.c')
-rw-r--r-- usr/src/uts/common/fs/zfs/vdev_raidz.c | 178
1 files changed, 98 insertions, 80 deletions
diff --git a/usr/src/uts/common/fs/zfs/vdev_raidz.c b/usr/src/uts/common/fs/zfs/vdev_raidz.c
index 6502353542..0dcbb863e3 100644
--- a/usr/src/uts/common/fs/zfs/vdev_raidz.c
+++ b/usr/src/uts/common/fs/zfs/vdev_raidz.c
@@ -267,21 +267,17 @@ static void
vdev_raidz_map_free(raidz_map_t *rm)
{
int c;
- size_t size;
for (c = 0; c < rm->rm_firstdatacol; c++) {
abd_free(rm->rm_col[c].rc_abd);
if (rm->rm_col[c].rc_gdata != NULL)
- zio_buf_free(rm->rm_col[c].rc_gdata,
- rm->rm_col[c].rc_size);
+ abd_free(rm->rm_col[c].rc_gdata);
+
}
- size = 0;
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
abd_put(rm->rm_col[c].rc_abd);
- size += rm->rm_col[c].rc_size;
- }
if (rm->rm_abd_copy != NULL)
abd_free(rm->rm_abd_copy);
@@ -314,14 +310,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored)
}
static void
-vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
+vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
size_t c = zcr->zcr_cbinfo;
- size_t x;
+ size_t x, offset;
- const char *good = NULL;
- char *bad;
+ const abd_t *good = NULL;
+ const abd_t *bad = rm->rm_col[c].rc_abd;
if (good_data == NULL) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
@@ -336,8 +332,6 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
*/
if (rm->rm_col[0].rc_gdata == NULL) {
abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
- char *buf;
- int offset;
/*
* Set up the rm_col[]s to generate the parity for
@@ -346,20 +340,21 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
*/
for (x = 0; x < rm->rm_firstdatacol; x++) {
bad_parity[x] = rm->rm_col[x].rc_abd;
- rm->rm_col[x].rc_gdata =
- zio_buf_alloc(rm->rm_col[x].rc_size);
rm->rm_col[x].rc_abd =
- abd_get_from_buf(rm->rm_col[x].rc_gdata,
+ rm->rm_col[x].rc_gdata =
+ abd_alloc_sametype(rm->rm_col[x].rc_abd,
rm->rm_col[x].rc_size);
}
/* fill in the data columns from good_data */
- buf = (char *)good_data;
+ offset = 0;
for (; x < rm->rm_cols; x++) {
abd_put(rm->rm_col[x].rc_abd);
- rm->rm_col[x].rc_abd = abd_get_from_buf(buf,
- rm->rm_col[x].rc_size);
- buf += rm->rm_col[x].rc_size;
+
+ rm->rm_col[x].rc_abd =
+ abd_get_offset_size((abd_t *)good_data,
+ offset, rm->rm_col[x].rc_size);
+ offset += rm->rm_col[x].rc_size;
}
/*
@@ -368,34 +363,35 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
vdev_raidz_generate_parity(rm);
/* restore everything back to its original state */
- for (x = 0; x < rm->rm_firstdatacol; x++) {
- abd_put(rm->rm_col[x].rc_abd);
+ for (x = 0; x < rm->rm_firstdatacol; x++)
rm->rm_col[x].rc_abd = bad_parity[x];
- }
offset = 0;
for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) {
abd_put(rm->rm_col[x].rc_abd);
- rm->rm_col[x].rc_abd = abd_get_offset(
- rm->rm_abd_copy, offset);
+ rm->rm_col[x].rc_abd = abd_get_offset_size(
+ rm->rm_abd_copy, offset,
+ rm->rm_col[x].rc_size);
offset += rm->rm_col[x].rc_size;
}
}
ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL);
- good = rm->rm_col[c].rc_gdata;
+ good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0,
+ rm->rm_col[c].rc_size);
} else {
/* adjust good_data to point at the start of our column */
- good = good_data;
-
+ offset = 0;
for (x = rm->rm_firstdatacol; x < c; x++)
- good += rm->rm_col[x].rc_size;
+ offset += rm->rm_col[x].rc_size;
+
+ good = abd_get_offset_size((abd_t *)good_data, offset,
+ rm->rm_col[c].rc_size);
}
- bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size);
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
- abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size);
+ abd_put((abd_t *)good);
}
/*
@@ -438,14 +434,16 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
size += rm->rm_col[c].rc_size;
- rm->rm_abd_copy =
- abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size);
+ rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE);
for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
raidz_col_t *col = &rm->rm_col[c];
- abd_t *tmp = abd_get_offset(rm->rm_abd_copy, offset);
+ abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset,
+ col->rc_size);
- abd_copy(tmp, col->rc_abd, col->rc_size);
+ ASSERT3S(tmp->abd_size, >=, col->rc_size);
+ ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size);
+ abd_copy_off(tmp, col->rc_abd, 0, 0, col->rc_size);
abd_put(col->rc_abd);
col->rc_abd = tmp;
@@ -562,13 +560,15 @@ vdev_raidz_map_alloc(abd_t *abd, uint64_t size, uint64_t offset,
for (c = 0; c < rm->rm_firstdatacol; c++)
rm->rm_col[c].rc_abd =
- abd_alloc_linear(rm->rm_col[c].rc_size, B_TRUE);
+ abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE);
- rm->rm_col[c].rc_abd = abd_get_offset(abd, 0);
+ rm->rm_col[c].rc_abd = abd_get_offset_size(abd, 0,
+ rm->rm_col[c].rc_size);
off = rm->rm_col[c].rc_size;
for (c = c + 1; c < acols; c++) {
- rm->rm_col[c].rc_abd = abd_get_offset(abd, off);
+ rm->rm_col[c].rc_abd = abd_get_offset_size(abd, off,
+ rm->rm_col[c].rc_size);
off += rm->rm_col[c].rc_size;
}
@@ -683,7 +683,8 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm)
p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
if (c == rm->rm_firstdatacol) {
- abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+ ASSERT3U(src->abd_size, >=, rm->rm_col[c].rc_size);
+ abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
} else {
struct pqr_struct pqr = { p, NULL, NULL };
(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
@@ -711,20 +712,22 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm)
ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
if (c == rm->rm_firstdatacol) {
- abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
- (void) memcpy(q, p, rm->rm_col[c].rc_size);
- } else {
- struct pqr_struct pqr = { p, q, NULL };
- (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
- vdev_raidz_pq_func, &pqr);
- }
+ ASSERT(ccnt == pcnt || ccnt == 0);
- if (c == rm->rm_firstdatacol) {
+ abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
+ (void) memcpy(q, p, rm->rm_col[c].rc_size);
for (i = ccnt; i < pcnt; i++) {
p[i] = 0;
q[i] = 0;
}
} else {
+ struct pqr_struct pqr = { p, q, NULL };
+
+ ASSERT(ccnt <= pcnt);
+
+ (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+ vdev_raidz_pq_func, &pqr);
+
/*
* Treat short columns as though they are full of 0s.
* Note that there's therefore nothing needed for P.
@@ -758,22 +761,24 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
if (c == rm->rm_firstdatacol) {
- abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+ ASSERT3S(src->abd_size, >=, rm->rm_col[c].rc_size);
+ ASSERT(ccnt == pcnt || ccnt == 0);
+ abd_copy_to_buf_off(p, src, 0, rm->rm_col[c].rc_size);
(void) memcpy(q, p, rm->rm_col[c].rc_size);
(void) memcpy(r, p, rm->rm_col[c].rc_size);
- } else {
- struct pqr_struct pqr = { p, q, r };
- (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
- vdev_raidz_pqr_func, &pqr);
- }
- if (c == rm->rm_firstdatacol) {
for (i = ccnt; i < pcnt; i++) {
p[i] = 0;
q[i] = 0;
r[i] = 0;
}
} else {
+ struct pqr_struct pqr = { p, q, r };
+
+ ASSERT(ccnt <= pcnt);
+ (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+ vdev_raidz_pqr_func, &pqr);
+
/*
* Treat short columns as though they are full of 0s.
* Note that there's therefore nothing needed for P.
@@ -941,7 +946,9 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts)
src = rm->rm_col[VDEV_RAIDZ_P].rc_abd;
dst = rm->rm_col[x].rc_abd;
- abd_copy(dst, src, rm->rm_col[x].rc_size);
+ ASSERT3S(dst->abd_size, >=, rm->rm_col[x].rc_size);
+ ASSERT3S(src->abd_size, >=, rm->rm_col[x].rc_size);
+ abd_copy_off(dst, src, 0, 0, rm->rm_col[x].rc_size);
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
uint64_t size = MIN(rm->rm_col[x].rc_size,
@@ -979,14 +986,19 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts)
dst = rm->rm_col[x].rc_abd;
if (c == rm->rm_firstdatacol) {
- abd_copy(dst, src, size);
+ if (dst != src) {
+ ASSERT3S(dst->abd_size, >=, size);
+ ASSERT3S(src->abd_size, >=, size);
+ abd_copy_off(dst, src, 0, 0, size);
+ }
if (rm->rm_col[x].rc_size > size)
abd_zero_off(dst, size,
rm->rm_col[x].rc_size - size);
} else {
ASSERT3U(size, <=, rm->rm_col[x].rc_size);
- (void) abd_iterate_func2(dst, src, 0, 0, size,
- vdev_raidz_reconst_q_pre_func, NULL);
+ if (src != dst)
+ (void) abd_iterate_func2(dst, src, 0, 0, size,
+ vdev_raidz_reconst_q_pre_func, NULL);
(void) abd_iterate_func(dst,
size, rm->rm_col[x].rc_size - size,
vdev_raidz_reconst_q_pre_tail_func, NULL);
@@ -1475,7 +1487,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
bufs[c] = col->rc_abd;
col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE);
- abd_copy(col->rc_abd, bufs[c], col->rc_size);
+ ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size);
+ ASSERT3S(bufs[c]->abd_size, >=, col->rc_size);
+ abd_copy_off(col->rc_abd, bufs[c], 0, 0, col->rc_size);
}
}
@@ -1571,7 +1585,9 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
raidz_col_t *col = &rm->rm_col[c];
- abd_copy(bufs[c], col->rc_abd, col->rc_size);
+ ASSERT3S(bufs[c]->abd_size, >=, col->rc_size);
+ ASSERT3S(col->rc_abd->abd_size, >=, col->rc_size);
+ abd_copy_off(bufs[c], col->rc_abd, 0, 0, col->rc_size);
abd_free(col->rc_abd);
col->rc_abd = bufs[c];
}
@@ -2041,9 +2057,8 @@ vdev_raidz_io_start(zio_t *zio)
* Report a checksum error for a child of a RAID-Z device.
*/
static void
-raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
+raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
{
- void *buf;
vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
@@ -2057,11 +2072,9 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
zbc.zbc_has_cksum = 0;
zbc.zbc_injected = rm->rm_ecksuminjected;
- buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size);
- zfs_ereport_post_checksum(zio->io_spa, vd, zio,
- rc->rc_offset, rc->rc_size, buf, bad_data,
- &zbc);
- abd_return_buf(rc->rc_abd, buf, rc->rc_size);
+ zfs_ereport_post_checksum(zio->io_spa, vd,
+ &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size,
+ rc->rc_abd, bad_data, &zbc);
}
}
@@ -2091,7 +2104,7 @@ raidz_checksum_verify(zio_t *zio)
static int
raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
{
- void *orig[VDEV_RAIDZ_MAXPARITY];
+ abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int c, ret = 0;
raidz_col_t *rc;
@@ -2106,8 +2119,8 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0)
continue;
- orig[c] = zio_buf_alloc(rc->rc_size);
- abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size);
+ orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
+ abd_copy(orig[c], rc->rc_abd, rc->rc_size);
}
vdev_raidz_generate_parity(rm);
@@ -2116,12 +2129,12 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0)
continue;
- if (abd_cmp_buf(rc->rc_abd, orig[c], rc->rc_size) != 0) {
+ if (abd_cmp(orig[c], rc->rc_abd, rc->rc_abd->abd_size) != 0) {
raidz_checksum_error(zio, rc, orig[c]);
rc->rc_error = SET_ERROR(ECKSUM);
ret++;
}
- zio_buf_free(orig[c], rc->rc_size);
+ abd_free(orig[c]);
}
return (ret);
@@ -2156,7 +2169,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
{
raidz_map_t *rm = zio->io_vsd;
raidz_col_t *rc;
- void *orig[VDEV_RAIDZ_MAXPARITY];
+ abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int tstore[VDEV_RAIDZ_MAXPARITY + 2];
int *tgts = &tstore[1];
int current, next, i, c, n;
@@ -2205,7 +2218,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT(orig[i] != NULL);
}
- orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size);
+ orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd,
+ rm->rm_col[0].rc_size);
current = 0;
next = tgts[current];
@@ -2224,7 +2238,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT3S(c, >=, 0);
ASSERT3S(c, <, rm->rm_cols);
rc = &rm->rm_col[c];
- abd_copy_to_buf(orig[i], rc->rc_abd,
+ ASSERT3S(orig[i]->abd_size, >=, rc->rc_size);
+ ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size);
+ abd_copy_off(orig[i], rc->rc_abd, 0, 0,
rc->rc_size);
}
@@ -2256,7 +2272,9 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
for (i = 0; i < n; i++) {
c = tgts[i];
rc = &rm->rm_col[c];
- abd_copy_from_buf(rc->rc_abd, orig[i],
+ ASSERT3S(rc->rc_abd->abd_size, >=, rc->rc_size);
+ ASSERT3S(orig[i]->abd_size, >=, rc->rc_size);
+ abd_copy_off(rc->rc_abd, orig[i], 0, 0,
rc->rc_size);
}
@@ -2294,9 +2312,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
}
n--;
done:
- for (i = 0; i < n; i++) {
- zio_buf_free(orig[i], rm->rm_col[0].rc_size);
- }
+ for (i = 0; i < n; i++)
+ abd_free(orig[i]);
return (ret);
}
@@ -2555,7 +2572,8 @@ vdev_raidz_io_done(zio_t *zio)
zfs_ereport_start_checksum(
zio->io_spa,
vd->vdev_child[rc->rc_devidx],
- zio, rc->rc_offset, rc->rc_size,
+ &zio->io_bookmark, zio,
+ rc->rc_offset, rc->rc_size,
(void *)(uintptr_t)c, &zbc);
}
}