summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason King <jason.king@joyent.com>2019-12-16 22:51:44 +0000
committerJason King <jason.king@joyent.com>2020-04-09 18:44:25 +0000
commit8039bb9bfdd139f72275fafaa7280ee016d8e548 (patch)
tree60b43d7032f8efc61eec24fabbfff92bbbf66361
parentdaeb6daf8c6eff1b8397f306c722ef02afdff012 (diff)
downloadillumos-joyent-8039bb9bfdd139f72275fafaa7280ee016d8e548.tar.gz
vioblk discard support (wip)
-rw-r--r--usr/src/uts/common/io/vioblk/vioblk.c114
-rw-r--r--usr/src/uts/common/io/vioblk/vioblk.h64
-rw-r--r--usr/src/uts/common/os/dkioc_free_util.c290
-rw-r--r--usr/src/uts/common/sys/dkioc_free_util.h13
4 files changed, 468 insertions, 13 deletions
diff --git a/usr/src/uts/common/io/vioblk/vioblk.c b/usr/src/uts/common/io/vioblk/vioblk.c
index b9459e1d9e..fd5b4541b8 100644
--- a/usr/src/uts/common/io/vioblk/vioblk.c
+++ b/usr/src/uts/common/io/vioblk/vioblk.c
@@ -87,6 +87,7 @@
#include <sys/containerof.h>
#include <sys/ctype.h>
#include <sys/sysmacros.h>
+#include <sys/dkioc_free_util.h>
#include "virtio.h"
#include "vioblk.h"
@@ -612,6 +613,105 @@ vioblk_bd_devid(void *arg, dev_info_t *dip, ddi_devid_t *devid)
devid));
}
+struct vioblk_freesp_arg {
+ vioblk_t *vfpa_vioblk;
+ bd_xfer_t *vfpa_xfer;
+};
+
+static int
+vioblk_free_exts(const dkioc_free_list_ext_t *exts, size_t n_exts,
+ boolean_t last, void *arg)
+{
+ struct vioblk_freesp_arg *args = arg;
+ vioblk_t *vib = args->vfpa_vioblk;
+ virtio_dma_t *dma = NULL;
+ virtio_chain_t *vic = NULL;
+ vioblk_req_t *vbr = NULL;
+ struct vioblk_discard_write_zeroes *wzp = NULL;
+ size_t i;
+ int r = 0;
+
+ dma = virtio_dma_alloc(vib->vib_virtio, n_exts * sizeof (*wzp),
+ &vioblk_dma_attr, DDI_DMA_CONSISTENT | DDI_DMA_WRITE, KM_SLEEP);
+ if (dma == NULL)
+ return (ENOMEM);
+
+ wzp = virtio_dma_va(dma, 0);
+
+ for (i = 0; i < n_exts; i++, exts++, wzp++) {
+ struct vioblk_discard_write_zeroes vdwz = {
+ .vdwz_sector = exts->dfle_start,
+ .vdwz_num_sectors = exts->dfle_length,
+ };
+
+ bcopy(&vdwz, wzp, sizeof (*wzp));
+ }
+
+ mutex_enter(&vib->vib_mutex);
+
+ vic = vioblk_common_start(vib, VIRTIO_BLK_T_DISCARD, 0, B_FALSE);
+ if (vic == NULL) {
+ mutex_exit(&vib->vib_mutex);
+ virtio_dma_free(dma);
+ return (ENOMEM);
+ }
+
+ vbr = virtio_chain_data(vic);
+ if (virtio_chain_append(vic,
+ virtio_dma_cookie_pa(dma, 0),
+ virtio_dma_cookie_size(dma, 0),
+ VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
+ vioblk_req_free(vib, vbr);
+ virtio_chain_free(vic);
+ mutex_exit(&vib->vib_mutex);
+ return (ENOMEM);
+ }
+
+ if (last) {
+ /*
+ * We attach xfer to the final vioblk request we submit.
+ * This will allow the vioblk_complete() to handle any
+ * notifications (e.g. a synchronous request) and
+ * dispose of xfer afterwards.
+ */
+ vbr->vbr_xfer = args->vfpa_xfer;
+ args->vfpa_xfer = NULL;
+ }
+
+ r = vioblk_common_submit(vib, vic);
+ mutex_exit(&vib->vib_mutex);
+ return (r);
+}
+
+static int
+vioblk_bd_free_space(void *arg, bd_xfer_t *xfer)
+{
+ vioblk_t *vib = arg;
+ dkioc_free_align_t align = {
+ .dfa_bsize = DEV_BSIZE,
+ .dfa_max_ext = vib->vib_max_discard_seg,
+ .dfa_max_blocks = vib->vib_max_discard_sectors,
+ .dfa_align = vib->vib_discard_sector_align * DEV_BSIZE
+ };
+ struct vioblk_freesp_arg sp_arg = {
+ .vfpa_vioblk = vib,
+ .vfpa_xfer = xfer
+ };
+ int r = dfl_iter(xfer->x_dfl, &align, vioblk_free_exts, &sp_arg,
+ KM_SLEEP, 0);
+
+ /*
+ * If we didn't include xfer as part of the final request, we
+ * need to clean it up now.
+ */
+ if (sp_arg.vfpa_xfer != NULL) {
+ VERIFY3S(r, !=, 0);
+ bd_xfer_done(sp_arg.vfpa_xfer, r);
+ }
+
+ return (r);
+}
+
/*
* As the device completes processing of a request, it returns the chain for
* that request to our I/O queue. This routine is called in two contexts:
@@ -804,6 +904,15 @@ vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
}
+ if (virtio_feature_present(vio, VIRTIO_BLK_F_DISCARD)) {
+ vib->vib_max_discard_sectors = virtio_dev_get32(vio,
+ VIRTIO_BLK_CONFIG_MAX_DISCARD_SECT);
+ vib->vib_max_discard_seg = virtio_dev_get32(vio,
+ VIRTIO_BLK_CONFIG_MAX_DISCARD_SEG);
+ vib->vib_discard_sector_align = virtio_dev_get32(vio,
+ VIRTIO_BLK_CONFIG_DISCARD_ALIGN);
+ }
+
/*
* When allocating the request queue, we include two additional
* descriptors (beyond those required for request data) to account for
@@ -933,11 +1042,14 @@ vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
.o_sync_cache = vioblk_bd_flush,
.o_read = vioblk_bd_read,
.o_write = vioblk_bd_write,
- .o_free_space = NULL,
+ .o_free_space = vioblk_bd_free_space,
};
if (!virtio_feature_present(vio, VIRTIO_BLK_F_FLUSH)) {
vioblk_bd_ops.o_sync_cache = NULL;
}
+ if (!virtio_feature_present(vio, VIRTIO_BLK_F_DISCARD)) {
+ vioblk_bd_ops.o_free_space = NULL;
+ }
vib->vib_bd_h = bd_alloc_handle(vib, &vioblk_bd_ops,
&vib->vib_bd_dma_attr, KM_SLEEP);
diff --git a/usr/src/uts/common/io/vioblk/vioblk.h b/usr/src/uts/common/io/vioblk/vioblk.h
index e08fc31e8f..24303c193b 100644
--- a/usr/src/uts/common/io/vioblk/vioblk.h
+++ b/usr/src/uts/common/io/vioblk/vioblk.h
@@ -32,17 +32,26 @@ extern "C" {
* These are offsets into the device-specific configuration space available
* through the virtio_dev_*() family of functions.
*/
-#define VIRTIO_BLK_CONFIG_CAPACITY 0x00 /* 64 R */
-#define VIRTIO_BLK_CONFIG_SIZE_MAX 0x08 /* 32 R */
-#define VIRTIO_BLK_CONFIG_SEG_MAX 0x0C /* 32 R */
-#define VIRTIO_BLK_CONFIG_GEOMETRY_C 0x10 /* 16 R */
-#define VIRTIO_BLK_CONFIG_GEOMETRY_H 0x12 /* 8 R */
-#define VIRTIO_BLK_CONFIG_GEOMETRY_S 0x13 /* 8 R */
-#define VIRTIO_BLK_CONFIG_BLK_SIZE 0x14 /* 32 R */
-#define VIRTIO_BLK_CONFIG_TOPO_PBEXP 0x18 /* 8 R */
-#define VIRTIO_BLK_CONFIG_TOPO_ALIGN 0x19 /* 8 R */
-#define VIRTIO_BLK_CONFIG_TOPO_MIN_SZ 0x1A /* 16 R */
-#define VIRTIO_BLK_CONFIG_TOPO_OPT_SZ 0x1C /* 32 R */
+#define VIRTIO_BLK_CONFIG_CAPACITY 0x00 /* 64 R */
+#define VIRTIO_BLK_CONFIG_SIZE_MAX 0x08 /* 32 R */
+#define VIRTIO_BLK_CONFIG_SEG_MAX 0x0C /* 32 R */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_C 0x10 /* 16 R */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_H 0x12 /* 8 R */
+#define VIRTIO_BLK_CONFIG_GEOMETRY_S 0x13 /* 8 R */
+#define VIRTIO_BLK_CONFIG_BLK_SIZE 0x14 /* 32 R */
+#define VIRTIO_BLK_CONFIG_TOPO_PBEXP 0x18 /* 8 R */
+#define VIRTIO_BLK_CONFIG_TOPO_ALIGN 0x19 /* 8 R */
+#define VIRTIO_BLK_CONFIG_TOPO_MIN_SZ 0x1A /* 16 R */
+#define VIRTIO_BLK_CONFIG_TOPO_OPT_SZ 0x1C /* 32 R */
+#define VIRTIO_BLK_CONFIG_WRITEBACK 0x20 /* 8 R */
+ /* unused 0x21 8 R */
+#define VIRTIO_BLK_CONFIG_NUM_QUEUES 0x22 /* 16 R */
+#define VIRTIO_BLK_CONFIG_MAX_DISCARD_SECT 0x24 /* 32 R */
+#define VIRTIO_BLK_CONFIG_MAX_DISCARD_SEG 0x28 /* 32 R */
+#define VIRTIO_BLK_CONFIG_DISCARD_ALIGN 0x2C /* 32 R */
+#define VIRTIO_BLK_CONFIG_MAX_WRITE_ZERO_SECT 0x30 /* 32 R */
+#define VIRTIO_BLK_CONFIG_MAX_WRITE_ZERO_SEG 0x34 /* 32 R */
+#define VIRTIO_BLK_CONFIG_WRITE_ZERO_UNMAP 0x38 /* 8 R */
/*
* VIRTIO BLOCK VIRTQUEUES
@@ -64,6 +73,10 @@ extern "C" {
#define VIRTIO_BLK_F_SCSI (1ULL << 7)
#define VIRTIO_BLK_F_FLUSH (1ULL << 9)
#define VIRTIO_BLK_F_TOPOLOGY (1ULL << 10)
+#define VIRTIO_BLK_F_CONFIG_WCE (1ULL << 11)
+#define VIRTIO_BLK_F_MQ (1ULL << 12)
+#define VIRTIO_BLK_F_DISCARD (1ULL << 13)
+#define VIRTIO_BLK_F_WRITE_ZEROES (1ULL << 14)
/*
* These features are supported by the driver and we will request them from the
@@ -74,7 +87,8 @@ extern "C" {
VIRTIO_BLK_F_FLUSH | \
VIRTIO_BLK_F_TOPOLOGY | \
VIRTIO_BLK_F_SEG_MAX | \
- VIRTIO_BLK_F_SIZE_MAX)
+ VIRTIO_BLK_F_SIZE_MAX | \
+ VIRTIO_BLK_F_DISCARD)
/*
* VIRTIO BLOCK REQUEST HEADER
@@ -102,9 +116,31 @@ struct vioblk_req_hdr {
#define VIRTIO_BLK_T_FLUSH 4
#define VIRTIO_BLK_T_FLUSH_OUT 5
#define VIRTIO_BLK_T_GET_ID 8
+#define VIRTIO_BLK_T_DISCARD 11
+#define VIRTIO_BLK_T_WRITE_ZEROES 13
#define VIRTIO_BLK_T_BARRIER 0x80000000
/*
+ * VIRTIO BLOCK DISCARD/WRITE ZEROES DATA
+ *
+ * For hosts that support the DISCARD or WRITE ZEROES features, instead of
+ * data, the vioblk_discard_write_zeroes struct is used as the 'data' for
+ * the request.
+ */
+struct vioblk_discard_write_zeroes {
+ uint64_t vdwz_sector;
+ uint32_t vdwz_num_sectors;
+ uint32_t vdwz_flags;
+} __packed;
+
+/*
+ * vdwz_flags values
+ */
+
+/* For a WRITE ZEROES request, also unmap the block */
+#define VIRTIO_BLK_WRITE_ZEROS_UNMAP (1U << 0)
+
+/*
* The GET_ID command type does not appear in the specification, but
* implementations in the wild use a 20 byte buffer into which the device will
* write an ASCII string. The string should not be assumed to be
@@ -200,6 +236,10 @@ typedef struct vioblk {
uint_t vib_seg_max;
uint_t vib_seg_size_max;
+ uint_t vib_max_discard_sectors; /* WO */
+ uint_t vib_max_discard_seg; /* WO */
+ uint_t vib_discard_sector_align; /* WO */
+
boolean_t vib_devid_fetched;
char vib_devid[VIRTIO_BLK_ID_BYTES + 1];
uint8_t vib_rawid[VIRTIO_BLK_ID_BYTES];
diff --git a/usr/src/uts/common/os/dkioc_free_util.c b/usr/src/uts/common/os/dkioc_free_util.c
index 85470f7e28..2dfb4289d4 100644
--- a/usr/src/uts/common/os/dkioc_free_util.c
+++ b/usr/src/uts/common/os/dkioc_free_util.c
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Nexenta Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
/* needed when building libzpool */
@@ -25,6 +26,23 @@
#include <sys/file.h>
#include <sys/sdt.h>
+struct ext_arg {
+ uint64_t ea_ext_cnt;
+ dfl_iter_fn_t ea_fn;
+ void *ea_arg;
+ dkioc_free_list_ext_t *ea_exts;
+};
+
+typedef int (*ext_iter_fn_t)(const dkioc_free_list_ext_t *,
+ boolean_t, void *);
+
+static int ext_iter(const dkioc_free_list_t *, const dkioc_free_align_t *,
+ uint_t, ext_iter_fn_t, void *);
+static int ext_xlate(const dkioc_free_list_t *, const dkioc_free_list_ext_t *,
+ const dkioc_free_align_t *, uint_t, uint64_t *, uint64_t *);
+static int count_exts(const dkioc_free_list_ext_t *, boolean_t, void *);
+static int process_exts(const dkioc_free_list_ext_t *, boolean_t, void *);
+
/*
* Copy-in convenience function for variable-length dkioc_free_list_t
* structures. The pointer to be copied from is in `arg' (may be a pointer
@@ -78,3 +96,275 @@ dfl_free(dkioc_free_list_t *dfl)
{
kmem_free(dfl, DFL_SZ(dfl->dfl_num_exts));
}
+
+/*
+ * Convenience function to iterate through the array of extents in dfl while
+ * respecting segmentation and alignment of the extents.
+ *
+ * Some devices that implement DKIOCFREE (e.g. nvme and vioblk) have limits
+ * on either the number of extents that can be submitted in a single request,
+ * or the total number of blocks that can be submitted in a single request.
+ * In addition, devices may have alignment requirements on the starting
+ * address stricter than the device block size.
+ *
+ * Since there is currently no way for callers of DKIOCFREE to discover
+ * any alignment or segmentation requirements, the driver itself may choose
+ * to adjust the actual extent start and length that is freed (never freeing
+ * outside the original unmodified extent boundaries), split extents into
+ * multiple smaller extents, or split a single request into multiple requests
+ * to the underlying hardware. dfl_iter() frees the driver from having to
+ * deal with such complexity/tedium.
+ *
+ * The original request is passed in dfl and the alignment requirements are
+ * given in dkfa. dfl_iter() will do the necessary adjustments and then
+ * call func with an array of extents, number of extents, as well as a flag
+ * that is set upon the last invocation of func for the original request, as
+ * well as the void * arg passed to dfl_iter().
+ *
+ * func should return 0 on success or an error value. An error may result
+ * in partial completion of the request, sorry.
+ *
+ * Currently no flags are defined, and should always be zero.
+ */
+int
+dfl_iter(const dkioc_free_list_t *dfl, const dkioc_free_align_t *dfa,
+ dfl_iter_fn_t func, void *arg, int kmflag, uint32_t dfl_flag)
+{
+ dkioc_free_list_ext_t *exts;
+ uint64_t n_exts = 0;
+ struct ext_arg earg = { 0 };
+ uint_t bshift;
+ int r = 0;
+
+ if (dfl_flag != 0)
+ return (SET_ERROR(EINVAL));
+
+ /* Block size must be at least 1 and a power of two */
+ if (dfa->dfa_bsize == 0 || !ISP2(dfa->dfa_bsize))
+ return (SET_ERROR(EINVAL));
+
+ /* Offset alignment must also be at least 1 and a power of two */
+ if (dfa->dfa_align == 0 || !ISP2(dfa->dfa_align))
+ return (SET_ERROR(EINVAL));
+
+ /* The offset alignment must be at least as large as the block size */
+ if (dfa->dfa_align < dfa->dfa_bsize)
+ return (SET_ERROR(EINVAL));
+
+ /* Since dfa_bsize != 0, ddi_ffsll() _must_ return a value > 1 */
+ bshift = ddi_ffsll((long long)dfa->dfa_bsize) - 1;
+
+ /*
+ * If a limit on the total number of blocks is given, it must be
+ * greater than the offset alignment. E.g. if the block size is 512
+ * bytes, the offset alignment is 4096 (8 blocks), the device must
+ * allow extent sizes at least 8 blocks long (otherwise it is not
+ * possible to free the entire device).
+ */
+ if (dfa->dfa_max_blocks > 0 &&
+ dfa->dfa_max_blocks < (dfa->dfa_align >> bshift))
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * Determine the total number of extents needed. Due to alignment
+ * and segmentation requirements, this may be different than
+ * the initial number of segments.
+ */
+ r = ext_iter(dfl, dfa, bshift, count_exts, &n_exts);
+ if (r != 0)
+ return (r);
+
+ /*
+ * It's possible that some extents do not conform to the alignment
+ * requirements, nor do they have a conforming subset. For example,
+ * with a minimum alignment of 8 blocks, an extent starting at
+ * offset 2 and a length of 5 is such a case. Since there is no way
+ * to report partial results, such extents are silently skipped.
+ * It is then possible that a request could consist of nothing but
+ * ineligible extents, and so such a request is also silently
+ * ignored.
+ */
+ if (n_exts == 0)
+ return (0);
+
+ exts = kmem_zalloc(n_exts * sizeof (*exts), kmflag);
+ if (exts == NULL)
+ return (SET_ERROR(EOVERFLOW));
+
+ earg.ea_ext_cnt = 0;
+ earg.ea_fn = func;
+ earg.ea_arg = arg;
+ earg.ea_exts = exts;
+
+ /*
+ * Run through all the extents, calling func as the limits for
+ * each request are reached. The final request remains queued
+ * when ext_iter() returns.
+ */
+ r = ext_iter(dfl, dfa, bshift, process_exts, &earg);
+ if (r != 0)
+ goto done;
+
+ /* Process the final request */
+ r = process_exts(NULL, B_TRUE, &earg);
+
+done:
+ kmem_free(exts, n_exts * sizeof (*exts));
+ return (r);
+}
+
+static int
+count_exts(const dkioc_free_list_ext_t *ext, boolean_t newreq __unused,
+ void *arg)
+{
+ uint64_t *np = arg;
+
+ (*np)++;
+ return (0);
+}
+
+static int
+process_exts(const dkioc_free_list_ext_t *ext, boolean_t newreq, void *arg)
+{
+ struct ext_arg *args = arg;
+
+ if (newreq && args->ea_ext_cnt > 0) {
+ /*
+ * A new request is starting; dispatch the extents
+ * accumulated for the previous request.
+ */
+ int r;
+ boolean_t last = (ext == NULL) ? B_TRUE : B_FALSE;
+
+ r = args->ea_fn(args->ea_exts, args->ea_ext_cnt, last,
+ args->ea_arg);
+
+ if (r != 0)
+ return (r);
+
+ args->ea_exts += args->ea_ext_cnt;
+ args->ea_ext_cnt = 0;
+
+ /*
+ * After the last request, we are called with a NULL ext
+ * and a new request to process the final request.
+ */
+ if (ext == NULL)
+ return (0);
+ }
+
+ args->ea_exts[args->ea_ext_cnt++] = *ext;
+ return (0);
+}
+
+/*
+ * Translate the byte offset and lengths in ext into block offsets and
+ * lengths, with the offset aligned per dfla.
+ */
+static int
+ext_xlate(const dkioc_free_list_t *dfl, const dkioc_free_list_ext_t *ext,
+ const dkioc_free_align_t *dfa, uint_t bshift, uint64_t *startp,
+ uint64_t *lengthp)
+{
+ uint64_t start = dfl->dfl_offset + ext->dfle_start;
+ uint64_t end = start + ext->dfle_length;
+
+ if (start < dfl->dfl_offset || start < ext->dfle_start)
+ return (SET_ERROR(EOVERFLOW));
+ if (end < start || end < ext->dfle_length)
+ return (SET_ERROR(EOVERFLOW));
+
+ start = P2ROUNDUP(start, dfa->dfa_align) >> bshift;
+ end = P2ALIGN(end, dfa->dfa_bsize) >> bshift;
+
+ *startp = start;
+ *lengthp = (end > start) ? end - start : 0;
+ return (0);
+}
+
+/*
+ * Iterate through the extents in dfl. fn is called for each adjusted extent
+ * (adjusting offsets and lengths to conform to the alignment requirements)
+ * and one input extent may result in 0, 1, or multiple calls to fn as a
+ * result.
+ */
+static int
+ext_iter(const dkioc_free_list_t *dfl, const dkioc_free_align_t *dfa,
+ uint_t bshift, ext_iter_fn_t fn, void *arg)
+{
+ const dkioc_free_list_ext_t *ext;
+ uint64_t n_exts = 0;
+ uint64_t n_blk = 0;
+ size_t i;
+ boolean_t newreq = B_TRUE;
+
+ for (i = 0, ext = dfl->dfl_exts; i < dfl->dfl_num_exts; i++, ext++) {
+ uint64_t start, length;
+ int r;
+
+ r = ext_xlate(dfl, ext, dfa, bshift, &start, &length);
+ if (r != 0)
+ return (r);
+
+ while (length > 0) {
+ dkioc_free_list_ext_t blk_ext = {
+ .dfle_start = start,
+ .dfle_length = length
+ };
+
+ if (dfa->dfa_max_ext > 0 &&
+ n_exts + 1 > dfa->dfa_max_ext) {
+ /*
+ * Reached the max # of extents, start a new
+ * request.
+ */
+ newreq = B_TRUE;
+ n_exts = 0;
+ n_blk = 0;
+ continue;
+ }
+
+ if (dfa->dfa_max_blocks > 0 &&
+ n_blk + length > dfa->dfa_max_blocks) {
+ /*
+ * This extent puts us over the max # of
+ * blocks in a request. If this isn't a
+ * new request, start a new request.
+ */
+ if (!newreq) {
+ newreq = B_TRUE;
+ n_exts = 0;
+ n_blk = 0;
+ continue;
+ }
+
+ /*
+ * A new request, and the extent length is
+ * larger than our max. Reduce the length to
+ * the largest multiple of dfa_align
+ * equal to or less than dfa_max_blocks
+ * so the next starting address has the
+ * correct alignment.
+ */
+ blk_ext.dfle_length =
+ P2ALIGN(dfa->dfa_max_blocks,
+ dfa->dfa_align >> bshift);
+ }
+
+ r = fn(&blk_ext, newreq, arg);
+ if (r != 0)
+ return (r);
+
+ newreq = B_FALSE;
+
+ n_exts++;
+ n_blk += blk_ext.dfle_length;
+
+ length -= blk_ext.dfle_length;
+ start += blk_ext.dfle_length;
+ }
+ }
+
+ return (0);
+}
diff --git a/usr/src/uts/common/sys/dkioc_free_util.h b/usr/src/uts/common/sys/dkioc_free_util.h
index 9e83ab3bff..42b16cd152 100644
--- a/usr/src/uts/common/sys/dkioc_free_util.h
+++ b/usr/src/uts/common/sys/dkioc_free_util.h
@@ -11,6 +11,7 @@
/*
* Copyright 2017 Nexenta Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_DKIOC_FREE_UTIL_H
@@ -24,8 +25,20 @@ extern "C" {
#define DFL_COPYIN_MAX_EXTS (1024 * 1024)
+typedef struct dkioc_free_align {
+ size_t dfa_bsize; /* device block size in bytes */
+ size_t dfa_max_ext; /* max # of extents in a single req */
+ size_t dfa_max_blocks; /* max # of blocks in a single req */
+ size_t dfa_align; /* alignment for starting addresses */
+} dkioc_free_align_t;
+
+typedef int (*dfl_iter_fn_t)(const dkioc_free_list_ext_t *exts, size_t n_ext,
+ boolean_t last, void *arg);
+
int dfl_copyin(void *arg, dkioc_free_list_t **out, int ddi_flags, int kmflags);
void dfl_free(dkioc_free_list_t *dfl);
+int dfl_iter(const dkioc_free_list_t *dfl, const dkioc_free_align_t *align,
+ dfl_iter_fn_t fn, void *arg, int kmflag, uint32_t flags);
#ifdef __cplusplus
}