summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/io/blkdev
diff options
context:
space:
mode:
authorJason King <jason.king@joyent.com>2019-12-03 23:13:23 +0000
committerJason King <jason.king@joyent.com>2020-07-09 17:01:46 -0500
commit1a5ae140ba142cafb59ab08b3212c4ebbce84f32 (patch)
tree392db089970c3c0a81a85ab86094d0af5196f57c /usr/src/uts/common/io/blkdev
parent2fc9ab6e6080d66d8dc6b967cea7ba24bee8c7e5 (diff)
downloadillumos-joyent-1a5ae140ba142cafb59ab08b3212c4ebbce84f32.tar.gz
12506 Add support to vioblk for DISCARD operation
Reviewed by: Robert Mustacchi <rm@fingolfin.org> Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/uts/common/io/blkdev')
-rw-r--r--usr/src/uts/common/io/blkdev/blkdev.c255
1 files changed, 224 insertions, 31 deletions
diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c
index 0718aebf7b..e9734d1672 100644
--- a/usr/src/uts/common/io/blkdev/blkdev.c
+++ b/usr/src/uts/common/io/blkdev/blkdev.c
@@ -45,6 +45,7 @@
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/dkio.h>
+#include <sys/dkioc_free_util.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h> /* for DTYPE_DIRECT */
#include <sys/kstat.h>
@@ -95,6 +96,7 @@
*
* o_write: Write data as described by bd_xfer_t argument.
*
+ * o_free_space: Free the space described by bd_xfer_t argument (optional).
*
* Queues
* ------
@@ -160,6 +162,11 @@ struct bd {
uint64_t d_numblks;
ddi_devid_t d_devid;
+ uint64_t d_max_free_seg;
+ uint64_t d_max_free_blks;
+ uint64_t d_max_free_seg_blks;
+ uint64_t d_free_align;
+
kmem_cache_t *d_cache;
bd_queue_t *d_queues;
kstat_t *d_ksp;
@@ -220,7 +227,10 @@ struct bd_queue {
#define i_blkno i_public.x_blkno
#define i_flags i_public.x_flags
#define i_qnum i_public.x_qnum
+#define i_dfl i_public.x_dfl
+#define CAN_FREESPACE(bd) \
+ (((bd)->d_ops.o_free_space == NULL) ? B_FALSE : B_TRUE)
/*
* Private prototypes.
@@ -262,6 +272,7 @@ static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
static int bd_check_uio(dev_t, struct uio *);
+static int bd_free_space(dev_t, bd_t *, dkioc_free_list_t *);
struct cmlb_tg_ops bd_tg_ops = {
TG_DK_OPS_VERSION_1,
@@ -664,7 +675,7 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
}
bd->d_ops = hdl->h_ops;
bd->d_private = hdl->h_private;
- bd->d_blkshift = 9; /* 512 bytes, to start */
+ bd->d_blkshift = DEV_BSHIFT; /* 512 bytes, to start */
if (bd->d_maxxfer % DEV_BSIZE) {
cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
@@ -711,10 +722,46 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
bzero(&drive, sizeof (drive));
/*
- * Default to one queue, parent driver can override.
+ * Default to one queue, and no restrictions on free space requests
+ * (if driver provides method) parent driver can override.
*/
drive.d_qcount = 1;
+ drive.d_free_align = 1;
bd->d_ops.o_drive_info(bd->d_private, &drive);
+
+ /*
+ * Several checks to make sure o_drive_info() didn't return bad
+ * values:
+ *
+ * There must be at least one queue
+ */
+ if (drive.d_qcount == 0)
+ goto fail_drive_info;
+
+ /* FREE/UNMAP/TRIM alignment needs to be at least 1 block */
+ if (drive.d_free_align == 0)
+ goto fail_drive_info;
+
+ /*
+ * If d_max_free_blks is not unlimited (not 0), then we cannot allow
+ * an unlimited segment size. It is however permissible to not impose
+ * a limit on the total number of blocks freed while limiting the
+ * amount allowed in an individual segment.
+ */
+ if ((drive.d_max_free_blks > 0 && drive.d_max_free_seg_blks == 0))
+ goto fail_drive_info;
+
+ /*
+ * If a limit is set on d_max_free_blks (by the above check, we know
+ * if there's a limit on d_max_free_blks, d_max_free_seg_blks cannot
+ * be unlimited), it cannot be smaller than the limit on an individual
+ * segment.
+ */
+ if ((drive.d_max_free_blks > 0 &&
+ drive.d_max_free_seg_blks > drive.d_max_free_blks)) {
+ goto fail_drive_info;
+ }
+
bd->d_qcount = drive.d_qcount;
bd->d_removable = drive.d_removable;
bd->d_hotpluggable = drive.d_hotpluggable;
@@ -722,8 +769,12 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
bd->d_maxxfer = drive.d_maxxfer;
- bd_create_inquiry_props(dip, &drive);
+ bd->d_free_align = drive.d_free_align;
+ bd->d_max_free_seg = drive.d_max_free_seg;
+ bd->d_max_free_blks = drive.d_max_free_blks;
+ bd->d_max_free_seg_blks = drive.d_max_free_seg_blks;
+ bd_create_inquiry_props(dip, &drive);
bd_create_errstats(bd, inst, &drive);
bd_update_state(bd);
@@ -749,25 +800,7 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
if (rv != 0) {
- bd_queues_free(bd);
- bd_destroy_errstats(bd);
- cmlb_free_handle(&bd->d_cmlbh);
-
- if (bd->d_ksp != NULL) {
- kstat_delete(bd->d_ksp);
- bd->d_ksp = NULL;
- } else {
- kmem_free(bd->d_kiop, sizeof (kstat_io_t));
- bd->d_kiop = NULL;
- }
-
- kmem_cache_destroy(bd->d_cache);
- cv_destroy(&bd->d_statecv);
- mutex_destroy(&bd->d_statemutex);
- mutex_destroy(&bd->d_ocmutex);
- mutex_destroy(&bd->d_ksmutex);
- ddi_soft_state_free(bd_state, inst);
- return (DDI_FAILURE);
+ goto fail_cmlb_attach;
}
if (bd->d_ops.o_devid_init != NULL) {
@@ -800,6 +833,28 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_report_dev(dip);
return (DDI_SUCCESS);
+
+fail_cmlb_attach:
+ bd_queues_free(bd);
+ bd_destroy_errstats(bd);
+
+fail_drive_info:
+ cmlb_free_handle(&bd->d_cmlbh);
+
+ if (bd->d_ksp != NULL) {
+ kstat_delete(bd->d_ksp);
+ bd->d_ksp = NULL;
+ } else {
+ kmem_free(bd->d_kiop, sizeof (kstat_io_t));
+ }
+
+ kmem_cache_destroy(bd->d_cache);
+ cv_destroy(&bd->d_statecv);
+ mutex_destroy(&bd->d_statemutex);
+ mutex_destroy(&bd->d_ocmutex);
+ mutex_destroy(&bd->d_ksmutex);
+ ddi_soft_state_free(bd_state, inst);
+ return (DDI_FAILURE);
}
static int
@@ -1013,6 +1068,10 @@ bd_xfer_free(bd_xfer_impl_t *xi)
if (xi->i_dmah) {
(void) ddi_dma_unbind_handle(xi->i_dmah);
}
+ if (xi->i_dfl != NULL) {
+ dfl_free((dkioc_free_list_t *)xi->i_dfl);
+ xi->i_dfl = NULL;
+ }
kmem_cache_free(xi->i_bd->d_cache, xi);
}
@@ -1541,6 +1600,48 @@ bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
rv = bd_flush_write_cache(bd, dkc);
return (rv);
}
+ case DKIOCFREE: {
+ dkioc_free_list_t *dfl = NULL;
+
+ /*
+ * Check free space support early to avoid copyin/allocation
+ * when unnecessary.
+ */
+ if (!CAN_FREESPACE(bd))
+ return (ENOTSUP);
+
+ rv = dfl_copyin(ptr, &dfl, flag, KM_SLEEP);
+ if (rv != 0)
+ return (rv);
+
+ /*
+ * bd_free_space() consumes 'dfl'. bd_free_space() will
+ * call dfl_iter() which will normally try to pass dfl through
+ * to bd_free_space_cb() which attaches dfl to the bd_xfer_t
+ * that is then queued for the underlying driver. Once the
+ * driver processes the request, the bd_xfer_t instance is
+ * disposed of, including any attached dkioc_free_list_t.
+ *
+ * If dfl cannot be processed by the underlying driver due to
+ * size or alignment requirements of the driver, dfl_iter()
+ * will replace dfl with one or more new dkioc_free_list_t
+ * instances with the correct alignment and sizes for the driver
+ * (and free the original dkioc_free_list_t).
+ */
+ rv = bd_free_space(dev, bd, dfl);
+ return (rv);
+ }
+
+ case DKIOC_CANFREE: {
+ boolean_t supported = CAN_FREESPACE(bd);
+
+ if (ddi_copyout(&supported, (void *)arg, sizeof (supported),
+ flag) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+ }
default:
break;
@@ -1934,6 +2035,84 @@ bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
return (rv);
}
+static int
+bd_free_space_done(struct buf *bp)
+{
+ freerbuf(bp);
+ return (0);
+}
+
+static int
+bd_free_space_cb(dkioc_free_list_t *dfl, void *arg, int kmflag)
+{
+ bd_t *bd = arg;
+ buf_t *bp = NULL;
+ bd_xfer_impl_t *xi = NULL;
+ boolean_t sync = DFL_ISSYNC(dfl) ? B_TRUE : B_FALSE;
+ int rv = 0;
+
+ bp = getrbuf(KM_SLEEP);
+ bp->b_resid = 0;
+ bp->b_bcount = 0;
+ bp->b_lblkno = 0;
+
+ xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_free_space, kmflag);
+ xi->i_dfl = dfl;
+
+ if (!sync) {
+ bp->b_iodone = bd_free_space_done;
+ bd_submit(bd, xi);
+ return (0);
+ }
+
+ xi->i_flags |= BD_XFER_POLL;
+ bd_submit(bd, xi);
+
+ (void) biowait(bp);
+ rv = geterror(bp);
+ freerbuf(bp);
+
+ return (rv);
+}
+
+static int
+bd_free_space(dev_t dev, bd_t *bd, dkioc_free_list_t *dfl)
+{
+ diskaddr_t p_len, p_offset;
+ uint64_t offset_bytes, len_bytes;
+ minor_t part = BDPART(dev);
+ const uint_t bshift = bd->d_blkshift;
+ dkioc_free_info_t dfi = {
+ .dfi_bshift = bshift,
+ .dfi_align = bd->d_free_align << bshift,
+ .dfi_max_bytes = bd->d_max_free_blks << bshift,
+ .dfi_max_ext = bd->d_max_free_seg,
+ .dfi_max_ext_bytes = bd->d_max_free_seg_blks << bshift,
+ };
+
+ if (cmlb_partinfo(bd->d_cmlbh, part, &p_len, &p_offset, NULL,
+ NULL, 0) != 0) {
+ dfl_free(dfl);
+ return (ENXIO);
+ }
+
+ /*
+ * bd_ioctl created our own copy of dfl, so we can modify as
+ * necessary
+ */
+ offset_bytes = (uint64_t)p_offset << bshift;
+ len_bytes = (uint64_t)p_len << bshift;
+
+ dfl->dfl_offset += offset_bytes;
+ if (dfl->dfl_offset < offset_bytes) {
+ dfl_free(dfl);
+ return (EOVERFLOW);
+ }
+
+ return (dfl_iter(dfl, &dfi, offset_bytes + len_bytes, bd_free_space_cb,
+ bd, KM_SLEEP));
+}
+
/*
* Nexus support.
*/
@@ -1976,26 +2155,40 @@ bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
{
bd_handle_t hdl;
- /*
- * There is full compatability between the version 0 API and the
- * current version.
- */
switch (ops->o_version) {
case BD_OPS_VERSION_0:
- case BD_OPS_CURRENT_VERSION:
+ case BD_OPS_VERSION_1:
+ case BD_OPS_VERSION_2:
break;
default:
+ /* Unsupported version */
return (NULL);
}
hdl = kmem_zalloc(sizeof (*hdl), kmflag);
- if (hdl != NULL) {
- hdl->h_ops = *ops;
- hdl->h_dma = dma;
- hdl->h_private = private;
+ if (hdl == NULL) {
+ return (NULL);
}
+ switch (ops->o_version) {
+ case BD_OPS_VERSION_2:
+ hdl->h_ops.o_free_space = ops->o_free_space;
+ /*FALLTHRU*/
+ case BD_OPS_VERSION_1:
+ case BD_OPS_VERSION_0:
+ hdl->h_ops.o_drive_info = ops->o_drive_info;
+ hdl->h_ops.o_media_info = ops->o_media_info;
+ hdl->h_ops.o_devid_init = ops->o_devid_init;
+ hdl->h_ops.o_sync_cache = ops->o_sync_cache;
+ hdl->h_ops.o_read = ops->o_read;
+ hdl->h_ops.o_write = ops->o_write;
+ break;
+ }
+
+ hdl->h_dma = dma;
+ hdl->h_private = private;
+
return (hdl);
}