diff options
author | Jason King <jason.king@joyent.com> | 2019-12-03 23:13:23 +0000 |
---|---|---|
committer | Jason King <jason.king@joyent.com> | 2020-07-09 17:01:46 -0500 |
commit | 1a5ae140ba142cafb59ab08b3212c4ebbce84f32 (patch) | |
tree | 392db089970c3c0a81a85ab86094d0af5196f57c /usr/src/uts/common/io/blkdev | |
parent | 2fc9ab6e6080d66d8dc6b967cea7ba24bee8c7e5 (diff) | |
download | illumos-joyent-1a5ae140ba142cafb59ab08b3212c4ebbce84f32.tar.gz |
12506 Add support to vioblk for DISCARD operation
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
Approved by: Dan McDonald <danmcd@joyent.com>
Diffstat (limited to 'usr/src/uts/common/io/blkdev')
-rw-r--r-- | usr/src/uts/common/io/blkdev/blkdev.c | 255 |
1 files changed, 224 insertions, 31 deletions
diff --git a/usr/src/uts/common/io/blkdev/blkdev.c b/usr/src/uts/common/io/blkdev/blkdev.c index 0718aebf7b..e9734d1672 100644 --- a/usr/src/uts/common/io/blkdev/blkdev.c +++ b/usr/src/uts/common/io/blkdev/blkdev.c @@ -45,6 +45,7 @@ #include <sys/list.h> #include <sys/sysmacros.h> #include <sys/dkio.h> +#include <sys/dkioc_free_util.h> #include <sys/vtoc.h> #include <sys/scsi/scsi.h> /* for DTYPE_DIRECT */ #include <sys/kstat.h> @@ -95,6 +96,7 @@ * * o_write: Write data as described by bd_xfer_t argument. * + * o_free_space: Free the space described by bd_xfer_t argument (optional). * * Queues * ------ @@ -160,6 +162,11 @@ struct bd { uint64_t d_numblks; ddi_devid_t d_devid; + uint64_t d_max_free_seg; + uint64_t d_max_free_blks; + uint64_t d_max_free_seg_blks; + uint64_t d_free_align; + kmem_cache_t *d_cache; bd_queue_t *d_queues; kstat_t *d_ksp; @@ -220,7 +227,10 @@ struct bd_queue { #define i_blkno i_public.x_blkno #define i_flags i_public.x_flags #define i_qnum i_public.x_qnum +#define i_dfl i_public.x_dfl +#define CAN_FREESPACE(bd) \ + (((bd)->d_ops.o_free_space == NULL) ? B_FALSE : B_TRUE) /* * Private prototypes. @@ -262,6 +272,7 @@ static void bd_update_state(bd_t *); static int bd_check_state(bd_t *, enum dkio_state *); static int bd_flush_write_cache(bd_t *, struct dk_callback *); static int bd_check_uio(dev_t, struct uio *); +static int bd_free_space(dev_t, bd_t *, dkioc_free_list_t *); struct cmlb_tg_ops bd_tg_ops = { TG_DK_OPS_VERSION_1, @@ -664,7 +675,7 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) } bd->d_ops = hdl->h_ops; bd->d_private = hdl->h_private; - bd->d_blkshift = 9; /* 512 bytes, to start */ + bd->d_blkshift = DEV_BSHIFT; /* 512 bytes, to start */ if (bd->d_maxxfer % DEV_BSIZE) { cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name); @@ -711,10 +722,46 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) bzero(&drive, sizeof (drive)); /* - * Default to one queue, parent driver can override. + * Default to one queue, and no restrictions on free space requests + * (if driver provides method) parent driver can override. */ drive.d_qcount = 1; + drive.d_free_align = 1; bd->d_ops.o_drive_info(bd->d_private, &drive); + + /* + * Several checks to make sure o_drive_info() didn't return bad + * values: + * + * There must be at least one queue + */ + if (drive.d_qcount == 0) + goto fail_drive_info; + + /* FREE/UNMAP/TRIM alignment needs to be at least 1 block */ + if (drive.d_free_align == 0) + goto fail_drive_info; + + /* + * If d_max_free_blks is not unlimited (not 0), then we cannot allow + * an unlimited segment size. It is however permissible to not impose + * a limit on the total number of blocks freed while limiting the + * amount allowed in an individual segment. + */ + if ((drive.d_max_free_blks > 0 && drive.d_max_free_seg_blks == 0)) + goto fail_drive_info; + + /* + * If a limit is set on d_max_free_blks (by the above check, we know + * if there's a limit on d_max_free_blks, d_max_free_seg_blks cannot + * be unlimited), it cannot be smaller than the limit on an individual + * segment. + */ + if ((drive.d_max_free_blks > 0 && + drive.d_max_free_seg_blks > drive.d_max_free_blks)) { + goto fail_drive_info; + } + bd->d_qcount = drive.d_qcount; bd->d_removable = drive.d_removable; bd->d_hotpluggable = drive.d_hotpluggable; @@ -722,8 +769,12 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer) bd->d_maxxfer = drive.d_maxxfer; - bd_create_inquiry_props(dip, &drive); + bd->d_free_align = drive.d_free_align; + bd->d_max_free_seg = drive.d_max_free_seg; + bd->d_max_free_blks = drive.d_max_free_blks; + bd->d_max_free_seg_blks = drive.d_max_free_seg_blks; + bd_create_inquiry_props(dip, &drive); bd_create_errstats(bd, inst, &drive); bd_update_state(bd); @@ -749,25 +800,7 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK, CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0); if (rv != 0) { - bd_queues_free(bd); - bd_destroy_errstats(bd); - cmlb_free_handle(&bd->d_cmlbh); - - if (bd->d_ksp != NULL) { - kstat_delete(bd->d_ksp); - bd->d_ksp = NULL; - } else { - kmem_free(bd->d_kiop, sizeof (kstat_io_t)); - bd->d_kiop = NULL; - } - - kmem_cache_destroy(bd->d_cache); - cv_destroy(&bd->d_statecv); - mutex_destroy(&bd->d_statemutex); - mutex_destroy(&bd->d_ocmutex); - mutex_destroy(&bd->d_ksmutex); - ddi_soft_state_free(bd_state, inst); - return (DDI_FAILURE); + goto fail_cmlb_attach; } if (bd->d_ops.o_devid_init != NULL) { @@ -800,6 +833,28 @@ bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) ddi_report_dev(dip); return (DDI_SUCCESS); + +fail_cmlb_attach: + bd_queues_free(bd); + bd_destroy_errstats(bd); + +fail_drive_info: + cmlb_free_handle(&bd->d_cmlbh); + + if (bd->d_ksp != NULL) { + kstat_delete(bd->d_ksp); + bd->d_ksp = NULL; + } else { + kmem_free(bd->d_kiop, sizeof (kstat_io_t)); + } + + kmem_cache_destroy(bd->d_cache); + cv_destroy(&bd->d_statecv); + mutex_destroy(&bd->d_statemutex); + mutex_destroy(&bd->d_ocmutex); + mutex_destroy(&bd->d_ksmutex); + ddi_soft_state_free(bd_state, inst); + return (DDI_FAILURE); } static int @@ -1013,6 +1068,10 @@ bd_xfer_free(bd_xfer_impl_t *xi) if (xi->i_dmah) { (void) ddi_dma_unbind_handle(xi->i_dmah); } + if (xi->i_dfl != NULL) { + dfl_free((dkioc_free_list_t *)xi->i_dfl); + xi->i_dfl = NULL; + } kmem_cache_free(xi->i_bd->d_cache, xi); } @@ -1541,6 +1600,48 @@ bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp) rv = bd_flush_write_cache(bd, dkc); return (rv); } + case DKIOCFREE: { + dkioc_free_list_t *dfl = NULL; + + /* + * Check free space support early to avoid copyin/allocation + * when unnecessary. + */ + if (!CAN_FREESPACE(bd)) + return (ENOTSUP); + + rv = dfl_copyin(ptr, &dfl, flag, KM_SLEEP); + if (rv != 0) + return (rv); + + /* + * bd_free_space() consumes 'dfl'. bd_free_space() will + * call dfl_iter() which will normally try to pass dfl through + * to bd_free_space_cb() which attaches dfl to the bd_xfer_t + * that is then queued for the underlying driver. Once the + * driver processes the request, the bd_xfer_t instance is + * disposed of, including any attached dkioc_free_list_t. + * + * If dfl cannot be processed by the underlying driver due to + * size or alignment requirements of the driver, dfl_iter() + * will replace dfl with one or more new dkioc_free_list_t + * instances with the correct alignment and sizes for the driver + * (and free the original dkioc_free_list_t). + */ + rv = bd_free_space(dev, bd, dfl); + return (rv); + } + + case DKIOC_CANFREE: { + boolean_t supported = CAN_FREESPACE(bd); + + if (ddi_copyout(&supported, (void *)arg, sizeof (supported), + flag) != 0) { + return (EFAULT); + } + + return (0); + } default: break; @@ -1934,6 +2035,84 @@ bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc) return (rv); } +static int +bd_free_space_done(struct buf *bp) +{ + freerbuf(bp); + return (0); +} + +static int +bd_free_space_cb(dkioc_free_list_t *dfl, void *arg, int kmflag) +{ + bd_t *bd = arg; + buf_t *bp = NULL; + bd_xfer_impl_t *xi = NULL; + boolean_t sync = DFL_ISSYNC(dfl) ? B_TRUE : B_FALSE; + int rv = 0; + + bp = getrbuf(KM_SLEEP); + bp->b_resid = 0; + bp->b_bcount = 0; + bp->b_lblkno = 0; + + xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_free_space, kmflag); + xi->i_dfl = dfl; + + if (!sync) { + bp->b_iodone = bd_free_space_done; + bd_submit(bd, xi); + return (0); + } + + xi->i_flags |= BD_XFER_POLL; + bd_submit(bd, xi); + + (void) biowait(bp); + rv = geterror(bp); + freerbuf(bp); + + return (rv); +} + +static int +bd_free_space(dev_t dev, bd_t *bd, dkioc_free_list_t *dfl) +{ + diskaddr_t p_len, p_offset; + uint64_t offset_bytes, len_bytes; + minor_t part = BDPART(dev); + const uint_t bshift = bd->d_blkshift; + dkioc_free_info_t dfi = { + .dfi_bshift = bshift, + .dfi_align = bd->d_free_align << bshift, + .dfi_max_bytes = bd->d_max_free_blks << bshift, + .dfi_max_ext = bd->d_max_free_seg, + .dfi_max_ext_bytes = bd->d_max_free_seg_blks << bshift, + }; + + if (cmlb_partinfo(bd->d_cmlbh, part, &p_len, &p_offset, NULL, + NULL, 0) != 0) { + dfl_free(dfl); + return (ENXIO); + } + + /* + * bd_ioctl created our own copy of dfl, so we can modify as + * necessary + */ + offset_bytes = (uint64_t)p_offset << bshift; + len_bytes = (uint64_t)p_len << bshift; + + dfl->dfl_offset += offset_bytes; + if (dfl->dfl_offset < offset_bytes) { + dfl_free(dfl); + return (EOVERFLOW); + } + + return (dfl_iter(dfl, &dfi, offset_bytes + len_bytes, bd_free_space_cb, + bd, KM_SLEEP)); +} + /* * Nexus support. */ @@ -1976,26 +2155,40 @@ bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag) { bd_handle_t hdl; - /* - * There is full compatability between the version 0 API and the - * current version. - */ switch (ops->o_version) { case BD_OPS_VERSION_0: - case BD_OPS_CURRENT_VERSION: + case BD_OPS_VERSION_1: + case BD_OPS_VERSION_2: break; default: + /* Unsupported version */ return (NULL); } hdl = kmem_zalloc(sizeof (*hdl), kmflag); - if (hdl != NULL) { - hdl->h_ops = *ops; - hdl->h_dma = dma; - hdl->h_private = private; + if (hdl == NULL) { + return (NULL); } + switch (ops->o_version) { + case BD_OPS_VERSION_2: + hdl->h_ops.o_free_space = ops->o_free_space; + /*FALLTHRU*/ + case BD_OPS_VERSION_1: + case BD_OPS_VERSION_0: + hdl->h_ops.o_drive_info = ops->o_drive_info; + hdl->h_ops.o_media_info = ops->o_media_info; + hdl->h_ops.o_devid_init = ops->o_devid_init; + hdl->h_ops.o_sync_cache = ops->o_sync_cache; + hdl->h_ops.o_read = ops->o_read; + hdl->h_ops.o_write = ops->o_write; + break; + } + + hdl->h_dma = dma; + hdl->h_private = private; + return (hdl); } |