diff options
author | Jason King <jasonbking@users.noreply.github.com> | 2020-03-23 13:42:57 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-23 13:42:57 -0500 |
commit | da036f5cbc2608d7100a682f9c91a938e76cefdc (patch) | |
tree | 0dac5887215bc6b1c0f25d1be3a6ce121925cdb1 | |
parent | fa1a9d36937b06768ece70e12e9a0c268e50230a (diff) | |
download | illumos-joyent-da036f5cbc2608d7100a682f9c91a938e76cefdc.tar.gz |
OS-8136 Add DISCARD/TRIM support to bhyve (illumos specific bits) (#270)
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Reviewed by: Mike Gerdts <mike.gerdts@joyent.com>
Approved by: Mike Zeller <mike.zeller@joyent.com>
-rw-r--r-- | usr/src/cmd/bhyve/block_if.c | 52 | ||||
-rw-r--r-- | usr/src/lib/brand/bhyve/zone/boot.c | 15 |
2 files changed, 58 insertions, 9 deletions
diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c index 42fa8c6109..85ad55b05a 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -364,9 +364,36 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) else br->br_resid = 0; } -#endif else err = EOPNOTSUPP; +#else + else if (bc->bc_ischr) { + dkioc_free_list_t dfl = { + .dfl_num_exts = 1, + .dfl_offset = 0, + .dfl_flags = 0, + .dfl_exts[0].dfle_start = br->br_offset, + .dfl_exts[0].dfle_length = br->br_resid + }; + + if (ioctl(bc->bc_fd, DKIOCFREE, &dfl)) + err = errno; + else + br->br_resid = 0; + } else { + struct flock fl = { + .l_whence = 0, + .l_type = F_WRLCK, + .l_start = br->br_offset, + .l_len = br->br_resid + }; + + if (fcntl(bc->bc_fd, F_FREESP, &fl)) + err = errno; + else + br->br_resid = 0; + } +#endif break; default: err = EINVAL; @@ -475,9 +502,7 @@ blockif_open(const char *optstr, const char *ident) off_t size, psectsz, psectoff; int extra, fd, i, sectsz; int nocache, sync, ro, candelete, geom, ssopt, pssopt; -#ifdef __FreeBSD__ int nodelete; -#endif #ifndef WITHOUT_CAPSICUM cap_rights_t rights; @@ -491,9 +516,7 @@ blockif_open(const char *optstr, const char *ident) nocache = 0; sync = 0; ro = 0; -#ifdef __FreeBSD__ nodelete = 0; -#endif /* * The first element in the optstring is always a pathname. 
@@ -506,10 +529,8 @@ blockif_open(const char *optstr, const char *ident) continue; else if (!strcmp(cp, "nocache")) nocache = 1; -#ifdef __FreeBSD__ else if (!strcmp(cp, "nodelete")) nodelete = 1; -#endif else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) sync = 1; else if (!strcmp(cp, "ro")) @@ -630,6 +651,10 @@ blockif_open(const char *optstr, const char *ident) } } } + + if (nodelete == 0 && ioctl(fd, DKIOC_CANFREE, &candelete)) + candelete = 0; + } else { int flags; @@ -639,6 +664,19 @@ blockif_open(const char *optstr, const char *ident) wce = WCE_FCNTL; } } + + /* + * We don't have a way to discover if a file supports the + * FREESP fcntl cmd (other than trying it). However, + * zfs, ufs, tmpfs, and udfs all support the FREESP fcntl cmd. + * NFSv3 and NFSv4 also forward the FREESP request + * to the server, so we always enable it for file based + * volumes. Anyone trying to run volumes on an unsupported + * configuration is on their own, and should be prepared + * for the requests to fail. + */ + if (nodelete == 0) + candelete = 1; } #endif diff --git a/usr/src/lib/brand/bhyve/zone/boot.c b/usr/src/lib/brand/bhyve/zone/boot.c index 23bbb88a34..dadcf4e96a 100644 --- a/usr/src/lib/brand/bhyve/zone/boot.c +++ b/usr/src/lib/brand/bhyve/zone/boot.c @@ -228,6 +228,7 @@ add_disk(char *disk, char *path, char *slotconf, size_t slotconf_len) const char *model = "virtio-blk"; uint_t pcibus = 0, pcidev = 0, pcifn = 0; const char *slotstr; + const char *nodelstr = ""; boolean_t isboot; isboot = is_env_true("device", disk, "boot"); @@ -262,6 +263,16 @@ add_disk(char *disk, char *path, char *slotconf, size_t slotconf_len) if (is_env_string("device", disk, "model", "virtio")) { model = "virtio-blk"; + /* + * bhyve's blockif code refers to the UNMAP/DISCARD/TRIM + * feature as 'delete' and so 'nodelete' is used by + * bhyve to disable the feature. 
We use 'trim' for + * interfaces we expose to the operator as that seems to + * be the most familiar name for the operation (and less + * likely to cause confusion). + */ + if (is_env_string("device", disk, "notrim", "true")) + nodelstr = ",nodelete"; } else if (is_env_string("device", disk, "model", "ahci")) { if (is_env_string("device", disk, "media", "cdrom")) { model = "ahci-cd"; @@ -273,8 +284,8 @@ add_disk(char *disk, char *path, char *slotconf, size_t slotconf_len) return (-1); } - if (snprintf(slotconf, slotconf_len, "%u:%u:%u,%s,%s", - pcibus, pcidev, pcifn, model, path) >= slotconf_len) { + if (snprintf(slotconf, slotconf_len, "%u:%u:%u,%s,%s%s", + pcibus, pcidev, pcifn, model, path, nodelstr) >= slotconf_len) { (void) printf("Error: disk path '%s' too long\n", path); return (-1); } |