summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Gerdts <mike.gerdts@joyent.com> 2020-03-04 07:12:09 +0000
committer Mike Gerdts <mike.gerdts@joyent.com> 2020-03-04 14:42:36 +0000
commit 4672be612c493e5ceec30f988e7e17b2ff068060 (patch)
tree 4b4fbb5a16b3d4c01f1c5230eedf3739cab0dbf3
parent 553c0d387b245d0ee2292fc09d68dc51805c9409 (diff)
download illumos-joyent-4672be612c493e5ceec30f988e7e17b2ff068060.tar.gz
OS-6632 bhyve should be able to resize disk without reboot
-rw-r--r-- usr/src/cmd/bhyve/block_if.c 23
-rw-r--r-- usr/src/cmd/bhyve/block_if.h 2
-rw-r--r-- usr/src/cmd/bhyve/pci_virtio_block.c 70
-rw-r--r-- usr/src/cmd/bhyve/virtio.h 35
4 files changed, 114 insertions, 16 deletions
diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c
index 72c5b02a0d..ecec5fa3ff 100644
--- a/usr/src/cmd/bhyve/block_if.c
+++ b/usr/src/cmd/bhyve/block_if.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
* All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -28,10 +29,6 @@
* $FreeBSD$
*/
-/*
- * Copyright 2018 Joyent, Inc.
- */
-
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -1018,4 +1015,22 @@ blockif_set_wce(struct blockif_ctxt *bc, int wc_enable)
return (res);
}
+/*
+ * Re-check the size of the backing store by calling fstat() on bc->bc_fd.
+ *
+ * Returns 0 when the reported st_size differs from the cached bc->bc_size;
+ * in that case the new size is stored in bc->bc_size and in *newsize.
+ * Returns -1, leaving bc->bc_size and *newsize untouched, when fstat()
+ * fails or when the size has not changed.  Callers cannot tell those two
+ * cases apart from the return value.
+ *
+ * NOTE(review): bc->bc_size is written here with no locking visible in this
+ * function; confirm that is safe with respect to other users of bc.
+ */
+int
+blockif_check_size(struct blockif_ctxt *bc, size_t *newsize)
+{
+ struct stat sbuf;
+
+ if (fstat(bc->bc_fd, &sbuf) != 0) {
+ return (-1);
+ }
+ if (sbuf.st_size == bc->bc_size) {
+ return (-1);
+ }
+
+ bc->bc_size = sbuf.st_size;
+ *newsize = bc->bc_size;
+
+ return (0);
+}
#endif /* __FreeBSD__ */
diff --git a/usr/src/cmd/bhyve/block_if.h b/usr/src/cmd/bhyve/block_if.h
index bff2b42768..3006944ba9 100644
--- a/usr/src/cmd/bhyve/block_if.h
+++ b/usr/src/cmd/bhyve/block_if.h
@@ -3,6 +3,7 @@
*
* Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
* All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -70,6 +71,7 @@ int blockif_is_ro(struct blockif_ctxt *bc);
int blockif_candelete(struct blockif_ctxt *bc);
#ifndef __FreeBSD__
int blockif_set_wce(struct blockif_ctxt *bc, int enable);
+int blockif_check_size(struct blockif_ctxt *bc, size_t *newsize);
#endif
int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq);
diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c
index f9da14ce89..c1d45c96be 100644
--- a/usr/src/cmd/bhyve/pci_virtio_block.c
+++ b/usr/src/cmd/bhyve/pci_virtio_block.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
- * Copyright (c) 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -39,7 +39,6 @@
* http://www.illumos.org/license/CDDL.
*
* Copyright 2014 Pluribus Networks Inc.
- * Copyright 2018 Joyent, Inc.
*/
#include <sys/cdefs.h>
@@ -68,6 +67,9 @@ __FBSDID("$FreeBSD$");
#include "pci_emul.h"
#include "virtio.h"
#include "block_if.h"
+#ifndef __FreeBSD__
+#include "mevent.h"
+#endif
#define VTBLK_RINGSZ 128
@@ -327,6 +329,34 @@ pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
pci_vtblk_proc(sc, vq);
}
+#ifndef __FreeBSD__
+/*
+ * Callback that checks whether the backing store has changed size and, if
+ * so, notifies the guest.
+ *
+ * fd and type are unused.  vsc is the struct pci_vtblk_softc of the device.
+ * When blockif_check_size() reports a new size, the capacity field of the
+ * device config is updated (in DEV_BSIZE units) and a configuration-change
+ * interrupt is raised through vq_interrupt_impl().  When
+ * blockif_check_size() returns a negative value, nothing is changed and no
+ * interrupt is sent.
+ *
+ * See section 4.1.5.4 of VirtIO 1.1 spec.
+ * https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html
+ */
+static void
+pci_vtblk_resize(int fd, enum ev_type type, void *vsc)
+{
+ struct pci_vtblk_softc *sc = vsc;
+ struct virtio_softc *vs = &sc->vbsc_vs;
+ size_t newsize;
+ (void) fd;
+ (void) type;
+
+ if (blockif_check_size(sc->bc, &newsize) < 0) {
+ return;
+ }
+
+ sc->vbsc_cfg.vbc_capacity = newsize / DEV_BSIZE; /* 512-byte units */
+
+ /*
+ * No explicit check for vs->vs_msix_cfg_idx == NO_VECTOR is made here.
+ * NO_VECTOR (0xffff) > MAX_MSIX_TABLE_ENTRIES (2048), so the NO_VECTOR
+ * check happens, just later.
+ */
+ vq_interrupt_impl(vs, VTCFG_ISR_CONF_CHANGED, vs->vs_msix_cfg_idx);
+}
+#endif
+
static int
pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
@@ -434,6 +464,42 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (1);
}
vi_set_io_bar(&sc->vbsc_vs, 0);
+
+#ifndef __FreeBSD__
+ /*
+ * This is a complete hack - every 5 seconds it fstat()s the backing
+ * store to see if it is a different size than it was before. If so, it
+ * sends a config interrupt to the guest telling it to take a fresh look
+ * at the config. Presuming the guest does as told, the new size is
+ * seen.
+ *
+ * Polling for size changes so frequently for something that almost
+ * never happens is wasteful. An alternative mechanism should be found.
+ * Other mevents only allow you to poll for a file being ready for I/O.
+ * We also have an inotify implementation, but it suffers from similar
+ * limitations.
+ *
+ * It would be swell if spec_size_invalidate() (called by
+ * zvol_size_changed()) would issue a sysevent. However, the sysevent
+ * is not visible in the zone because it lacks privileges, so a helper
+ * would be needed. If we are only thinking of Triton's use, vminfod
+ * could be part of the plan. vminfod would see the size invalidation
+ * then could use bhyvectl to nudge the appropriate instance. A
+ * different approach may have the vmm module listening for that
+ * sysevent and making an upcall to the bhyve program to tell it that
+ * things have changed. I don't know if there are examples of in-kernel
+ * sysevent consumers.
+ *
+ * Note that changing the volsize already triggers a sysevent in most
+ * cases. This sysevent comes from the refreservation being changed as
+ * a side effect of volsize being changed and does not happen under all
+ * volsize changes. Also, this sysevent is specific to zvols and if we
+ * rely on it, other devices that back virtual disks would not benefit
+ * from a solution that relies on the refreservation change.
+ */
+ (void) mevent_add(5000, EVF_TIMER, pci_vtblk_resize, sc);
+#endif
+
return (0);
}
diff --git a/usr/src/cmd/bhyve/virtio.h b/usr/src/cmd/bhyve/virtio.h
index a2c3362ec2..7420cfa876 100644
--- a/usr/src/cmd/bhyve/virtio.h
+++ b/usr/src/cmd/bhyve/virtio.h
@@ -251,11 +251,9 @@ struct vring_used {
/*
* Bits in VTCFG_R_ISR. These apply only if not using MSI-X.
- *
- * (We don't [yet?] ever use CONF_CHANGED.)
*/
#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */
-#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */
+#define VTCFG_ISR_CONF_CHANGED 0x02 /* configuration changed */
#define VIRTIO_MSI_NO_VECTOR 0xFFFF
@@ -430,17 +428,22 @@ vq_has_descs(struct vqueue_info *vq)
vq->vq_avail->va_idx);
}
-/*
- * Deliver an interrupt to guest on the given virtual queue
- * (if possible, or a generic MSI interrupt if not using MSI-X).
- */
+#ifdef __FreeBSD__
static inline void
vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
+#else
+static inline void
+vq_interrupt_impl(struct virtio_softc *vs, uint8_t isr, uint16_t msix_idx)
+#endif
{
- if (pci_msix_enabled(vs->vs_pi))
+ if (pci_msix_enabled(vs->vs_pi)) {
+#ifdef __FreeBSD__
pci_generate_msix(vs->vs_pi, vq->vq_msix_idx);
- else {
+#else
+ pci_generate_msix(vs->vs_pi, msix_idx);
+#endif
+ } else {
#ifndef __FreeBSD__
boolean_t unlock = B_FALSE;
@@ -451,7 +454,7 @@ vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
#else
VS_LOCK(vs);
#endif
- vs->vs_isr |= VTCFG_ISR_QUEUES;
+ vs->vs_isr |= isr;
pci_generate_msi(vs->vs_pi, 0);
pci_lintr_assert(vs->vs_pi);
#ifndef __FreeBSD__
@@ -463,6 +466,18 @@ vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
}
}
+#ifndef __FreeBSD__
+/*
+ * Deliver an interrupt to guest on the given virtual queue
+ * (if possible, or a generic MSI interrupt if not using MSI-X).
+ *
+ * This is a thin wrapper around vq_interrupt_impl() that passes
+ * VTCFG_ISR_QUEUES as the ISR bit and the queue's vq_msix_idx as the MSI-X
+ * vector index.
+ */
+static inline void
+vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq)
+{
+ vq_interrupt_impl(vs, VTCFG_ISR_QUEUES, (vq)->vq_msix_idx);
+}
+#endif
+
struct iovec;
void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
void *dev_softc, struct pci_devinst *pi,