diff options
author | Mike Gerdts <mike.gerdts@joyent.com> | 2020-03-04 07:12:09 +0000 |
---|---|---|
committer | Mike Gerdts <mike.gerdts@joyent.com> | 2020-03-04 14:42:36 +0000 |
commit | 4672be612c493e5ceec30f988e7e17b2ff068060 (patch) | |
tree | 4b4fbb5a16b3d4c01f1c5230eedf3739cab0dbf3 | |
parent | 553c0d387b245d0ee2292fc09d68dc51805c9409 (diff) | |
download | illumos-joyent-4672be612c493e5ceec30f988e7e17b2ff068060.tar.gz |
OS-6632 bhyve should be able to resize disk without reboot
-rw-r--r-- | usr/src/cmd/bhyve/block_if.c | 23 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/block_if.h | 2 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/pci_virtio_block.c | 70 | ||||
-rw-r--r-- | usr/src/cmd/bhyve/virtio.h | 35 |
4 files changed, 114 insertions, 16 deletions
diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c index 72c5b02a0d..ecec5fa3ff 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -3,6 +3,7 @@ * * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> * All rights reserved. + * Copyright 2020 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,10 +29,6 @@ * $FreeBSD$ */ -/* - * Copyright 2018 Joyent, Inc. - */ - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -1018,4 +1015,22 @@ blockif_set_wce(struct blockif_ctxt *bc, int wc_enable) return (res); } + +int +blockif_check_size(struct blockif_ctxt *bc, size_t *newsize) +{ + struct stat sbuf; + + if (fstat(bc->bc_fd, &sbuf) != 0) { + return (-1); + } + if (sbuf.st_size == bc->bc_size) { + return (-1); + } + + bc->bc_size = sbuf.st_size; + *newsize = bc->bc_size; + + return (0); +} #endif /* __FreeBSD__ */ diff --git a/usr/src/cmd/bhyve/block_if.h b/usr/src/cmd/bhyve/block_if.h index bff2b42768..3006944ba9 100644 --- a/usr/src/cmd/bhyve/block_if.h +++ b/usr/src/cmd/bhyve/block_if.h @@ -3,6 +3,7 @@ * * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> * All rights reserved. + * Copyright 2020 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -70,6 +71,7 @@ int blockif_is_ro(struct blockif_ctxt *bc); int blockif_candelete(struct blockif_ctxt *bc); #ifndef __FreeBSD__ int blockif_set_wce(struct blockif_ctxt *bc, int enable); +int blockif_check_size(struct blockif_ctxt *bc, size_t *newsize); #endif int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index f9da14ce89..c1d45c96be 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -3,7 +3,7 @@ * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. - * Copyright (c) 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -39,7 +39,6 @@ * http://www.illumos.org/license/CDDL. * * Copyright 2014 Pluribus Networks Inc. - * Copyright 2018 Joyent, Inc. */ #include <sys/cdefs.h> @@ -68,6 +67,9 @@ __FBSDID("$FreeBSD$"); #include "pci_emul.h" #include "virtio.h" #include "block_if.h" +#ifndef __FreeBSD__ +#include "mevent.h" +#endif #define VTBLK_RINGSZ 128 @@ -327,6 +329,34 @@ pci_vtblk_notify(void *vsc, struct vqueue_info *vq) pci_vtblk_proc(sc, vq); } +#ifndef __FreeBSD__ +/* + * See section 4.1.5.4 of VirtIO 1.1 spec. + * https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html + */ +static void +pci_vtblk_resize(int fd, enum ev_type type, void *vsc) +{ + struct pci_vtblk_softc *sc = vsc; + struct virtio_softc *vs = &sc->vbsc_vs; + size_t newsize; + (void) fd; + (void) type; + + if (blockif_check_size(sc->bc, &newsize) < 0) { + return; + } + + sc->vbsc_cfg.vbc_capacity = newsize / DEV_BSIZE; /* 512-byte units */ + + /* + * NO_VECTOR (0xffff) > MAX_MSIX_TABLE_ENTRIES (2048), so the NO_VECTOR + * check happens, just later. + */ + vq_interrupt_impl(vs, VTCFG_ISR_CONF_CHANGED, vs->vs_msix_cfg_idx); +} +#endif + static int pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { @@ -434,6 +464,42 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) return (1); } vi_set_io_bar(&sc->vbsc_vs, 0); + +#ifndef __FreeBSD__ + /* + * This is a complete hack - every 5 seconds it fstat()s the backing + * store to see if it is a different size than it was before. If so, it + * sends a config interrupt to the guest telling it to take a fresh look + * at the config. Presuming the guest does as told, the new size is + * seen. + * + * Polling for size changes so frequently for something that almost + * never happens is wasteful. An alternative mechanism should be found. + * Other mevents only allow you to poll for a file being ready for I/O. + * We also have an inotify implementation, but it suffers from similar + * limitations. + * + * It would be swell if spec_size_invalidate() (called by + * zvol_size_changed()) would issue a sysevent. However, the sysevent + * is not visible in the zone because it lacks privileges, so a helper + * would be needed. If we are only thinking of Triton's use, vminfod + * could be part of the plan. vminfod would see the size invalidation + * then could use bhyvectl to nudge the appropriate instance. A + * different approach may have the vmm module listening for that + * sysevent and making an upcall to the bhyve program to tell it that + * things have changed. I don't know if there are examples of in-kernel + * sysevent consumers. + * + * Note that changing the volsize already triggers a sysevent in most + * cases. This sysevent comes from the refreservation being changed as + * a side effect of volsize being changed and does not happen under all + * volsize changes. Also, this sysevent is specific to zvols and if we + * rely on it, other devices that back virtual disks would not benefit + * from a solution that relies on the refreservation change. + */ + (void) mevent_add(5000, EVF_TIMER, pci_vtblk_resize, sc); +#endif + return (0); } diff --git a/usr/src/cmd/bhyve/virtio.h b/usr/src/cmd/bhyve/virtio.h index a2c3362ec2..7420cfa876 100644 --- a/usr/src/cmd/bhyve/virtio.h +++ b/usr/src/cmd/bhyve/virtio.h @@ -251,11 +251,9 @@ struct vring_used { /* * Bits in VTCFG_R_ISR. These apply only if not using MSI-X. - * - * (We don't [yet?] ever use CONF_CHANGED.) */ #define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */ -#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */ +#define VTCFG_ISR_CONF_CHANGED 0x02 /* configuration changed */ #define VIRTIO_MSI_NO_VECTOR 0xFFFF @@ -430,17 +428,22 @@ vq_has_descs(struct vqueue_info *vq) vq->vq_avail->va_idx); } -/* - * Deliver an interrupt to guest on the given virtual queue - * (if possible, or a generic MSI interrupt if not using MSI-X). - */ +#ifdef __FreeBSD__ static inline void vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) +#else +static inline void +vq_interrupt_impl(struct virtio_softc *vs, uint8_t isr, uint16_t msix_idx) +#endif { - if (pci_msix_enabled(vs->vs_pi)) + if (pci_msix_enabled(vs->vs_pi)) { +#ifdef __FreeBSD__ pci_generate_msix(vs->vs_pi, vq->vq_msix_idx); - else { +#else + pci_generate_msix(vs->vs_pi, msix_idx); +#endif + } else { #ifndef __FreeBSD__ boolean_t unlock = B_FALSE; @@ -451,7 +454,7 @@ vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) #else VS_LOCK(vs); #endif - vs->vs_isr |= VTCFG_ISR_QUEUES; + vs->vs_isr |= isr; pci_generate_msi(vs->vs_pi, 0); pci_lintr_assert(vs->vs_pi); #ifndef __FreeBSD__ @@ -463,6 +466,18 @@ vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) } } +#ifndef __FreeBSD__ +/* + * Deliver an interrupt to guest on the given virtual queue + * (if possible, or a generic MSI interrupt if not using MSI-X). + */ +static inline void +vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) +{ + vq_interrupt_impl(vs, VTCFG_ISR_QUEUES, (vq)->vq_msix_idx); +} +#endif + struct iovec; void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, void *dev_softc, struct pci_devinst *pi, |