diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2022-06-13 19:25:42 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@oxide.computer> | 2022-07-14 00:47:06 +0000 |
commit | a26f9c149bc8e4c9206303674cdef16edec1ca70 (patch) | |
tree | d61360246b0025136bfe359ee10370830fedad4c | |
parent | 81bcd6ad07db9db66927eebc0d558e9a12011226 (diff) | |
download | illumos-joyent-a26f9c149bc8e4c9206303674cdef16edec1ca70.tar.gz |
14755 viona should expose more ring state
14756 expose viona kernel interface version
14787 bhyve should block leases on drv purge
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Luqman Aden <luqman@oxide.computer>
Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r-- | usr/src/pkg/manifests/system-bhyve-tests.p5m | 2 | ||||
-rw-r--r-- | usr/src/test/bhyve-tests/runfiles/default.run | 6 | ||||
-rw-r--r-- | usr/src/test/bhyve-tests/tests/Makefile | 2 | ||||
-rw-r--r-- | usr/src/test/bhyve-tests/tests/viona/Makefile | 51 | ||||
-rw-r--r-- | usr/src/test/bhyve-tests/tests/viona/interface_version.c | 51 | ||||
-rw-r--r-- | usr/src/uts/intel/io/viona/viona_impl.h | 23 | ||||
-rw-r--r-- | usr/src/uts/intel/io/viona/viona_main.c | 123 | ||||
-rw-r--r-- | usr/src/uts/intel/io/viona/viona_ring.c | 194 | ||||
-rw-r--r-- | usr/src/uts/intel/io/viona/viona_rx.c | 6 | ||||
-rw-r--r-- | usr/src/uts/intel/io/viona/viona_tx.c | 89 | ||||
-rw-r--r-- | usr/src/uts/intel/io/vmm/vmm_sol_dev.c | 18 | ||||
-rw-r--r-- | usr/src/uts/intel/sys/viona_io.h | 30 |
12 files changed, 512 insertions, 83 deletions
diff --git a/usr/src/pkg/manifests/system-bhyve-tests.p5m b/usr/src/pkg/manifests/system-bhyve-tests.p5m index 1df6e67bd5..aae7d2807a 100644 --- a/usr/src/pkg/manifests/system-bhyve-tests.p5m +++ b/usr/src/pkg/manifests/system-bhyve-tests.p5m @@ -49,6 +49,8 @@ file path=opt/bhyve-tests/tests/mevent/read_pause mode=0555 file path=opt/bhyve-tests/tests/mevent/read_requeue mode=0555 file path=opt/bhyve-tests/tests/mevent/vnode_file mode=0555 file path=opt/bhyve-tests/tests/mevent/vnode_zvol mode=0555 +dir path=opt/bhyve-tests/tests/viona +file path=opt/bhyve-tests/tests/viona/interface_version mode=0555 dir path=opt/bhyve-tests/tests/vmm file path=opt/bhyve-tests/tests/vmm/check_iommu mode=0555 file path=opt/bhyve-tests/tests/vmm/fpu_getset mode=0555 diff --git a/usr/src/test/bhyve-tests/runfiles/default.run b/usr/src/test/bhyve-tests/runfiles/default.run index 5cdd9647b6..8ec89a7a0a 100644 --- a/usr/src/test/bhyve-tests/runfiles/default.run +++ b/usr/src/test/bhyve-tests/runfiles/default.run @@ -49,6 +49,12 @@ tests = [ 'triple_fault' ] +[/opt/bhyve-tests/tests/viona] +user = root +tests = [ + 'interface_version' + ] + # Tests of userspace mevent system, built from cmd/bhyve [/opt/bhyve-tests/tests/mevent] tests = ['lists_delete', 'read_disable', 'read_pause', 'read_requeue', diff --git a/usr/src/test/bhyve-tests/tests/Makefile b/usr/src/test/bhyve-tests/tests/Makefile index 5dc416db01..6742106ae0 100644 --- a/usr/src/test/bhyve-tests/tests/Makefile +++ b/usr/src/test/bhyve-tests/tests/Makefile @@ -15,6 +15,6 @@ .PARALLEL: $(SUBDIRS) -SUBDIRS = inst_emul kdev vmm +SUBDIRS = inst_emul kdev viona vmm include $(SRC)/test/Makefile.com diff --git a/usr/src/test/bhyve-tests/tests/viona/Makefile b/usr/src/test/bhyve-tests/tests/viona/Makefile new file mode 100644 index 0000000000..8b68c11d35 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/viona/Makefile @@ -0,0 +1,51 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and 
Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# Copyright 2022 Oxide Computer Company + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/cmd/Makefile.cmd.64 +include $(SRC)/test/Makefile.com + +PROG = interface_version + +CLEAN_OBJS = $(PROG:%=%.o) + +ROOTOPTPKG = $(ROOT)/opt/bhyve-tests +TESTDIR = $(ROOTOPTPKG)/tests/viona + +CMDS = $(PROG:%=$(TESTDIR)/%) +$(CMDS) := FILEMODE = 0555 + +CSTD= $(CSTD_GNU99) +CPPFLAGS = $(CPPFLAGS.master) \ + -I$(SRC)/uts/intel + +all: $(PROG) + +install: all $(CMDS) + +clean: + -$(RM) $(CLEAN_OBJS) +clobber: clean + -$(RM) $(PROG) + +$(CMDS): $(TESTDIR) $(PROG) + +$(TESTDIR): + $(INS.dir) + +$(TESTDIR)/%: % + $(INS.file) + +%: %.o + $(LINK.c) -o $@ $< $(LDLIBS) + $(POST_PROCESS) diff --git a/usr/src/test/bhyve-tests/tests/viona/interface_version.c b/usr/src/test/bhyve-tests/tests/viona/interface_version.c new file mode 100644 index 0000000000..1b3cd585f0 --- /dev/null +++ b/usr/src/test/bhyve-tests/tests/viona/interface_version.c @@ -0,0 +1,51 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <fcntl.h> +#include <libgen.h> + +#include <sys/viona_io.h> + +#define VIONA_DEV "/dev/viona" + +int +main(int argc, char *argv[]) +{ + const char *suite_name = basename(argv[0]); + + int ctl_fd = open(VIONA_DEV, O_EXCL | O_RDWR); + if (ctl_fd < 0) { + perror("could not open viona device"); + return (EXIT_FAILURE); + } + + int version = ioctl(ctl_fd, VNA_IOC_VERSION, 0); + if (version < 0) { + perror("VNA_IOC_VERSION ioctl failed"); + return (EXIT_FAILURE); + } + if (version != VIONA_CURRENT_INTERFACE_VERSION) { + (void) fprintf(stderr, "kernel version %d != expected %d\n", + version, VIONA_CURRENT_INTERFACE_VERSION); + return (EXIT_FAILURE); + } + + (void) close(ctl_fd); + (void) printf("%s\tPASS\n", suite_name); + return (0); +} diff --git a/usr/src/uts/intel/io/viona/viona_impl.h b/usr/src/uts/intel/io/viona/viona_impl.h index 760474e78b..c99bd2c51e 100644 --- a/usr/src/uts/intel/io/viona/viona_impl.h +++ b/usr/src/uts/intel/io/viona/viona_impl.h @@ -35,7 +35,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. 
- * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #ifndef _VIONA_IMPL_H @@ -76,7 +76,8 @@ enum viona_ring_state { enum viona_ring_state_flags { VRSF_REQ_START = 0x1, /* start running from INIT state */ VRSF_REQ_STOP = 0x2, /* stop running, clean up, goto RESET state */ - VRSF_RENEW = 0x4, /* ring renewing lease */ + VRSF_REQ_PAUSE = 0x4, /* stop running, goto INIT state */ + VRSF_RENEW = 0x8, /* ring renewing lease */ }; typedef struct viona_vring { @@ -232,11 +233,6 @@ struct virtio_net_hdr { }; #pragma pack() -#define VRING_NEED_BAIL(ring, proc) \ - (((ring)->vr_state_flags & VRSF_REQ_STOP) != 0 || \ - ((proc)->p_flag & SEXITING) != 0) - - #define VNETHOOK_INTERESTED_IN(neti) \ (neti)->vni_nethook.vnh_event_in.he_interested #define VNETHOOK_INTERESTED_OUT(neti) \ @@ -288,12 +284,23 @@ struct virtio_net_hdr { #define VIRTIO_F_RING_INDIRECT_DESC (1 << 28) #define VIRTIO_F_RING_EVENT_IDX (1 << 29) +struct viona_ring_params { + uint64_t vrp_pa; + uint16_t vrp_size; + uint16_t vrp_avail_idx; + uint16_t vrp_used_idx; +}; void viona_ring_alloc(viona_link_t *, viona_vring_t *); void viona_ring_free(viona_vring_t *); +int viona_ring_get_state(viona_link_t *, uint16_t, struct viona_ring_params *); +int viona_ring_set_state(viona_link_t *, uint16_t, + const struct viona_ring_params *); int viona_ring_reset(viona_vring_t *, boolean_t); -int viona_ring_init(viona_link_t *, uint16_t, uint16_t, uint64_t); +int viona_ring_init(viona_link_t *, uint16_t, const struct viona_ring_params *); boolean_t viona_ring_lease_renew(viona_vring_t *); +bool vring_need_bail(const viona_vring_t *); +int viona_ring_pause(viona_vring_t *); int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *, vmm_page_t **); diff --git a/usr/src/uts/intel/io/viona/viona_main.c b/usr/src/uts/intel/io/viona/viona_main.c index a34196ba1a..c6a750d532 100644 --- a/usr/src/uts/intel/io/viona/viona_main.c +++ b/usr/src/uts/intel/io/viona/viona_main.c @@ -35,7 +35,7 @@ * 
* Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ /* @@ -111,6 +111,14 @@ * | VRS_RESET and exit. * | ^ * | | + * |<-------------------------------------------<+ + * | | | + * | | ^ + * | * If ring is requested to pause (but not stop) from the + * | VRS_RUN state, it will return to the VRS_INIT state. + * | + * | ^ + * | | * | ^ * V * +-----------+ The worker thread associated with the ring has started @@ -286,8 +294,11 @@ static int viona_ioc_delete(viona_soft_state_t *, boolean_t); static int viona_ioc_set_notify_ioport(viona_link_t *, uint16_t); static int viona_ioc_ring_init(viona_link_t *, void *, int); +static int viona_ioc_ring_set_state(viona_link_t *, void *, int); +static int viona_ioc_ring_get_state(viona_link_t *, void *, int); static int viona_ioc_ring_reset(viona_link_t *, uint_t); static int viona_ioc_ring_kick(viona_link_t *, uint_t); +static int viona_ioc_ring_pause(viona_link_t *, uint_t); static int viona_ioc_ring_set_msi(viona_link_t *, void *, int); static int viona_ioc_ring_intr_clear(viona_link_t *, uint_t); static int viona_ioc_intr_poll(viona_link_t *, void *, int, int *); @@ -530,6 +541,9 @@ viona_ioctl(dev_t dev, int cmd, intptr_t data, int md, cred_t *cr, int *rv) return (viona_ioc_create(ss, dptr, md, cr)); case VNA_IOC_DELETE: return (viona_ioc_delete(ss, B_FALSE)); + case VNA_IOC_VERSION: + *rv = VIONA_CURRENT_INTERFACE_VERSION; + return (0); default: break; } @@ -578,6 +592,16 @@ viona_ioctl(dev_t dev, int cmd, intptr_t data, int md, cred_t *cr, int *rv) case VNA_IOC_RING_INTR_CLR: err = viona_ioc_ring_intr_clear(link, (uint_t)data); break; + case VNA_IOC_RING_SET_STATE: + err = viona_ioc_ring_set_state(link, dptr, md); + break; + case VNA_IOC_RING_GET_STATE: + err = viona_ioc_ring_get_state(link, dptr, md); + break; + case VNA_IOC_RING_PAUSE: + err = viona_ioc_ring_pause(link, (uint_t)data); + break; + case VNA_IOC_INTR_POLL:
err = viona_ioc_intr_poll(link, dptr, md, rv); break; @@ -853,13 +877,65 @@ viona_ioc_ring_init(viona_link_t *link, void *udata, int md) if (ddi_copyin(udata, &kri, sizeof (kri), md) != 0) { return (EFAULT); } + const struct viona_ring_params params = { + .vrp_pa = kri.ri_qaddr, + .vrp_size = kri.ri_qsize, + .vrp_avail_idx = 0, + .vrp_used_idx = 0, + }; + + err = viona_ring_init(link, kri.ri_index, &params); + + return (err); +} + +static int +viona_ioc_ring_set_state(viona_link_t *link, void *udata, int md) +{ + vioc_ring_state_t krs; + int err; + + if (ddi_copyin(udata, &krs, sizeof (krs), md) != 0) { + return (EFAULT); + } + const struct viona_ring_params params = { + .vrp_pa = krs.vrs_qaddr, + .vrp_size = krs.vrs_qsize, + .vrp_avail_idx = krs.vrs_avail_idx, + .vrp_used_idx = krs.vrs_used_idx, + }; - err = viona_ring_init(link, kri.ri_index, kri.ri_qsize, kri.ri_qaddr); + err = viona_ring_init(link, krs.vrs_index, &params); return (err); } static int +viona_ioc_ring_get_state(viona_link_t *link, void *udata, int md) +{ + vioc_ring_state_t krs; + + if (ddi_copyin(udata, &krs, sizeof (krs), md) != 0) { + return (EFAULT); + } + + struct viona_ring_params params; + int err = viona_ring_get_state(link, krs.vrs_index, &params); + if (err != 0) { + return (err); + } + krs.vrs_qsize = params.vrp_size; + krs.vrs_qaddr = params.vrp_pa; + krs.vrs_avail_idx = params.vrp_avail_idx; + krs.vrs_used_idx = params.vrp_used_idx; + + if (ddi_copyout(&krs, udata, sizeof (krs), md) != 0) { + return (EFAULT); + } + return (0); +} + +static int viona_ioc_ring_reset(viona_link_t *link, uint_t idx) { viona_vring_t *ring; @@ -909,6 +985,17 @@ viona_ioc_ring_kick(viona_link_t *link, uint_t idx) } static int +viona_ioc_ring_pause(viona_link_t *link, uint_t idx) +{ + if (idx >= VIONA_VQ_MAX) { + return (EINVAL); + } + + viona_vring_t *ring = &link->l_vrings[idx]; + return (viona_ring_pause(ring)); +} + +static int viona_ioc_ring_set_msi(viona_link_t *link, void *data, int md) { vioc_ring_msi_t vrm; @@
-935,21 +1022,33 @@ viona_notify_iop(void *arg, bool in, uint16_t port, uint8_t bytes, uint32_t *val) { viona_link_t *link = (viona_link_t *)arg; - uint16_t vq = *val; - if (in) { - /* - * Do not service read (in/ins) requests on this ioport. - * Instead, indicate that the handler is not found, causing a - * fallback to userspace processing. - */ + /* + * If the request is a read (in/ins), or directed at a port other than + * what we expect to be registered on, ignore it. + */ + if (in || port != link->l_notify_ioport) { return (ESRCH); } - if (port != link->l_notify_ioport) { - return (EINVAL); + /* Let userspace handle notifications for rings other than RX/TX. */ + const uint16_t vq = *val; + if (vq >= VIONA_VQ_MAX) { + return (ESRCH); } - return (viona_ioc_ring_kick(link, vq)); + + viona_vring_t *ring = &link->l_vrings[vq]; + int res = 0; + + mutex_enter(&ring->vr_lock); + if (ring->vr_state == VRS_RUN) { + cv_broadcast(&ring->vr_cv); + } else { + res = ESRCH; + } + mutex_exit(&ring->vr_lock); + + return (res); } static int diff --git a/usr/src/uts/intel/io/viona/viona_ring.c b/usr/src/uts/intel/io/viona/viona_ring.c index 2d847dda09..9339f805d4 100644 --- a/usr/src/uts/intel/io/viona/viona_ring.c +++ b/usr/src/uts/intel/io/viona/viona_ring.c @@ -35,7 +35,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc.
- * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ @@ -281,15 +281,19 @@ viona_ring_free(viona_vring_t *ring) } int -viona_ring_init(viona_link_t *link, uint16_t idx, uint16_t qsz, uint64_t pa) +viona_ring_init(viona_link_t *link, uint16_t idx, + const struct viona_ring_params *params) { viona_vring_t *ring; kthread_t *t; int err = 0; + const uint16_t qsz = params->vrp_size; + const uint64_t pa = params->vrp_pa; if (idx >= VIONA_VQ_MAX) { return (EINVAL); } + if (qsz == 0 || qsz > VRING_MAX_LEN || (1 << (ffs(qsz) - 1)) != qsz) { return (EINVAL); } @@ -320,8 +324,8 @@ viona_ring_init(viona_link_t *link, uint16_t idx, uint16_t qsz, uint64_t pa) } /* Initialize queue indexes */ - ring->vr_cur_aidx = 0; - ring->vr_cur_uidx = 0; + ring->vr_cur_aidx = params->vrp_avail_idx; + ring->vr_cur_uidx = params->vrp_used_idx; if (idx == VIONA_VQ_TX) { viona_tx_ring_alloc(ring, qsz); @@ -351,11 +355,48 @@ fail: ring->vr_size = 0; ring->vr_mask = 0; ring->vr_pa = 0; + ring->vr_cur_aidx = 0; + ring->vr_cur_uidx = 0; mutex_exit(&ring->vr_lock); return (err); } int +viona_ring_get_state(viona_link_t *link, uint16_t idx, + struct viona_ring_params *params) +{ + viona_vring_t *ring; + + if (idx >= VIONA_VQ_MAX) { + return (EINVAL); + } + + ring = &link->l_vrings[idx]; + mutex_enter(&ring->vr_lock); + + params->vrp_size = ring->vr_size; + params->vrp_pa = ring->vr_pa; + + if (ring->vr_state == VRS_RUN) { + /* On a running ring, we must heed the avail/used locks */ + mutex_enter(&ring->vr_a_mutex); + params->vrp_avail_idx = ring->vr_cur_aidx; + mutex_exit(&ring->vr_a_mutex); + mutex_enter(&ring->vr_u_mutex); + params->vrp_used_idx = ring->vr_cur_uidx; + mutex_exit(&ring->vr_u_mutex); + } else { + /* Otherwise vr_lock is adequate protection */ + params->vrp_avail_idx = ring->vr_cur_aidx; + params->vrp_used_idx = ring->vr_cur_uidx; + } + + mutex_exit(&ring->vr_lock); + + return (0); +} + +int viona_ring_reset(viona_vring_t *ring, boolean_t heed_signals) { 
mutex_enter(&ring->vr_lock); @@ -485,40 +526,152 @@ viona_intr_ring(viona_vring_t *ring, boolean_t skip_flags_check) } } +static inline bool +vring_stop_req(const viona_vring_t *ring) +{ + return ((ring->vr_state_flags & VRSF_REQ_STOP) != 0); +} + +static inline bool +vring_pause_req(const viona_vring_t *ring) +{ + return ((ring->vr_state_flags & VRSF_REQ_PAUSE) != 0); +} + +static inline bool +vring_start_req(const viona_vring_t *ring) +{ + return ((ring->vr_state_flags & VRSF_REQ_START) != 0); +} + +/* + * Check if vring worker thread should bail out. This will heed indications + * that the containing process is exiting, as well as requests to stop or pause + * the ring. The `stop_only` parameter controls if pause requests are ignored + * (true) or checked (false). + * + * Caller should hold vr_lock. + */ +static bool +vring_need_bail_ext(const viona_vring_t *ring, bool stop_only) +{ + ASSERT(MUTEX_HELD(&ring->vr_lock)); + + if (vring_stop_req(ring) || + (!stop_only && vring_pause_req(ring))) { + return (true); + } + + kthread_t *t = ring->vr_worker_thread; + if (t != NULL) { + proc_t *p = ttoproc(t); + + ASSERT(p != NULL); + if ((p->p_flag & SEXITING) != 0) { + return (true); + } + } + return (false); +} + +bool +vring_need_bail(const viona_vring_t *ring) +{ + return (vring_need_bail_ext(ring, false)); +} + +int +viona_ring_pause(viona_vring_t *ring) +{ + mutex_enter(&ring->vr_lock); + switch (ring->vr_state) { + case VRS_RESET: + case VRS_SETUP: + case VRS_INIT: + /* + * For rings which have not yet started (even those in the + * VRS_SETUP and VRS_INIT phases, where there is a running worker + * thread (waiting to be released to do its intended task)), it + * is adequate to simply clear any start request, to keep them + * from proceeding into the actual work processing function.
+ */ + ring->vr_state_flags &= ~VRSF_REQ_START; + mutex_exit(&ring->vr_lock); + return (0); + + case VRS_STOP: + if ((ring->vr_state_flags & VRSF_REQ_STOP) != 0) { + /* A ring on its way to RESET cannot be paused. */ + mutex_exit(&ring->vr_lock); + return (EBUSY); + } + /* FALLTHROUGH */ + case VRS_RUN: + ring->vr_state_flags |= VRSF_REQ_PAUSE; + cv_broadcast(&ring->vr_cv); + break; + + default: + panic("invalid ring state %d", ring->vr_state); + break; + } + + for (;;) { + int res = cv_wait_sig(&ring->vr_cv, &ring->vr_lock); + + if (ring->vr_state == VRS_INIT || + (ring->vr_state_flags & VRSF_REQ_PAUSE) == 0) { + /* Ring made it to (or through) paused state */ + mutex_exit(&ring->vr_lock); + return (0); + } + if (res == 0) { + /* interrupted by signal */ + mutex_exit(&ring->vr_lock); + return (EINTR); + } + } + /* NOTREACHED */ +} + static void viona_worker(void *arg) { viona_vring_t *ring = (viona_vring_t *)arg; viona_link_t *link = ring->vr_link; - proc_t *p = ttoproc(curthread); mutex_enter(&ring->vr_lock); VERIFY3U(ring->vr_state, ==, VRS_SETUP); /* Bail immediately if ring shutdown or process exit was requested */ - if (VRING_NEED_BAIL(ring, p)) { - goto cleanup; + if (vring_need_bail_ext(ring, true)) { + goto ring_reset; } /* Report worker thread as alive and notify creator */ +ring_init: ring->vr_state = VRS_INIT; cv_broadcast(&ring->vr_cv); - while (ring->vr_state_flags == 0) { + while (!vring_start_req(ring)) { /* * Keeping lease renewals timely while waiting for the ring to * be started is important for avoiding deadlocks. */ if (vmm_drv_lease_expired(ring->vr_lease)) { if (!viona_ring_lease_renew(ring)) { - goto cleanup; + goto ring_reset; } } (void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock); - if (VRING_NEED_BAIL(ring, p)) { - goto cleanup; + if (vring_pause_req(ring)) { + /* We are already paused in the INIT state. 
*/ + ring->vr_state_flags &= ~VRSF_REQ_PAUSE; + } + if (vring_need_bail_ext(ring, true)) { + goto ring_reset; } } @@ -529,7 +682,7 @@ viona_worker(void *arg) /* Ensure ring lease is valid first */ if (vmm_drv_lease_expired(ring->vr_lease)) { if (!viona_ring_lease_renew(ring)) { - goto cleanup; + goto ring_reset; } } @@ -543,15 +696,18 @@ viona_worker(void *arg) } VERIFY3U(ring->vr_state, ==, VRS_STOP); + VERIFY3U(ring->vr_xfer_outstanding, ==, 0); -cleanup: - if (ring->vr_txdesb != NULL) { - /* - * Transmit activity must be entirely concluded before the - * associated descriptors can be cleaned up. - */ - VERIFY(ring->vr_xfer_outstanding == 0); + /* Respond to a pause request if the ring is not required to stop */ + if (vring_pause_req(ring)) { + ring->vr_state_flags &= ~VRSF_REQ_PAUSE; + + if (!vring_need_bail_ext(ring, true)) { + goto ring_init; + } } + +ring_reset: viona_ring_misc_free(ring); viona_ring_lease_drop(ring); diff --git a/usr/src/uts/intel/io/viona/viona_rx.c b/usr/src/uts/intel/io/viona/viona_rx.c index 2fbf6be972..e6f6f421d5 100644 --- a/usr/src/uts/intel/io/viona/viona_rx.c +++ b/usr/src/uts/intel/io/viona/viona_rx.c @@ -35,7 +35,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -86,8 +86,6 @@ viona_rx_fini(void) void viona_worker_rx(viona_vring_t *ring, viona_link_t *link) { - proc_t *p = ttoproc(curthread); - (void) thread_vsetname(curthread, "viona_rx_%p", ring); ASSERT(MUTEX_HELD(&ring->vr_lock)); @@ -122,7 +120,7 @@ viona_worker_rx(viona_vring_t *ring, viona_link_t *link) * place to inject frames into the guest. 
*/ (void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock); - } while (!VRING_NEED_BAIL(ring, p)); + } while (!vring_need_bail(ring)); ring->vr_state = VRS_STOP; diff --git a/usr/src/uts/intel/io/viona/viona_tx.c b/usr/src/uts/intel/io/viona/viona_tx.c index 277ee521a1..306c6f308e 100644 --- a/usr/src/uts/intel/io/viona/viona_tx.c +++ b/usr/src/uts/intel/io/viona/viona_tx.c @@ -35,7 +35,7 @@ * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. - * Copyright 2021 Oxide Computer Company + * Copyright 2022 Oxide Computer Company */ @@ -179,11 +179,11 @@ viona_tx_done(viona_vring_t *ring, uint32_t len, uint16_t cookie) viona_intr_ring(ring, B_FALSE); } +#define TX_BURST_THRESH 32 + void viona_worker_tx(viona_vring_t *ring, viona_link_t *link) { - proc_t *p = ttoproc(curthread); - (void) thread_vsetname(curthread, "viona_tx_%p", ring); ASSERT(MUTEX_HELD(&ring->vr_lock)); @@ -192,23 +192,30 @@ viona_worker_tx(viona_vring_t *ring, viona_link_t *link) mutex_exit(&ring->vr_lock); for (;;) { - boolean_t bail = B_FALSE; - boolean_t renew = B_FALSE; - uint_t ntx = 0; + uint_t ntx = 0, burst = 0; viona_ring_disable_notify(ring); - while (viona_ring_num_avail(ring)) { + while (viona_ring_num_avail(ring) != 0) { viona_tx(link, ring); + ntx++; + burst++; /* * It is advantageous for throughput to keep this * transmission loop tight, but periodic breaks to * check for other events are of value too. */ - if (ntx++ >= ring->vr_size) - break; + if (burst >= TX_BURST_THRESH) { + mutex_enter(&ring->vr_lock); + const bool need_bail = vring_need_bail(ring); + mutex_exit(&ring->vr_lock); + + if (need_bail) { + break; + } + burst = 0; + } } - viona_ring_enable_notify(ring); VIONA_PROBE2(tx, viona_link_t *, link, uint_t, ntx); @@ -219,14 +226,11 @@ viona_worker_tx(viona_vring_t *ring, viona_link_t *link) * The barrier ensures that visibility of the no-notify * store does not cross the viona_ring_num_avail() check below. 
*/ + viona_ring_enable_notify(ring); membar_enter(); - bail = VRING_NEED_BAIL(ring, p); - renew = vmm_drv_lease_expired(ring->vr_lease); - if (!bail && !renew && viona_ring_num_avail(ring)) { - continue; - } - if ((link->l_features & VIRTIO_F_RING_NOTIFY_ON_EMPTY) != 0) { + if (viona_ring_num_avail(ring) == 0 && + (link->l_features & VIRTIO_F_RING_NOTIFY_ON_EMPTY) != 0) { /* * The NOTIFY_ON_EMPTY interrupt should not pay heed to * the presence of AVAIL_NO_INTERRUPT. @@ -235,36 +239,43 @@ viona_worker_tx(viona_vring_t *ring, viona_link_t *link) } mutex_enter(&ring->vr_lock); + for (;;) { + if (vring_need_bail(ring)) { + ring->vr_state = VRS_STOP; + viona_tx_wait_outstanding(ring); + return; + } - while (!bail && !renew && !viona_ring_num_avail(ring)) { - (void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock); - bail = VRING_NEED_BAIL(ring, p); - renew = vmm_drv_lease_expired(ring->vr_lease); - } - - if (bail) { - break; - } else if (renew) { - ring->vr_state_flags |= VRSF_RENEW; - /* - * When renewing the lease for the ring, no TX - * frames may be outstanding, as they contain - * references to guest memory. - */ - viona_tx_wait_outstanding(ring); + if (vmm_drv_lease_expired(ring->vr_lease)) { + ring->vr_state_flags |= VRSF_RENEW; + /* + * When renewing the lease for the ring, no TX + * frames may be outstanding, as they contain + * references to guest memory. 
+ */ + viona_tx_wait_outstanding(ring); + + const boolean_t renewed = + viona_ring_lease_renew(ring); + ring->vr_state_flags &= ~VRSF_RENEW; + + if (!renewed) { + /* stop ring on failed renewal */ + ring->vr_state = VRS_STOP; + return; + } + } - if (!viona_ring_lease_renew(ring)) { + if (viona_ring_num_avail(ring) != 0) { break; } - ring->vr_state_flags &= ~VRSF_RENEW; + + /* Wait for further activity on the ring */ + (void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock); } mutex_exit(&ring->vr_lock); } - - ASSERT(MUTEX_HELD(&ring->vr_lock)); - - ring->vr_state = VRS_STOP; - viona_tx_wait_outstanding(ring); + /* UNREACHABLE */ } static void diff --git a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c index ee07779b21..da83735e43 100644 --- a/usr/src/uts/intel/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/intel/io/vmm/vmm_sol_dev.c @@ -2239,6 +2239,24 @@ vmm_drv_purge(vmm_softc_t *sc) hold = list_next(&sc->vmm_holds, hold)) { hold->vmh_release_req = B_TRUE; } + + /* + * Require that all leases on the instance be broken, now that + * all associated holds have been marked as needing release. + * + * Dropping vmm_mtx is not strictly necessary, but if any of the + * lessees are slow to respond, it would be nice to leave it + * available for other parties. + */ + mutex_exit(&vmm_mtx); + vmm_lease_block(sc); + vmm_lease_unblock(sc); + mutex_enter(&vmm_mtx); + + /* + * With all of the leases broken, we can proceed in an orderly + * fashion to waiting for any lingering holds to be dropped. + */ while ((sc->vmm_flags & VMM_HELD) != 0) { if (cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) { return (EINTR); diff --git a/usr/src/uts/intel/sys/viona_io.h b/usr/src/uts/intel/sys/viona_io.h index 46cc72eb06..761445986c 100644 --- a/usr/src/uts/intel/sys/viona_io.h +++ b/usr/src/uts/intel/sys/viona_io.h @@ -12,6 +12,7 @@ /* * Copyright 2013 Pluribus Networks Inc. * Copyright 2018 Joyent, Inc. 
+ * Copyright 2022 Oxide Computer Company */ #ifndef _VIONA_IO_H_ @@ -20,18 +21,39 @@ #define VNA_IOC (('V' << 16)|('C' << 8)) #define VNA_IOC_CREATE (VNA_IOC | 0x01) #define VNA_IOC_DELETE (VNA_IOC | 0x02) +#define VNA_IOC_VERSION (VNA_IOC | 0x03) #define VNA_IOC_RING_INIT (VNA_IOC | 0x10) #define VNA_IOC_RING_RESET (VNA_IOC | 0x11) #define VNA_IOC_RING_KICK (VNA_IOC | 0x12) #define VNA_IOC_RING_SET_MSI (VNA_IOC | 0x13) #define VNA_IOC_RING_INTR_CLR (VNA_IOC | 0x14) +#define VNA_IOC_RING_SET_STATE (VNA_IOC | 0x15) +#define VNA_IOC_RING_GET_STATE (VNA_IOC | 0x16) +#define VNA_IOC_RING_PAUSE (VNA_IOC | 0x17) #define VNA_IOC_INTR_POLL (VNA_IOC | 0x20) #define VNA_IOC_SET_FEATURES (VNA_IOC | 0x21) #define VNA_IOC_GET_FEATURES (VNA_IOC | 0x22) #define VNA_IOC_SET_NOTIFY_IOP (VNA_IOC | 0x23) + +/* + * Viona Interface Version + * + * Like bhyve, viona exposes Private interfaces which are nonetheless consumed + * by out-of-gate consumers. While those consumers assume all risk of breakage + * incurred by subsequent changes, it would be nice to equip them to potentially + * detect (and handle) those modifications. + * + * There are no established criteria for the magnitude of change which requires + * this version to be incremented, and maintenance of it is considered a + * best-effort activity. Nothing is to be inferred about the magnitude of a + * change when the version is modified. It follows no rules like semver. + * + */ +#define VIONA_CURRENT_INTERFACE_VERSION 1 + typedef struct vioc_create { datalink_id_t c_linkid; int c_vmfd; @@ -43,6 +65,14 @@ typedef struct vioc_ring_init { uint64_t ri_qaddr; } vioc_ring_init_t; +typedef struct vioc_ring_state { + uint16_t vrs_index; + uint16_t vrs_avail_idx; + uint16_t vrs_used_idx; + uint16_t vrs_qsize; + uint64_t vrs_qaddr; +} vioc_ring_state_t; + typedef struct vioc_ring_msi { uint16_t rm_index; uint64_t rm_addr; |