author	Patrick Mooney <pmooney@pfmooney.com>	2019-06-18 18:32:14 +0000
committer	Patrick Mooney <pmooney@pfmooney.com>	2019-09-03 13:59:45 +0000
commit	45edaa9a0d97fa76474a135173815f7fd1a6eea9 (patch)
tree	9690d007d14890736f750f57e2c6d15b6d18eb61
parent	d0efab8443fcce33c336a009d253617419db9909 (diff)
XXX-WIP: make viona access memory per-page (dev-bhyve-viona-page)
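
Rather than depending on a contiguous kernel mapping of the entire guest
address space, take holds on guest memory page-at-a-time: the ring sections
(descriptor table, avail, used) acquire persistent per-page holds when the
ring is mapped, while TX buffer pages are held for the duration of each
transmission.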
-rw-r--r--	usr/src/uts/i86pc/io/viona/viona_impl.h	51
-rw-r--r--	usr/src/uts/i86pc/io/viona/viona_ring.c	519
-rw-r--r--	usr/src/uts/i86pc/io/viona/viona_tx.c	32
-rw-r--r--	usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c	25
-rw-r--r--	usr/src/uts/i86pc/sys/vmm_drv.h	10
5 files changed, 544 insertions, 93 deletions
diff --git a/usr/src/uts/i86pc/io/viona/viona_impl.h b/usr/src/uts/i86pc/io/viona/viona_impl.h
index ee31c4d4ce..af38edcb0b 100644
--- a/usr/src/uts/i86pc/io/viona/viona_impl.h
+++ b/usr/src/uts/i86pc/io/viona/viona_impl.h
@@ -58,6 +58,7 @@
#include <sys/vmm_drv.h>
#include <sys/viona_io.h>
+
struct viona_link;
typedef struct viona_link viona_link_t;
struct viona_desb;
@@ -99,23 +100,33 @@ typedef struct viona_vring {
/* Internal ring-related state */
kmutex_t vr_a_mutex; /* sync consumers of 'avail' */
kmutex_t vr_u_mutex; /* sync consumers of 'used' */
- uint64_t vr_pa;
+ uint64_t vr_gpa;
uint16_t vr_size;
uint16_t vr_mask; /* cached from vr_size */
uint16_t vr_cur_aidx; /* trails behind 'avail_idx' */
+ uint16_t vr_cur_uidx; /* internal 'used_idx' */
/* Host-context pointers to the queue */
- volatile struct virtio_desc *vr_descr;
+ volatile uint16_t *vr_avail_flags;
+ volatile uint16_t *vr_avail_idx;
+ volatile uint16_t *vr_avail_used_event;
+
+ volatile uint16_t *vr_used_flags;
+ volatile uint16_t *vr_used_idx;
+ volatile uint16_t *vr_used_avail_event;
+
+	/* Guest-physical addresses and page holds for the ring regions */
+ uint64_t vr_descr_gpa;
+ vmm_page_hold_t *vr_descr_holds;
+ uint_t vr_descr_pages;
- volatile uint16_t *vr_avail_flags;
- volatile uint16_t *vr_avail_idx;
- volatile uint16_t *vr_avail_ring;
- volatile uint16_t *vr_avail_used_event;
+ uint64_t vr_avail_gpa;
+ vmm_page_hold_t *vr_avail_holds;
+ uint_t vr_avail_pages;
- volatile uint16_t *vr_used_flags;
- volatile uint16_t *vr_used_idx;
- volatile struct virtio_used *vr_used_ring;
- volatile uint16_t *vr_used_avail_event;
+ uint64_t vr_used_gpa;
+ vmm_page_hold_t *vr_used_holds;
+ uint_t vr_used_pages;
/* Per-ring error condition statistics */
struct viona_ring_stats {
@@ -205,6 +216,16 @@ typedef struct viona_soft_state {
list_node_t ss_node;
} viona_soft_state_t;
+
+typedef struct ring_iovec {
+ vmm_page_hold_t riov_hold;
+ uint32_t riov_offset;
+ uint32_t riov_len;
+} ring_iovec_t;
+
+#define RIOV_BASE(iov) ((caddr_t)(iov).riov_hold.vph_kva + (iov).riov_offset)
+#define RIOV_LEN(iov) ((iov).riov_len)
+
#pragma pack(1)
struct virtio_desc {
uint64_t vd_addr;
@@ -270,6 +291,16 @@ struct virtio_net_hdr {
#define VIONA_MAX_HDRS_LEN (sizeof (struct ether_vlan_header) + \
IP_MAX_HDR_LENGTH + TCP_MAX_HDR_LENGTH)
+
+#define VRING_SZ_DESCR(qsz) ((qsz) * sizeof (struct virtio_desc))
+#define VRING_SZ_AVAIL(qsz) ((qsz) * sizeof (uint16_t) + 6)
+#define VRING_SZ_USED(qsz) (((qsz) * sizeof (struct virtio_used)) + 6)
+#define VRING_ALIGN_DESCR (sizeof (struct virtio_desc))
+#define VRING_ALIGN_AVAIL (sizeof (uint16_t))
+#define VRING_ALIGN_USED (sizeof (struct virtio_used))
+
+#define VRING_MAX_SIZE 32768
+
#define VRING_AVAIL_F_NO_INTERRUPT 1
#define VRING_USED_F_NO_NOTIFY 1
diff --git a/usr/src/uts/i86pc/io/viona/viona_ring.c b/usr/src/uts/i86pc/io/viona/viona_ring.c
index e535bfaa1a..31d8878418 100644
--- a/usr/src/uts/i86pc/io/viona/viona_ring.c
+++ b/usr/src/uts/i86pc/io/viona/viona_ring.c
@@ -42,20 +42,22 @@
#include "viona_impl.h"
-#define VRING_ALIGN 4096
+#define VRING_ALIGN_LEGACY 4096
#define VRING_MAX_LEN 32768
-static boolean_t viona_ring_map(viona_vring_t *);
-static void viona_ring_unmap(viona_vring_t *);
-static kthread_t *viona_create_worker(viona_vring_t *);
+#define VRING_SZ_DESCR(qsz) ((qsz) * sizeof (struct virtio_desc))
+#define VRING_SZ_AVAIL(qsz) ((qsz) * sizeof (uint16_t) + 6)
+#define VRING_SZ_USED(qsz) (((qsz) * sizeof (struct virtio_used)) + 6)
+#define VRING_ALIGN_DESCR (sizeof (struct virtio_desc))
+#define VRING_ALIGN_AVAIL (sizeof (uint16_t))
+#define VRING_ALIGN_USED (sizeof (struct virtio_used))
-static void *
-viona_gpa2kva(viona_vring_t *ring, uint64_t gpa, size_t len)
-{
- ASSERT3P(ring->vr_lease, !=, NULL);
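+/*
+ * Number of pages spanned by 'sz' bytes of guest memory starting at 'addr',
+ * accounting for the offset of 'addr' within its first page.
+ */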
+#define VRING_PAGES(addr, sz) \
+	(P2ROUNDUP(P2PHASE((addr), PAGESIZE) + (sz), PAGESIZE) / PAGESIZE)
- return (vmm_drv_gpa2kva(ring->vr_lease, gpa, len));
-}
+static boolean_t viona_ring_map_legacy(viona_vring_t *);
+static void viona_ring_unmap_legacy(viona_vring_t *);
+static kthread_t *viona_create_worker(viona_vring_t *);
static boolean_t
viona_ring_lease_expire_cb(void *arg)
@@ -82,7 +84,7 @@ viona_ring_lease_drop(viona_vring_t *ring)
* Without an active lease, the ring mappings cannot be
* considered valid.
*/
- viona_ring_unmap(ring);
+ viona_ring_unmap_legacy(ring);
vmm_drv_lease_break(hold, ring->vr_lease);
ring->vr_lease = NULL;
@@ -107,12 +109,12 @@ viona_ring_lease_renew(viona_vring_t *ring)
ring);
if (ring->vr_lease != NULL) {
/* A ring undergoing renewal will need valid guest mappings */
- if (ring->vr_pa != 0 && ring->vr_size != 0) {
+ if (ring->vr_gpa != 0 && ring->vr_size != 0) {
/*
* If new mappings cannot be established, consider the
* lease renewal a failure.
*/
- if (!viona_ring_map(ring)) {
+ if (!viona_ring_map_legacy(ring)) {
viona_ring_lease_drop(ring);
return (B_FALSE);
}
@@ -179,8 +181,8 @@ viona_ring_init(viona_link_t *link, uint16_t idx, uint16_t qsz, uint64_t pa)
ring->vr_size = qsz;
ring->vr_mask = (ring->vr_size - 1);
- ring->vr_pa = pa;
- if (!viona_ring_map(ring)) {
+ ring->vr_gpa = pa;
+ if (!viona_ring_map_legacy(ring)) {
err = EINVAL;
goto fail;
}
@@ -250,65 +252,292 @@ viona_ring_reset(viona_vring_t *ring, boolean_t heed_signals)
return (0);
}
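+
+/*
+ * Acquire holds on the 'pages' guest pages beginning with the page that
+ * contains 'gpa'.  On failure, any holds already acquired are released and
+ * NULL is returned.
+ */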
+static vmm_page_hold_t *
+vring_map_pages(vmm_lease_t *lease, uint64_t gpa, uint_t pages, int prot)
+{
+ vmm_page_hold_t *holds;
+ uint64_t pos;
+
+ holds = kmem_zalloc(sizeof (vmm_page_hold_t) * pages, KM_SLEEP);
+
+ pos = P2ALIGN(gpa, PAGESIZE);
+ for (uint_t i = 0; i < pages; i++, pos += PAGESIZE) {
+ if (!vmm_drv_gpa_hold(lease, &holds[i], pos, prot)) {
+			/* Unwind holds taken before the failure */
+			while (i != 0) {
+				i--;
+				vmm_drv_gpa_rele(lease, &holds[i]);
+			}
+ kmem_free(holds, sizeof (vmm_page_hold_t) * pages);
+ return (NULL);
+ }
+ }
+ return (holds);
+}
+
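+/*
+ * Translate a guest-physical address within a held region into the host
+ * kernel virtual address of its mapping.  'base' is the page-aligned start
+ * of the region; the referenced object must not straddle a page boundary.
+ */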
+static inline caddr_t
+vring_addr_at(const vmm_page_hold_t *holds, uint64_t base, uint_t pages,
+ uint64_t addr, uint_t size)
+{
+ const uint64_t offset = addr - base;
+ const uint_t skip = offset / PAGESIZE;
+ const uint_t poffset = P2PHASE(offset, PAGESIZE);
+
+ ASSERT3U(skip, <, pages);
+ ASSERT3U(poffset + size, <=, PAGESIZE);
+
+ return ((caddr_t)holds[skip].vph_kva + poffset);
+}
+
static boolean_t
-viona_ring_map(viona_vring_t *ring)
+viona_ring_map_descr(viona_vring_t *ring)
{
- uint64_t pos = ring->vr_pa;
- const uint16_t qsz = ring->vr_size;
+ const uint64_t gpa = ring->vr_descr_gpa;
+ const uint_t pages = VRING_PAGES(gpa, VRING_SZ_DESCR(ring->vr_size));
+ vmm_page_hold_t *holds;
- ASSERT3U(qsz, !=, 0);
- ASSERT3U(pos, !=, 0);
ASSERT(MUTEX_HELD(&ring->vr_lock));
+	ASSERT0(gpa & (VRING_ALIGN_DESCR - 1));
- const size_t desc_sz = qsz * sizeof (struct virtio_desc);
- ring->vr_descr = viona_gpa2kva(ring, pos, desc_sz);
- if (ring->vr_descr == NULL) {
- goto fail;
+ holds = vring_map_pages(ring->vr_lease, gpa, pages, PROT_READ);
+ if (holds == NULL) {
+ return (B_FALSE);
}
- pos += desc_sz;
- const size_t avail_sz = (qsz + 3) * sizeof (uint16_t);
- ring->vr_avail_flags = viona_gpa2kva(ring, pos, avail_sz);
- if (ring->vr_avail_flags == NULL) {
- goto fail;
+ ring->vr_descr_pages = pages;
+ ring->vr_descr_holds = holds;
+
+ return (B_TRUE);
+}
+
+static boolean_t
+viona_ring_map_avail(viona_vring_t *ring)
+{
+ const uint64_t gpa = ring->vr_avail_gpa;
+ const uint_t pages = VRING_PAGES(gpa, VRING_SZ_AVAIL(ring->vr_size));
+ const uint64_t base = P2ALIGN(gpa, PAGESIZE);
+ vmm_page_hold_t *holds;
+
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+	ASSERT0(gpa & (VRING_ALIGN_AVAIL - 1));
+
+ holds = vring_map_pages(ring->vr_lease, gpa, pages, PROT_READ);
+ if (holds == NULL) {
+ return (B_FALSE);
}
- ring->vr_avail_idx = ring->vr_avail_flags + 1;
- ring->vr_avail_ring = ring->vr_avail_flags + 2;
- ring->vr_avail_used_event = ring->vr_avail_ring + qsz;
- pos += avail_sz;
- const size_t used_sz = (qsz * sizeof (struct virtio_used)) +
- (sizeof (uint16_t) * 3);
- pos = P2ROUNDUP(pos, VRING_ALIGN);
- ring->vr_used_flags = viona_gpa2kva(ring, pos, used_sz);
- if (ring->vr_used_flags == NULL) {
- goto fail;
+ ring->vr_avail_gpa = gpa;
+ ring->vr_avail_holds = holds;
+ ring->vr_avail_pages = pages;
+
+ ring->vr_avail_flags = (volatile uint16_t *)vring_addr_at(holds, base,
+ pages, gpa, 2);
+ ring->vr_avail_idx = (volatile uint16_t *)vring_addr_at(holds, base,
+ pages, gpa + 2, 2);
+ ring->vr_avail_used_event = (volatile uint16_t *)vring_addr_at(holds,
+ base, pages, gpa + 4 + (ring->vr_size * 2),
+ sizeof (uint16_t));
+
+ return (B_TRUE);
+}
+
+static boolean_t
+viona_ring_map_used(viona_vring_t *ring)
+{
+ const uint64_t gpa = ring->vr_used_gpa;
+ const uint_t pages = VRING_PAGES(gpa, VRING_SZ_USED(ring->vr_size));
+ const uint64_t base = P2ALIGN(gpa, PAGESIZE);
+ vmm_page_hold_t *holds;
+
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+	ASSERT0(gpa & (VRING_ALIGN_USED - 1));
+
+ holds = vring_map_pages(ring->vr_lease, gpa, pages, PROT_WRITE);
+ if (holds == NULL) {
+ return (B_FALSE);
}
- ring->vr_used_idx = ring->vr_used_flags + 1;
- ring->vr_used_ring = (struct virtio_used *)(ring->vr_used_flags + 2);
- ring->vr_used_avail_event = (uint16_t *)(ring->vr_used_ring + qsz);
+
+ ring->vr_used_gpa = gpa;
+ ring->vr_used_holds = holds;
+ ring->vr_used_pages = pages;
+
+ ring->vr_used_flags = (volatile uint16_t *)vring_addr_at(holds, base,
+ pages, gpa, 2);
+ ring->vr_used_idx = (volatile uint16_t *)vring_addr_at(holds, base,
+ pages, gpa + 2, 2);
+ ring->vr_used_avail_event = (volatile uint16_t *)vring_addr_at(holds,
+ base, pages, gpa + 4 + (ring->vr_size * 8), 2);
return (B_TRUE);
+}
-fail:
- viona_ring_unmap(ring);
- return (B_FALSE);
+static void
+viona_ring_unmap_descr(viona_vring_t *ring)
+{
+ const uint_t pages = ring->vr_descr_pages;
+ vmm_page_hold_t *holds = ring->vr_descr_holds;
+
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+
+ for (uint_t i = 0; i < pages; i++) {
+ vmm_drv_gpa_rele(ring->vr_lease, &holds[i]);
+ }
+
+ ring->vr_descr_pages = 0;
+ ring->vr_descr_holds = NULL;
+ kmem_free(holds, sizeof (vmm_page_hold_t) * pages);
}
static void
-viona_ring_unmap(viona_vring_t *ring)
+viona_ring_unmap_avail(viona_vring_t *ring)
{
+ const uint_t pages = ring->vr_avail_pages;
+ vmm_page_hold_t *holds = ring->vr_avail_holds;
+
ASSERT(MUTEX_HELD(&ring->vr_lock));
- ring->vr_descr = NULL;
+ for (uint_t i = 0; i < pages; i++) {
+ vmm_drv_gpa_rele(ring->vr_lease, &holds[i]);
+ }
+
ring->vr_avail_flags = NULL;
ring->vr_avail_idx = NULL;
- ring->vr_avail_ring = NULL;
ring->vr_avail_used_event = NULL;
+
+ ring->vr_avail_pages = 0;
+ ring->vr_avail_holds = NULL;
+ kmem_free(holds, sizeof (vmm_page_hold_t) * pages);
+}
+
+static void
+viona_ring_unmap_used(viona_vring_t *ring)
+{
+ const uint_t pages = ring->vr_used_pages;
+ vmm_page_hold_t *holds = ring->vr_used_holds;
+
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+ ASSERT(ring->vr_used_gpa != 0);
+
+ for (uint_t i = 0; i < pages; i++) {
+ vmm_drv_gpa_rele(ring->vr_lease, &holds[i]);
+ }
+
ring->vr_used_flags = NULL;
ring->vr_used_idx = NULL;
- ring->vr_used_ring = NULL;
ring->vr_used_avail_event = NULL;
+
+ ring->vr_used_pages = 0;
+ ring->vr_used_holds = NULL;
+ kmem_free(holds, sizeof (vmm_page_hold_t) * pages);
+}
+
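+/*
+ * Map a legacy-format ring: the descriptor table and avail ring are packed
+ * together at the page-aligned base, with the used ring starting at the
+ * next page boundary.
+ */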
+static boolean_t
+viona_ring_map_legacy(viona_vring_t *ring)
+{
+ const uint16_t qsz = ring->vr_size;
+
+ ASSERT3U(qsz, !=, 0);
+	ASSERT3U(ring->vr_gpa, !=, 0);
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+
+ /* Expecting page alignment for a legacy ring */
+ if ((ring->vr_gpa & PAGEOFFSET) != 0) {
+ return (B_FALSE);
+ }
+
+ ring->vr_descr_gpa = ring->vr_gpa;
+ ring->vr_avail_gpa = ring->vr_descr_gpa + VRING_SZ_DESCR(qsz);
+	ring->vr_used_gpa = P2ROUNDUP(ring->vr_avail_gpa + VRING_SZ_AVAIL(qsz),
+	    VRING_ALIGN_LEGACY);
+
+ if (!viona_ring_map_descr(ring)) {
+ goto fail;
+ }
+ if (!viona_ring_map_avail(ring)) {
+ viona_ring_unmap_descr(ring);
+ goto fail;
+ }
+ if (!viona_ring_map_used(ring)) {
+ viona_ring_unmap_descr(ring);
+ viona_ring_unmap_avail(ring);
+ goto fail;
+ }
+ return (B_TRUE);
+
+fail:
+ ring->vr_descr_gpa = 0;
+ ring->vr_avail_gpa = 0;
+ ring->vr_used_gpa = 0;
+ return (B_FALSE);
+}
+
+static void
+viona_ring_unmap_legacy(viona_vring_t *ring)
+{
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+
+ if (ring->vr_descr_gpa != 0) {
+ ASSERT(ring->vr_avail_gpa);
+ ASSERT(ring->vr_used_gpa);
+
+ viona_ring_unmap_descr(ring);
+ viona_ring_unmap_avail(ring);
+ viona_ring_unmap_used(ring);
+ ring->vr_descr_gpa = 0;
+ ring->vr_avail_gpa = 0;
+ ring->vr_used_gpa = 0;
+ }
+}
+
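+/*
+ * Ring accessors which resolve the backing page hold on every access, since
+ * a ring section may span host pages which are not KVA-contiguous.
+ */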
+static inline struct virtio_desc
+vring_read_descr(viona_vring_t *ring, uint_t idx)
+{
+ ASSERT(MUTEX_HELD(&ring->vr_a_mutex));
+ ASSERT(ring->vr_descr_gpa != 0);
+
+ volatile struct virtio_desc *valp = (struct virtio_desc *)
+ vring_addr_at(ring->vr_descr_holds,
+ P2ALIGN(ring->vr_descr_gpa, PAGESIZE),
+ ring->vr_descr_pages,
+ ring->vr_descr_gpa + (idx * sizeof (struct virtio_desc)),
+ sizeof (struct virtio_desc));
+
+ return (*valp);
+}
+
+static inline uint16_t
+vring_read_avail(viona_vring_t *ring, uint_t idx)
+{
+ ASSERT(MUTEX_HELD(&ring->vr_a_mutex));
+ ASSERT(ring->vr_avail_gpa != 0);
+
+ const uint_t midx = idx & ring->vr_mask;
+ volatile uint16_t *valp = (uint16_t *)
+ vring_addr_at(ring->vr_avail_holds,
+ P2ALIGN(ring->vr_avail_gpa, PAGESIZE),
+ ring->vr_avail_pages,
+ ring->vr_avail_gpa + 4 + (midx * 2),
+ 2);
+
+ return (*valp);
+}
+
+static inline void
+vring_write_used(viona_vring_t *ring, uint_t idx, uint16_t id, uint32_t len)
+{
+ ASSERT(MUTEX_HELD(&ring->vr_u_mutex));
+ ASSERT(ring->vr_used_gpa != 0);
+
+	const uint_t midx = idx & ring->vr_mask;
+	const uint64_t base = P2ALIGN(ring->vr_used_gpa, PAGESIZE);
+	const uint64_t egpa = ring->vr_used_gpa + 4 + (midx * 8);
+
+	/*
+	 * The two 32-bit fields are located separately, since a used-ring
+	 * element is only 4-byte aligned relative to the page and may
+	 * straddle a page boundary.
+	 */
+	volatile uint32_t *vu_idx = (volatile uint32_t *)
+	    vring_addr_at(ring->vr_used_holds, base, ring->vr_used_pages,
+	    egpa, sizeof (uint32_t));
+	volatile uint32_t *vu_tlen = (volatile uint32_t *)
+	    vring_addr_at(ring->vr_used_holds, base, ring->vr_used_pages,
+	    egpa + sizeof (uint32_t), sizeof (uint32_t));
+
+	*vu_idx = id;
+	*vu_tlen = len;
}
void
@@ -438,8 +667,152 @@ viona_create_worker(viona_vring_t *ring)
return (t);
}
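+
+/*
+ * Record iovec entries (each backed by a page hold) for a direct descriptor.
+ * A descriptor spanning multiple guest pages is split into one entry per
+ * page, since guest-contiguous memory is not necessarily host-contiguous.
+ * Returns the updated iovec index.
+ */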
+static uint_t
+vq_popchain_direct(viona_vring_t *ring, const struct virtio_desc *vd,
+    ring_iovec_t *iov, uint_t niov, uint_t i)
+{
+	if (vd->vd_len == 0) {
+		VIONA_PROBE2(desc_bad_len,
+		    viona_vring_t *, ring,
+		    uint32_t, vd->vd_len);
+		VIONA_RING_STAT_INCR(ring, desc_bad_len);
+		return (i);
+	}
+ uint_t pages = VRING_PAGES(vd->vd_addr, vd->vd_len);
+
+ ASSERT(i < niov);
+
+ if (pages == 1) {
+ uint_t off = P2PHASE(vd->vd_addr, PAGESIZE);
+ uint64_t base = P2ALIGN(vd->vd_addr, PAGESIZE);
+
+		if (!vmm_drv_gpa_hold(ring->vr_lease, &iov[i].riov_hold, base,
+		    PROT_READ|PROT_WRITE)) {
+			VIONA_PROBE_BAD_RING_ADDR(ring, vd->vd_addr);
+			VIONA_RING_STAT_INCR(ring, bad_ring_addr);
+			return (i);
+		}
+		iov[i].riov_offset = off;
+		iov[i].riov_len = vd->vd_len;
+		return (i + 1);
+ } else {
+ /*
+ * The guest has provided a descriptor referring to a
+ * guest-physical contiguous mapping. With no guarantee (or
+ * frankly, likelihood) of it being host-physical contiguous,
+ * treat it like multiple descriptors.
+ */
+
+ if ((i + pages) >= niov) {
+ /* bail if there is not adequate room */
+ return (i);
+ }
+		const uint_t first = i;
+		uint64_t addr = vd->vd_addr;
+		uint32_t len = vd->vd_len;
+
+		while (len > 0) {
+			const uint_t off = P2PHASE(addr, PAGESIZE);
+			const uint32_t chunk = MIN(len, PAGESIZE - off);
+
+			if (!vmm_drv_gpa_hold(ring->vr_lease,
+			    &iov[i].riov_hold, P2ALIGN(addr, PAGESIZE),
+			    PROT_READ|PROT_WRITE)) {
+				VIONA_PROBE_BAD_RING_ADDR(ring, addr);
+				VIONA_RING_STAT_INCR(ring, bad_ring_addr);
+				/* Unwind holds taken on earlier pages */
+				while (i > first) {
+					i--;
+					vmm_drv_gpa_rele(ring->vr_lease,
+					    &iov[i].riov_hold);
+				}
+				return (first);
+			}
+			iov[i].riov_offset = off;
+			iov[i].riov_len = chunk;
+			i++;
+			addr += chunk;
+			len -= chunk;
+		}
+ }
+
+	return (i);
+}
+
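+/*
+ * Walk an indirect descriptor table, delegating each entry to
+ * vq_popchain_direct().  Holds on the table pages themselves are retained
+ * only for the duration of the walk.
+ */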
+static uint_t
+vq_popchain_indirect(viona_vring_t *ring, const struct virtio_desc *vd,
+    ring_iovec_t *iov, uint_t niov, uint_t i)
+{
+ const uint_t nindir = vd->vd_len / sizeof (struct virtio_desc);
+
+ if (P2PHASE(vd->vd_len, sizeof (struct virtio_desc)) != 0 ||
+ nindir == 0) {
+ VIONA_PROBE2(indir_bad_len, viona_vring_t *, ring,
+ uint32_t, vd->vd_len);
+ VIONA_RING_STAT_INCR(ring, indir_bad_len);
+		return (i);
+ }
+
+	const uint_t pages = VRING_PAGES(vd->vd_addr, vd->vd_len);
+	const uint64_t base = P2ALIGN(vd->vd_addr, PAGESIZE);
+	vmm_page_hold_t *holds;
+	uint_t next = 0;
+
+	holds = vring_map_pages(ring->vr_lease, vd->vd_addr, pages, PROT_READ);
+	if (holds == NULL) {
+		VIONA_PROBE_BAD_RING_ADDR(ring, vd->vd_addr);
+		VIONA_RING_STAT_INCR(ring, bad_ring_addr);
+		return (i);
+	}
+	for (;;) {
+		struct virtio_desc vp;
+
+		/*
+		 * A copy of the indirect descriptor is made here, rather
+		 * than simply using a reference pointer.  This prevents
+		 * malicious or erroneous guest writes to the descriptor
+		 * from fooling the flags/bounds verification through a race.
+		 *
+		 * XXX: a table aligned such that an entry straddles a page
+		 * boundary is not yet handled.
+		 */
+		vp = *(volatile struct virtio_desc *)vring_addr_at(holds,
+		    base, pages, vd->vd_addr + (next * sizeof (vp)),
+		    sizeof (vp));
+		if (vp.vd_flags & VRING_DESC_F_INDIRECT) {
+			VIONA_PROBE1(indir_bad_nest,
+			    viona_vring_t *, ring);
+			VIONA_RING_STAT_INCR(ring, indir_bad_nest);
+			break;
+		} else if (vp.vd_len == 0) {
+			VIONA_PROBE2(desc_bad_len,
+			    viona_vring_t *, ring,
+			    uint32_t, vp.vd_len);
+			VIONA_RING_STAT_INCR(ring, desc_bad_len);
+			break;
+		}
+
+		/* Take holds on the buffer pages themselves */
+		i = vq_popchain_direct(ring, &vp, iov, niov, i);
+
+		if ((vp.vd_flags & VRING_DESC_F_NEXT) == 0)
+			break;
+		if (i >= niov)
+			break;
+
+		next = vp.vd_next;
+		if (next >= nindir) {
+			VIONA_PROBE3(indir_bad_next,
+			    viona_vring_t *, ring,
+			    uint16_t, next,
+			    uint_t, nindir);
+			VIONA_RING_STAT_INCR(ring, indir_bad_next);
+			break;
+		}
+	}
+
+	/* Holds on the indirect table are not needed past the walk */
+	for (uint_t n = 0; n < pages; n++) {
+		vmm_drv_gpa_rele(ring->vr_lease, &holds[n]);
+	}
+	kmem_free(holds, sizeof (vmm_page_hold_t) * pages);
+
+	return (i);
+}
+
int
-vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
+vq_popchain(viona_vring_t *ring, ring_iovec_t *iov, uint_t niov,
uint16_t *cookie)
{
uint_t i, ndesc, idx, head, next;
@@ -470,7 +843,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
VIONA_RING_STAT_INCR(ring, ndesc_too_high);
}
- head = ring->vr_avail_ring[idx & ring->vr_mask];
+ head = vring_read_avail(ring, idx);
next = head;
for (i = 0; i < niov; next = vdir.vd_next) {
@@ -481,7 +854,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
goto bail;
}
- vdir = ring->vr_descr[next];
+ vdir = vring_read_descr(ring, next);
if ((vdir.vd_flags & VRING_DESC_F_INDIRECT) == 0) {
if (vdir.vd_len == 0) {
VIONA_PROBE2(desc_bad_len,
@@ -490,16 +863,15 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
VIONA_RING_STAT_INCR(ring, desc_bad_len);
goto bail;
}
- buf = viona_gpa2kva(ring, vdir.vd_addr, vdir.vd_len);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, vdir.vd_addr);
- VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- goto bail;
- }
+			i = vq_popchain_direct(ring, &vdir, iov, niov, i);
-			iov[i].iov_base = buf;
-			iov[i].iov_len = vdir.vd_len;
-			i++;
} else {
+			i = vq_popchain_indirect(ring, &vdir, iov, niov, i);
-			const uint_t nindir = vdir.vd_len / 16;
-			volatile struct virtio_desc *vindir;
@@ -593,17 +965,16 @@ bail:
void
vq_pushchain(viona_vring_t *ring, uint32_t len, uint16_t cookie)
{
- volatile struct virtio_used *vu;
uint_t uidx;
mutex_enter(&ring->vr_u_mutex);
- uidx = *ring->vr_used_idx;
- vu = &ring->vr_used_ring[uidx++ & ring->vr_mask];
- vu->vu_idx = cookie;
- vu->vu_tlen = len;
+ uidx = ring->vr_cur_uidx;
+ vring_write_used(ring, uidx, cookie, len);
+ uidx++;
membar_producer();
*ring->vr_used_idx = uidx;
+ ring->vr_cur_uidx = uidx;
mutex_exit(&ring->vr_u_mutex);
}
@@ -611,26 +982,20 @@ vq_pushchain(viona_vring_t *ring, uint32_t len, uint16_t cookie)
void
vq_pushchain_many(viona_vring_t *ring, uint_t num_bufs, used_elem_t *elem)
{
- volatile struct virtio_used *vu;
- uint_t uidx, i;
+ uint_t uidx;
+
+ ASSERT(num_bufs <= ring->vr_size);
mutex_enter(&ring->vr_u_mutex);
- uidx = *ring->vr_used_idx;
- if (num_bufs == 1) {
- vu = &ring->vr_used_ring[uidx++ & ring->vr_mask];
- vu->vu_idx = elem[0].id;
- vu->vu_tlen = elem[0].len;
- } else {
- for (i = 0; i < num_bufs; i++) {
- vu = &ring->vr_used_ring[(uidx + i) & ring->vr_mask];
- vu->vu_idx = elem[i].id;
- vu->vu_tlen = elem[i].len;
- }
- uidx = uidx + num_bufs;
+ uidx = ring->vr_cur_uidx;
+ for (uint_t i = 0; i < num_bufs; i++) {
+ vring_write_used(ring, uidx, elem[i].id, elem[i].len);
+ uidx++;
}
membar_producer();
*ring->vr_used_idx = uidx;
+ ring->vr_cur_uidx = uidx;
mutex_exit(&ring->vr_u_mutex);
}
diff --git a/usr/src/uts/i86pc/io/viona/viona_tx.c b/usr/src/uts/i86pc/io/viona/viona_tx.c
index 843435c67d..805a077269 100644
--- a/usr/src/uts/i86pc/io/viona/viona_tx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_tx.c
@@ -51,6 +51,9 @@
#define BNXE_NIC_DRIVER "bnxe"
+
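+/* Number of page holds which can be tracked inline in each viona_desb_t */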
+#define VIONA_DESB_HOLDS 4
+
/*
* copy tx mbufs from virtio ring to avoid necessitating a wait for packet
* transmission to free resources.
@@ -69,6 +72,7 @@ struct viona_desb {
uint32_t d_len;
uint16_t d_cookie;
uchar_t *d_headers;
+ vmm_page_hold_t d_holds[VIONA_DESB_HOLDS];
};
static void viona_tx(viona_link_t *, viona_vring_t *);
@@ -170,7 +174,7 @@ viona_tx_ring_alloc(viona_vring_t *ring, const uint16_t qsz)
}
/* Allocate ring-sized iovec buffers for TX */
- ring->vr_txiov = kmem_alloc(sizeof (struct iovec) * qsz, KM_SLEEP);
+ ring->vr_txiov = kmem_alloc(sizeof (ring_iovec_t) * qsz, KM_SLEEP);
}
void
@@ -187,7 +191,7 @@ viona_tx_ring_free(viona_vring_t *ring, const uint16_t qsz)
}
if (ring->vr_txiov != NULL) {
- kmem_free(ring->vr_txiov, sizeof (struct iovec) * qsz);
+ kmem_free(ring->vr_txiov, sizeof (ring_iovec_t) * qsz);
ring->vr_txiov = NULL;
}
}
@@ -492,9 +496,25 @@ viona_tx_csum(viona_vring_t *ring, const struct virtio_net_hdr *hdr,
}
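+
+/*
+ * Assume ownership of the page holds backing the TX iovecs, keeping them in
+ * the desb until transmission of the packet is complete.
+ */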
static void
+viona_desb_assume_holds(viona_vring_t *ring, viona_desb_t *dp,
+ ring_iovec_t *iov, uint_t niov)
+{
+ ASSERT(MUTEX_HELD(&ring->vr_lock));
+
+ for (uint_t i = 0; i < niov && i < VIONA_DESB_HOLDS; i++) {
+		dp->d_holds[i] = iov[i].riov_hold;
+ }
+ if (niov <= VIONA_DESB_HOLDS) {
+ return;
+ }
+
+	/* XXX: holds beyond VIONA_DESB_HOLDS are not yet carried over */
+}
+
+static void
viona_tx(viona_link_t *link, viona_vring_t *ring)
{
- struct iovec *iov = ring->vr_txiov;
+ ring_iovec_t *iov = ring->vr_txiov;
const uint_t max_segs = ring->vr_size;
uint16_t cookie;
int i, n;
@@ -524,7 +544,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
}
/* Grab the header and ensure it is of adequate length */
- hdr = (const struct virtio_net_hdr *)iov[0].iov_base;
+ hdr = (const struct virtio_net_hdr *)RIOV_BASE(iov[0]);
-	len = iov[0].iov_len;
+	len = RIOV_LEN(iov[0]);
if (len < sizeof (struct virtio_net_hdr)) {
goto drop_fail;
@@ -573,7 +593,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
for (i = 1; i < n; i++) {
-		const uint32_t to_copy = MIN(min_copy, iov[i].iov_len);
+		const uint32_t to_copy = MIN(min_copy, RIOV_LEN(iov[i]));
- bcopy(iov[i].iov_base, mp_head->b_wptr, to_copy);
+ bcopy(RIOV_BASE(iov[i]), mp_head->b_wptr, to_copy);
mp_head->b_wptr += to_copy;
len += to_copy;
min_copy -= to_copy;
@@ -602,7 +622,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
ASSERT3P(mp_tail, !=, NULL);
for (; i < n; i++) {
- uintptr_t base = (uintptr_t)iov[i].iov_base + base_off;
+ uintptr_t base = (uintptr_t)RIOV_BASE(iov[i]) + base_off;
-		uint32_t chunk = iov[i].iov_len - base_off;
-		ASSERT3U(base_off, <, iov[i].iov_len);
+		uint32_t chunk = RIOV_LEN(iov[i]) - base_off;
+		ASSERT3U(base_off, <, RIOV_LEN(iov[i]));
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index a5e60d4887..83bc66de90 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -1561,6 +1561,31 @@ vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz)
return (vmspace_find_kva(vm_get_vmspace(lease->vml_vm), gpa, sz));
}
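+
+/*
+ * Acquire a hold on the single page of guest memory beginning at 'gpa',
+ * recording its kernel mapping and backing cookie in 'pgh'.
+ */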
+boolean_t
+vmm_drv_gpa_hold(vmm_lease_t *lease, vmm_page_hold_t *pgh, uintptr_t gpa,
+ int prot)
+{
+ ASSERT(lease != NULL);
+
+ /* demand single-page aligned accesses */
+ VERIFY0(gpa & PAGEOFFSET);
+
+ pgh->vph_kva = vm_gpa_hold(lease->vml_vm, -1, gpa, PAGESIZE, prot,
+ &pgh->vph_cookie);
+ return (pgh->vph_kva != NULL);
+}
+
+void
+vmm_drv_gpa_rele(vmm_lease_t *lease, vmm_page_hold_t *pgh)
+{
+ ASSERT(lease != NULL);
+ ASSERT(pgh != NULL);
+
+ vm_gpa_release(pgh->vph_cookie);
+ pgh->vph_kva = NULL;
+ pgh->vph_cookie = NULL;
+}
+
int
vmm_drv_msi(vmm_lease_t *lease, uint64_t addr, uint64_t msg)
{
diff --git a/usr/src/uts/i86pc/sys/vmm_drv.h b/usr/src/uts/i86pc/sys/vmm_drv.h
index 856b75e5cc..5d5fa0efec 100644
--- a/usr/src/uts/i86pc/sys/vmm_drv.h
+++ b/usr/src/uts/i86pc/sys/vmm_drv.h
@@ -26,6 +26,12 @@ typedef struct vmm_hold vmm_hold_t;
struct vmm_lease;
typedef struct vmm_lease vmm_lease_t;
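+/*
+ * A hold on a single page of guest memory: the KVA at which it is mapped
+ * and an opaque cookie used when releasing the hold.
+ */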
+struct vmm_page_hold {
+ void *vph_cookie;
+ void *vph_kva;
+};
+typedef struct vmm_page_hold vmm_page_hold_t;
+
/*
* Because of tangled headers, these definitions mirror their vmm_[rw]mem_cb_t
* counterparts in vmm.h.
@@ -43,6 +49,10 @@ extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *);
extern boolean_t vmm_drv_lease_expired(vmm_lease_t *);
extern void *vmm_drv_gpa2kva(vmm_lease_t *, uintptr_t, size_t);
+extern boolean_t vmm_drv_gpa_hold(vmm_lease_t *, vmm_page_hold_t *, uintptr_t,
+    int);
+extern void *vmm_drv_gpa_kva(const vmm_page_hold_t *);
+extern void vmm_drv_gpa_rele(vmm_lease_t *, vmm_page_hold_t *);
+
extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t);
extern int vmm_drv_ioport_hook(vmm_hold_t *, uint_t, vmm_drv_rmem_cb_t,