author     Patrick Mooney <pmooney@pfmooney.com>    2021-06-25 21:15:55 +0000
committer  Patrick Mooney <pmooney@oxide.computer>  2022-03-30 15:00:46 +0000
commit     db9aa506ce275f82ee72f31fc2e6e3c53d1212b7
tree       d045a768ab97598d3d5fcfd33e4d102b71d7b2ab
parent     899b7fc7762875c5244567fbc6bb4ccace75d6f7
download   illumos-joyent-db9aa506ce275f82ee72f31fc2e6e3c53d1212b7.tar.gz
13912 viona should track held pages
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Approved by: Dan McDonald <danmcd@joyent.com>
-rw-r--r--  usr/src/uts/i86pc/io/viona/viona_impl.h    7
-rw-r--r--  usr/src/uts/i86pc/io/viona/viona_ring.c  269
-rw-r--r--  usr/src/uts/i86pc/io/viona/viona_rx.c     22
-rw-r--r--  usr/src/uts/i86pc/io/viona/viona_tx.c     15
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm.mapfile       9
-rw-r--r--  usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c    60
-rw-r--r--  usr/src/uts/i86pc/sys/vmm_drv.h           19
7 files changed, 282 insertions, 119 deletions
diff --git a/usr/src/uts/i86pc/io/viona/viona_impl.h b/usr/src/uts/i86pc/io/viona/viona_impl.h
index 4872720f79..760474e78b 100644
--- a/usr/src/uts/i86pc/io/viona/viona_impl.h
+++ b/usr/src/uts/i86pc/io/viona/viona_impl.h
@@ -109,6 +109,7 @@ typedef struct viona_vring {
/* Reference to guest pages holding virtqueue */
void **vr_map_pages;
+ vmm_page_t *vr_map_hold;
/* Per-ring error condition statistics */
struct viona_ring_stats {
@@ -293,15 +294,19 @@ void viona_ring_free(viona_vring_t *);
int viona_ring_reset(viona_vring_t *, boolean_t);
int viona_ring_init(viona_link_t *, uint16_t, uint16_t, uint64_t);
boolean_t viona_ring_lease_renew(viona_vring_t *);
-int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *);
+
+int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *,
+ vmm_page_t **);
void vq_pushchain(viona_vring_t *, uint32_t, uint16_t);
void vq_pushchain_many(viona_vring_t *, uint_t, used_elem_t *);
+
void viona_intr_ring(viona_vring_t *ring, boolean_t);
void viona_ring_set_no_notify(viona_vring_t *, boolean_t);
void viona_ring_disable_notify(viona_vring_t *);
void viona_ring_enable_notify(viona_vring_t *);
uint16_t viona_ring_num_avail(viona_vring_t *);
+
void viona_rx_init(void);
void viona_rx_fini(void);
int viona_rx_set(viona_link_t *);
diff --git a/usr/src/uts/i86pc/io/viona/viona_ring.c b/usr/src/uts/i86pc/io/viona/viona_ring.c
index 79094d3dc0..2d847dda09 100644
--- a/usr/src/uts/i86pc/io/viona/viona_ring.c
+++ b/usr/src/uts/i86pc/io/viona/viona_ring.c
@@ -81,17 +81,109 @@
P2ROUNDUP(LEGACY_USED_SZ(qsz), LEGACY_VQ_ALIGN))
#define LEGACY_VQ_PAGES(qsz) (LEGACY_VQ_SIZE(qsz) / PAGESIZE)
+struct vq_held_region {
+ struct iovec *vhr_iov;
+ vmm_page_t *vhr_head;
+ vmm_page_t *vhr_tail;
+ /* Length of iovec array supplied in `vhr_iov` */
+ uint_t vhr_niov;
+ /*
+ * Index into vhr_iov, indicating the next "free" entry (following the
+ * last entry which has valid contents).
+ */
+ uint_t vhr_idx;
+};
+typedef struct vq_held_region vq_held_region_t;
+
static boolean_t viona_ring_map(viona_vring_t *);
static void viona_ring_unmap(viona_vring_t *);
static kthread_t *viona_create_worker(viona_vring_t *);
-static void *
-viona_hold_page(viona_vring_t *ring, uint64_t gpa)
+static vmm_page_t *
+vq_page_hold(viona_vring_t *ring, uint64_t gpa, bool writable)
{
ASSERT3P(ring->vr_lease, !=, NULL);
- ASSERT3U(gpa & PAGEOFFSET, ==, 0);
- return (vmm_drv_gpa2kva(ring->vr_lease, gpa, PAGESIZE));
+ int prot = PROT_READ;
+ if (writable) {
+ prot |= PROT_WRITE;
+ }
+
+ return (vmm_drv_page_hold(ring->vr_lease, gpa, prot));
+}
+
+/*
+ * Establish a hold on the page(s) which back the region of guest memory covered
+ * by [gpa, gpa + len). The host-kernel-virtual pointers to those pages are
+ * stored in the iovec array supplied in `region`, along with the chain of
+ * vmm_page_t entries representing the held pages. Since guest memory
+ * carries no guarantees of being physically contiguous (on the host), it is
+ * assumed that an iovec entry will be required for each PAGESIZE section
+ * covered by the specified `gpa` and `len` range. For each iovec entry
+ * successfully populated by holding a page, `vhr_idx` will be incremented so it
+ * references the next available iovec entry (or `vhr_niov`, if the iovec array
+ * is full). The responsibility for releasing the `vmm_page_t` chain (stored in
+ * `vhr_head` and `vhr_tail`) resides with the caller, regardless of the result.
+ */
+static int
+vq_region_hold(viona_vring_t *ring, uint64_t gpa, uint32_t len,
+ bool writable, vq_held_region_t *region)
+{
+ const uint32_t front_offset = gpa & PAGEOFFSET;
+ const uint32_t front_len = MIN(len, PAGESIZE - front_offset);
+ uint_t pages = 1;
+ vmm_page_t *vmp;
+ caddr_t buf;
+
+ ASSERT3U(region->vhr_idx, <, region->vhr_niov);
+
+ if (front_len < len) {
+ pages += P2ROUNDUP((uint64_t)(len - front_len),
+ PAGESIZE) / PAGESIZE;
+ }
+ if (pages > (region->vhr_niov - region->vhr_idx)) {
+ return (E2BIG);
+ }
+
+ vmp = vq_page_hold(ring, gpa & PAGEMASK, writable);
+ if (vmp == NULL) {
+ return (EFAULT);
+ }
+ buf = (caddr_t)vmm_drv_page_readable(vmp);
+
+ region->vhr_iov[region->vhr_idx].iov_base = buf + front_offset;
+ region->vhr_iov[region->vhr_idx].iov_len = front_len;
+ region->vhr_idx++;
+ gpa += front_len;
+ len -= front_len;
+ if (region->vhr_head == NULL) {
+ region->vhr_head = vmp;
+ region->vhr_tail = vmp;
+ } else {
+ vmm_drv_page_chain(region->vhr_tail, vmp);
+ region->vhr_tail = vmp;
+ }
+
+ for (uint_t i = 1; i < pages; i++) {
+ ASSERT3U(gpa & PAGEOFFSET, ==, 0);
+
+ vmp = vq_page_hold(ring, gpa, writable);
+ if (vmp == NULL) {
+ return (EFAULT);
+ }
+ buf = (caddr_t)vmm_drv_page_readable(vmp);
+
+ const uint32_t chunk_len = MIN(len, PAGESIZE);
+ region->vhr_iov[region->vhr_idx].iov_base = buf;
+ region->vhr_iov[region->vhr_idx].iov_len = chunk_len;
+ region->vhr_idx++;
+ gpa += chunk_len;
+ len -= chunk_len;
+ vmm_drv_page_chain(region->vhr_tail, vmp);
+ region->vhr_tail = vmp;
+ }
+
+ return (0);
}
static boolean_t
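
[Editorial aside: a minimal sketch, not part of this patch, of how a hypothetical caller might use vq_region_hold() and honor the release contract described in the comment above. The function name and buffer parameters are illustrative; VTNET_MAXSEGS comes from viona_impl.h.]

static int
example_hold_guest_buffer(viona_vring_t *ring, uint64_t gpa, uint32_t len)
{
	struct iovec iov[VTNET_MAXSEGS];
	vq_held_region_t region = {
		.vhr_niov = VTNET_MAXSEGS,
		.vhr_iov = iov,
	};
	int err;

	/* Hold the backing pages read-only and build iov entries over them. */
	err = vq_region_hold(ring, gpa, len, false, &region);
	if (err == 0) {
		/* iov[0 .. region.vhr_idx - 1] now cover the guest buffer. */
	}

	/* The caller releases the held chain regardless of the result. */
	if (region.vhr_head != NULL) {
		vmm_drv_page_release_chain(region.vhr_head);
	}
	return (err);
}
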
@@ -310,14 +402,28 @@ viona_ring_map(viona_vring_t *ring)
const uint_t npages = LEGACY_VQ_PAGES(qsz);
ring->vr_map_pages = kmem_zalloc(npages * sizeof (void *), KM_SLEEP);
+ vmm_page_t *prev = NULL;
+
for (uint_t i = 0; i < npages; i++, pa += PAGESIZE) {
- void *page = viona_hold_page(ring, pa);
+ vmm_page_t *vmp;
- if (page == NULL) {
+ vmp = vq_page_hold(ring, pa, true);
+ if (vmp == NULL) {
viona_ring_unmap(ring);
return (B_FALSE);
}
- ring->vr_map_pages[i] = page;
+
+ /*
+ * Keep the first page as the head of the chain, appending all
+ * subsequent pages to the tail.
+ */
+ if (prev == NULL) {
+ ring->vr_map_hold = vmp;
+ } else {
+ vmm_drv_page_chain(prev, vmp);
+ }
+ prev = vmp;
+ ring->vr_map_pages[i] = vmm_drv_page_writable(vmp);
}
return (B_TRUE);
@@ -330,17 +436,14 @@ viona_ring_unmap(viona_vring_t *ring)
void **map = ring->vr_map_pages;
if (map != NULL) {
- /*
- * The bhyve page-hold mechanism does not currently require a
- * corresponding page-release action, given the simplicity of
- * the underlying virtual memory constructs.
- *
- * If/when those systems become more sophisticated, more than a
- * simple free of the page pointers will be required here.
- */
const uint_t npages = LEGACY_VQ_PAGES(ring->vr_size);
kmem_free(map, npages * sizeof (void *));
ring->vr_map_pages = NULL;
+
+ vmm_drv_page_release_chain(ring->vr_map_hold);
+ ring->vr_map_hold = NULL;
+ } else {
+ ASSERT3P(ring->vr_map_hold, ==, NULL);
}
}
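
[Editorial aside: viona_ring_map() above chains each held page onto vr_map_hold via vmm_drv_page_chain(), and viona_ring_unmap() releases the whole chain in one call. A small sketch, not from this patch, of walking such a chain with vmm_drv_page_next(), e.g. to count the held pages.]

static uint_t
example_chain_length(const vmm_page_t *head)
{
	uint_t count = 0;

	for (const vmm_page_t *vmp = head; vmp != NULL;
	    vmp = vmm_drv_page_next(vmp)) {
		count++;
	}
	return (count);
}
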
@@ -520,14 +623,9 @@ vq_read_avail(viona_vring_t *ring, uint16_t idx)
*/
static int
vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
- struct iovec *iov, uint_t niov, uint16_t *idxp)
+ vq_held_region_t *region)
{
- uint64_t gpa = desc->vd_addr;
- uint32_t len = desc->vd_len;
- uint16_t lidx = *idxp;
- caddr_t buf;
-
- ASSERT3U(lidx, <, niov);
+ int err;
if (desc->vd_len == 0) {
VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
@@ -536,55 +634,22 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
return (EINVAL);
}
- const uint32_t front_offset = desc->vd_addr & PAGEOFFSET;
- const uint32_t front_len = MIN(len, PAGESIZE - front_offset);
- uint_t pages = 1;
- if (front_len < len) {
- pages += P2ROUNDUP((uint64_t)(len - front_len),
- PAGESIZE) / PAGESIZE;
- }
-
- if (pages > (niov - lidx)) {
+ err = vq_region_hold(ring, desc->vd_addr, desc->vd_len,
+ (desc->vd_flags & VRING_DESC_F_WRITE) != 0, region);
+ switch (err) {
+ case E2BIG:
VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, too_many_desc);
- return (E2BIG);
- }
-
- buf = viona_hold_page(ring, gpa & PAGEMASK);
- if (buf == NULL) {
+ break;
+ case EFAULT:
VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
+ break;
+ default:
+ break;
}
- iov[lidx].iov_base = buf + front_offset;
- iov[lidx].iov_len = front_len;
- gpa += front_len;
- len -= front_len;
- lidx++;
-
- for (uint_t i = 1; i < pages; i++) {
- ASSERT3U(gpa & PAGEOFFSET, ==, 0);
-
- buf = viona_hold_page(ring, gpa);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
- VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
- }
- const uint32_t region_len = MIN(len, PAGESIZE);
- iov[lidx].iov_base = buf;
- iov[lidx].iov_len = region_len;
- gpa += region_len;
- len -= region_len;
- lidx++;
- }
-
- ASSERT3U(len, ==, 0);
- ASSERT3U(gpa, ==, desc->vd_addr + desc->vd_len);
-
- *idxp = lidx;
- return (0);
+ return (err);
}
/*
@@ -593,7 +658,7 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
*/
static int
vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
- struct iovec *iov, uint_t niov, uint16_t *idxp)
+ vq_held_region_t *region)
{
const uint16_t indir_count = desc->vd_len / sizeof (struct virtio_desc);
@@ -607,8 +672,10 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
}
uint16_t indir_next = 0;
- caddr_t buf = NULL;
+ const uint8_t *buf = NULL;
uint64_t buf_gpa = UINT64_MAX;
+ vmm_page_t *vmp = NULL;
+ int err = 0;
for (;;) {
uint64_t indir_gpa =
@@ -621,13 +688,18 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
* resides in, if it has not already been done.
*/
if (indir_page != buf_gpa) {
- buf = viona_hold_page(ring, indir_page);
- if (buf == NULL) {
- VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
+ if (vmp != NULL) {
+ vmm_drv_page_release(vmp);
+ }
+ vmp = vq_page_hold(ring, indir_page, false);
+ if (vmp == NULL) {
+ VIONA_PROBE_BAD_RING_ADDR(ring, indir_page);
VIONA_RING_STAT_INCR(ring, bad_ring_addr);
- return (EFAULT);
+ err = EFAULT;
+ break;
}
buf_gpa = indir_page;
+ buf = vmm_drv_page_readable(vmp);
}
/*
@@ -641,27 +713,30 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
if (vp.vd_flags & VRING_DESC_F_INDIRECT) {
VIONA_PROBE1(indir_bad_nest, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, indir_bad_nest);
- return (EINVAL);
+ err = EINVAL;
+ break;
} else if (vp.vd_len == 0) {
VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
uint32_t, vp.vd_len);
VIONA_RING_STAT_INCR(ring, desc_bad_len);
- return (EINVAL);
+ err = EINVAL;
+ break;
}
- int err = vq_map_desc_bufs(ring, &vp, iov, niov, idxp);
+ err = vq_map_desc_bufs(ring, &vp, region);
if (err != 0) {
- return (err);
+ break;
}
/* Successfully reached the end of the indir chain */
if ((vp.vd_flags & VRING_DESC_F_NEXT) == 0) {
- return (0);
+ break;
}
- if (*idxp >= niov) {
+ if (region->vhr_idx >= region->vhr_niov) {
VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, too_many_desc);
- return (E2BIG);
+ err = E2BIG;
+ break;
}
indir_next = vp.vd_next;
@@ -669,23 +744,31 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
VIONA_PROBE3(indir_bad_next, viona_vring_t *, ring,
uint16_t, indir_next, uint16_t, indir_count);
VIONA_RING_STAT_INCR(ring, indir_bad_next);
- return (EINVAL);
+ err = EINVAL;
+ break;
}
}
- /* NOTREACHED */
- return (-1);
+ if (vmp != NULL) {
+ vmm_drv_page_release(vmp);
+ }
+ return (err);
}
int
vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
- uint16_t *cookie)
+ uint16_t *cookie, vmm_page_t **chain)
{
- uint16_t i, ndesc, idx, head, next;
+ uint16_t ndesc, idx, head, next;
struct virtio_desc vdir;
+ vq_held_region_t region = {
+ .vhr_niov = niov,
+ .vhr_iov = iov,
+ };
ASSERT(iov != NULL);
ASSERT(niov > 0 && niov < INT_MAX);
+ ASSERT(*chain == NULL);
mutex_enter(&ring->vr_a_mutex);
idx = ring->vr_cur_aidx;
@@ -711,7 +794,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
head = vq_read_avail(ring, idx & ring->vr_mask);
next = head;
- for (i = 0; i < niov; next = vdir.vd_next) {
+ for (region.vhr_idx = 0; region.vhr_idx < niov; next = vdir.vd_next) {
if (next >= ring->vr_size) {
VIONA_PROBE2(bad_idx, viona_vring_t *, ring,
uint16_t, next);
@@ -721,7 +804,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
vq_read_desc(ring, next, &vdir);
if ((vdir.vd_flags & VRING_DESC_F_INDIRECT) == 0) {
- if (vq_map_desc_bufs(ring, &vdir, iov, niov, &i) != 0) {
+ if (vq_map_desc_bufs(ring, &vdir, &region) != 0) {
break;
}
} else {
@@ -738,21 +821,29 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
break;
}
- if (vq_map_indir_desc_bufs(ring, &vdir, iov, niov, &i)
- != 0) {
+ if (vq_map_indir_desc_bufs(ring, &vdir, &region) != 0) {
break;
}
}
if ((vdir.vd_flags & VRING_DESC_F_NEXT) == 0) {
- *cookie = head;
ring->vr_cur_aidx++;
mutex_exit(&ring->vr_a_mutex);
- return (i);
+
+ *cookie = head;
+ *chain = region.vhr_head;
+ return (region.vhr_idx);
}
}
mutex_exit(&ring->vr_a_mutex);
+ if (region.vhr_head != NULL) {
+ /*
+ * If any pages were held prior to encountering an error, we
+ * must release them now.
+ */
+ vmm_drv_page_release_chain(region.vhr_head);
+ }
return (-1);
}
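
[Editorial aside: with the new `vmm_page_t **` out-parameter, vq_popchain() hands ownership of the held pages to its caller on success and releases them itself on failure. A hedged sketch, not part of this patch, of the resulting caller pattern, mirroring what the rx/tx paths below do; the function name and the zero "written" length passed to vq_pushchain() are illustrative.]

static void
example_consume_chain(viona_vring_t *ring)
{
	struct iovec iov[VTNET_MAXSEGS];
	vmm_page_t *pages = NULL;
	uint16_t cookie;
	int n;

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
	if (n <= 0) {
		/* No descriptors were available; nothing is held. */
		return;
	}

	/* ... access guest memory through iov[0 .. n-1] while held ... */

	/* Drop the page holds before handing the chain back to the guest. */
	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, 0, cookie);
}
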
diff --git a/usr/src/uts/i86pc/io/viona/viona_rx.c b/usr/src/uts/i86pc/io/viona/viona_rx.c
index dc3feb10fe..2fbf6be972 100644
--- a/usr/src/uts/i86pc/io/viona/viona_rx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_rx.c
@@ -208,10 +208,11 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
caddr_t buf = NULL;
boolean_t end = B_FALSE;
const uint32_t features = ring->vr_link->l_features;
+ vmm_page_t *pages = NULL;
ASSERT(msz >= MIN_BUF_SIZE);
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
if (n <= 0) {
/* Without available buffers, the frame must be dropped. */
return (ENOSPC);
@@ -279,6 +280,7 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
}
/* Release this chain */
+ vmm_drv_page_release_chain(pages);
vq_pushchain(ring, copied, cookie);
return (0);
@@ -287,6 +289,7 @@ bad_frame:
mblk_t *, mp);
VIONA_RING_STAT_INCR(ring, bad_rx_frame);
+ vmm_drv_page_release_chain(pages);
vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
return (EINVAL);
}
@@ -296,6 +299,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
struct iovec iov[VTNET_MAXSEGS];
used_elem_t uelem[VTNET_MAXSEGS];
+ vmm_page_t *pages = NULL, *hdr_pages = NULL;
int n, i = 0, buf_idx = 0, err = 0;
uint16_t cookie;
caddr_t buf;
@@ -307,7 +311,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
ASSERT(msz >= MIN_BUF_SIZE);
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages);
if (n <= 0) {
/* Without available buffers, the frame must be dropped. */
VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
@@ -376,7 +380,12 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
err = EOVERFLOW;
break;
}
- n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie);
+ if (pages != NULL) {
+ vmm_drv_page_release_chain(pages);
+ pages = NULL;
+ }
+ n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
+ &pages);
if (n <= 0) {
/*
* Without more immediate space to perform the
@@ -453,6 +462,13 @@ done:
uint16_t, cookie, mblk_t *, mp);
VIONA_RING_STAT_INCR(ring, bad_rx_frame);
}
+
+ if (hdr_pages != NULL) {
+ vmm_drv_page_release_chain(hdr_pages);
+ }
+ if (pages != NULL) {
+ vmm_drv_page_release_chain(pages);
+ }
vq_pushchain_many(ring, buf_idx + 1, uelem);
return (err);
}
diff --git a/usr/src/uts/i86pc/io/viona/viona_tx.c b/usr/src/uts/i86pc/io/viona/viona_tx.c
index f8018692a5..424deee498 100644
--- a/usr/src/uts/i86pc/io/viona/viona_tx.c
+++ b/usr/src/uts/i86pc/io/viona/viona_tx.c
@@ -70,6 +70,7 @@ struct viona_desb {
uint32_t d_len;
uint16_t d_cookie;
uchar_t *d_headers;
+ vmm_page_t *d_pages;
};
static void viona_tx(viona_link_t *, viona_vring_t *);
@@ -287,6 +288,14 @@ viona_desb_release(viona_desb_t *dp)
cookie = dp->d_cookie;
dp->d_len = 0;
dp->d_cookie = 0;
+ vmm_drv_page_release_chain(dp->d_pages);
+ dp->d_pages = NULL;
+
+ /*
+ * Ensure all other changes to the desb are visible prior to zeroing its
+ * refcount, signifying its readiness for reuse.
+ */
+ membar_exit();
dp->d_ref = 0;
viona_tx_done(ring, len, cookie);
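
[Editorial aside: the membar_exit() added above orders the desb cleanup, including clearing d_pages, ahead of the store that marks the desb free for reuse. A hedged sketch, not from this patch, of the matching acquire side, assuming hypothetically that d_ref is a uint_t and that a consumer claims a desb by swinging it from 0 to 1 with a compare-and-swap.]

	if (atomic_cas_uint(&dp->d_ref, 0, 1) == 0) {
		/* Pairs with the membar_exit() in viona_desb_release(). */
		membar_enter();
		/* dp is quiescent and safe to reuse. */
	}
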
@@ -484,12 +493,13 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
viona_desb_t *dp = NULL;
mac_client_handle_t link_mch = link->l_mch;
const struct virtio_net_hdr *hdr;
+ vmm_page_t *pages = NULL;
mp_head = mp_tail = NULL;
ASSERT(iov != NULL);
- n = vq_popchain(ring, iov, max_segs, &cookie);
+ n = vq_popchain(ring, iov, max_segs, &cookie, &pages);
if (n == 0) {
VIONA_PROBE1(tx_absent, viona_vring_t *, ring);
VIONA_RING_STAT_INCR(ring, tx_absent);
@@ -670,6 +680,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
if (dp != NULL) {
dp->d_len = len;
+ dp->d_pages = pages;
mutex_enter(&ring->vr_lock);
ring->vr_xfer_outstanding++;
mutex_exit(&ring->vr_lock);
@@ -679,6 +690,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring)
* be marked as 'used' now, rather than deferring that action
* until after successful packet transmission.
*/
+ vmm_drv_page_release_chain(pages);
viona_tx_done(ring, len, cookie);
}
@@ -731,5 +743,6 @@ drop_hook:
VIONA_PROBE3(tx_drop, viona_vring_t *, ring, uint32_t, len,
uint16_t, cookie);
+ vmm_drv_page_release_chain(pages);
viona_tx_done(ring, len, cookie);
}
diff --git a/usr/src/uts/i86pc/io/vmm/vmm.mapfile b/usr/src/uts/i86pc/io/vmm/vmm.mapfile
index 0af4f090b4..fb1c9366de 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm.mapfile
+++ b/usr/src/uts/i86pc/io/vmm/vmm.mapfile
@@ -12,6 +12,7 @@
#
# Copyright 2019 Joyent, Inc.
+# Copyright 2021 Oxide Computer Company
#
#
@@ -44,7 +45,13 @@ SYMBOL_VERSION ILLUMOSprivate {
vmm_drv_lease_sign;
vmm_drv_lease_break;
vmm_drv_lease_expired;
- vmm_drv_gpa2kva;
+ vmm_drv_page_hold;
+ vmm_drv_page_release;
+ vmm_drv_page_release_chain;
+ vmm_drv_page_readable;
+ vmm_drv_page_writable;
+ vmm_drv_page_chain;
+ vmm_drv_page_next;
vmm_drv_ioport_hook;
vmm_drv_ioport_unhook;
vmm_drv_msi;
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
index 24dd851831..d66778c55a 100644
--- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
@@ -1969,33 +1969,49 @@ vmm_drv_lease_expired(vmm_lease_t *lease)
return (lease->vml_expired);
}
-void *
-vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz)
+vmm_page_t *
+vmm_drv_page_hold(vmm_lease_t *lease, uintptr_t gpa, int prot)
{
- vm_page_t *vmp;
- void *res = NULL;
-
ASSERT(lease != NULL);
- ASSERT3U(sz, ==, PAGESIZE);
ASSERT0(gpa & PAGEOFFSET);
- vmp = vmc_hold(lease->vml_vmclient, gpa, PROT_READ | PROT_WRITE);
- /*
- * Break the rules for now and just extract the pointer. This is
- * nominally safe, since holding a driver lease on the VM read-locks it.
- *
- * A pointer which would otherwise be at risk of being a use-after-free
- * vector is made safe since actions such as vmspace_unmap() require
- * acquisition of the VM write-lock, (causing all driver leases to be
- * broken) allowing the consumers to cease their access prior to
- * modification of the vmspace.
- */
- if (vmp != NULL) {
- res = vmp_get_writable(vmp);
- vmp_release(vmp);
- }
+ return ((vmm_page_t *)vmc_hold(lease->vml_vmclient, gpa, prot));
+}
- return (res);
+void
+vmm_drv_page_release(vmm_page_t *vmmp)
+{
+ vmp_release((vm_page_t *)vmmp);
+}
+
+void
+vmm_drv_page_release_chain(vmm_page_t *vmmp)
+{
+ vmp_release_chain((vm_page_t *)vmmp);
+}
+
+const void *
+vmm_drv_page_readable(const vmm_page_t *vmmp)
+{
+ return (vmp_get_readable((const vm_page_t *)vmmp));
+}
+
+void *
+vmm_drv_page_writable(const vmm_page_t *vmmp)
+{
+ return (vmp_get_writable((const vm_page_t *)vmmp));
+}
+
+void
+vmm_drv_page_chain(vmm_page_t *vmmp, vmm_page_t *to_chain)
+{
+ vmp_chain((vm_page_t *)vmmp, (vm_page_t *)to_chain);
+}
+
+vmm_page_t *
+vmm_drv_page_next(const vmm_page_t *vmmp)
+{
+ return ((vmm_page_t *)vmp_next((vm_page_t *)vmmp));
}
int
diff --git a/usr/src/uts/i86pc/sys/vmm_drv.h b/usr/src/uts/i86pc/sys/vmm_drv.h
index 1f2b3d9254..0b7f622e53 100644
--- a/usr/src/uts/i86pc/sys/vmm_drv.h
+++ b/usr/src/uts/i86pc/sys/vmm_drv.h
@@ -12,7 +12,7 @@
/*
* Copyright 2019 Joyent, Inc.
- * Copyright 2020 Oxide Computer Company
+ * Copyright 2021 Oxide Computer Company
*/
#ifndef _VMM_DRV_H_
@@ -30,6 +30,14 @@ struct vmm_lease;
typedef struct vmm_lease vmm_lease_t;
/*
+ * This is effectively a synonym for the bhyve-internal 'struct vm_page' type.
+ * Use of `vmm_page_t *` instead allows us to keep those implementation details
+ * hidden from vmm_drv consumers.
+ */
+struct vmm_page;
+typedef struct vmm_page vmm_page_t;
+
+/*
* Because of tangled headers, this definition mirrors its ioport_handler_t
* counterpart in vmm_kernel.h.
*/
@@ -44,7 +52,14 @@ extern vmm_lease_t *vmm_drv_lease_sign(vmm_hold_t *, boolean_t (*)(void *),
extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *);
extern boolean_t vmm_drv_lease_expired(vmm_lease_t *);
-extern void *vmm_drv_gpa2kva(vmm_lease_t *, uintptr_t, size_t);
+extern vmm_page_t *vmm_drv_page_hold(vmm_lease_t *, uintptr_t, int);
+extern void vmm_drv_page_release(vmm_page_t *);
+extern void vmm_drv_page_release_chain(vmm_page_t *);
+extern const void *vmm_drv_page_readable(const vmm_page_t *);
+extern void *vmm_drv_page_writable(const vmm_page_t *);
+extern void vmm_drv_page_chain(vmm_page_t *, vmm_page_t *);
+extern vmm_page_t *vmm_drv_page_next(const vmm_page_t *);
+
extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t);
extern int vmm_drv_ioport_hook(vmm_hold_t *, uint16_t, vmm_drv_iop_cb_t, void *,
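
[Editorial aside: taken together, the declarations above replace the old vmm_drv_gpa2kva() pointer extraction with an explicit hold/release discipline. A minimal, hypothetical consumer sketch, not part of this change, assuming a lease previously signed via vmm_drv_lease_sign() and a destination buffer of at least PAGESIZE bytes.]

static int
example_read_guest_page(vmm_lease_t *lease, uintptr_t gpa, void *dst)
{
	vmm_page_t *vmp;

	/* vmm_drv_page_hold() expects a page-aligned GPA. */
	vmp = vmm_drv_page_hold(lease, gpa & PAGEMASK, PROT_READ);
	if (vmp == NULL) {
		return (EFAULT);
	}
	bcopy(vmm_drv_page_readable(vmp), dst, PAGESIZE);
	vmm_drv_page_release(vmp);
	return (0);
}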