author     Patrick Mooney <pmooney@pfmooney.com>      2021-06-25 21:15:55 +0000
committer  Patrick Mooney <pmooney@oxide.computer>    2022-03-30 15:00:46 +0000
commit     db9aa506ce275f82ee72f31fc2e6e3c53d1212b7 (patch)
tree       d045a768ab97598d3d5fcfd33e4d102b71d7b2ab
parent     899b7fc7762875c5244567fbc6bb4ccace75d6f7 (diff)
download   illumos-joyent-db9aa506ce275f82ee72f31fc2e6e3c53d1212b7.tar.gz
13912 viona should track held pages
Reviewed by: Dan Cross <cross@oxidecomputer.com>
Reviewed by: Joshua M. Clulow <josh@sysmgr.org>
Approved by: Dan McDonald <danmcd@joyent.com>
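
At a high level, this change converts viona's transient gpa-to-kva translations
into explicit page holds: vq_popchain() now returns the chain of held guest
pages through a new out parameter, and each consumer (rx, tx, and the ring
mapping itself) must release that chain once it is done with the buffers.
A minimal consumer-side sketch of the new contract follows; process_iovs() and
the `copied` byte count are hypothetical stand-ins for real ring-consumer logic.

	/*
	 * Sketch only: vq_popchain() and vmm_drv_page_release_chain() are the
	 * interfaces changed/introduced by this commit; process_iovs() and
	 * `copied` are hypothetical.
	 */
	static void
	example_consume_chain(viona_vring_t *ring, uint32_t copied)
	{
		struct iovec iov[VTNET_MAXSEGS];
		vmm_page_t *pages = NULL;
		uint16_t cookie;
		int n;

		n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages);
		if (n <= 0) {
			/* No descriptors available (or error); nothing held. */
			return;
		}

		/* Guest buffers in iov[0..n-1] remain valid while held. */
		process_iovs(iov, n);

		/* Drop the page holds before returning the chain as used. */
		vmm_drv_page_release_chain(pages);
		vq_pushchain(ring, copied, cookie);
	}
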
-rw-r--r--   usr/src/uts/i86pc/io/viona/viona_impl.h   |   7
-rw-r--r--   usr/src/uts/i86pc/io/viona/viona_ring.c   | 269
-rw-r--r--   usr/src/uts/i86pc/io/viona/viona_rx.c     |  22
-rw-r--r--   usr/src/uts/i86pc/io/viona/viona_tx.c     |  15
-rw-r--r--   usr/src/uts/i86pc/io/vmm/vmm.mapfile      |   9
-rw-r--r--   usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c    |  60
-rw-r--r--   usr/src/uts/i86pc/sys/vmm_drv.h           |  19
7 files changed, 282 insertions(+), 119 deletions(-)
diff --git a/usr/src/uts/i86pc/io/viona/viona_impl.h b/usr/src/uts/i86pc/io/viona/viona_impl.h index 4872720f79..760474e78b 100644 --- a/usr/src/uts/i86pc/io/viona/viona_impl.h +++ b/usr/src/uts/i86pc/io/viona/viona_impl.h @@ -109,6 +109,7 @@ typedef struct viona_vring { /* Reference to guest pages holding virtqueue */ void **vr_map_pages; + vmm_page_t *vr_map_hold; /* Per-ring error condition statistics */ struct viona_ring_stats { @@ -293,15 +294,19 @@ void viona_ring_free(viona_vring_t *); int viona_ring_reset(viona_vring_t *, boolean_t); int viona_ring_init(viona_link_t *, uint16_t, uint16_t, uint64_t); boolean_t viona_ring_lease_renew(viona_vring_t *); -int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *); + +int vq_popchain(viona_vring_t *, struct iovec *, uint_t, uint16_t *, + vmm_page_t **); void vq_pushchain(viona_vring_t *, uint32_t, uint16_t); void vq_pushchain_many(viona_vring_t *, uint_t, used_elem_t *); + void viona_intr_ring(viona_vring_t *ring, boolean_t); void viona_ring_set_no_notify(viona_vring_t *, boolean_t); void viona_ring_disable_notify(viona_vring_t *); void viona_ring_enable_notify(viona_vring_t *); uint16_t viona_ring_num_avail(viona_vring_t *); + void viona_rx_init(void); void viona_rx_fini(void); int viona_rx_set(viona_link_t *); diff --git a/usr/src/uts/i86pc/io/viona/viona_ring.c b/usr/src/uts/i86pc/io/viona/viona_ring.c index 79094d3dc0..2d847dda09 100644 --- a/usr/src/uts/i86pc/io/viona/viona_ring.c +++ b/usr/src/uts/i86pc/io/viona/viona_ring.c @@ -81,17 +81,109 @@ P2ROUNDUP(LEGACY_USED_SZ(qsz), LEGACY_VQ_ALIGN)) #define LEGACY_VQ_PAGES(qsz) (LEGACY_VQ_SIZE(qsz) / PAGESIZE) +struct vq_held_region { + struct iovec *vhr_iov; + vmm_page_t *vhr_head; + vmm_page_t *vhr_tail; + /* Length of iovec array supplied in `vhr_iov` */ + uint_t vhr_niov; + /* + * Index into vhr_iov, indicating the next "free" entry (following the + * last entry which has valid contents). + */ + uint_t vhr_idx; +}; +typedef struct vq_held_region vq_held_region_t; + static boolean_t viona_ring_map(viona_vring_t *); static void viona_ring_unmap(viona_vring_t *); static kthread_t *viona_create_worker(viona_vring_t *); -static void * -viona_hold_page(viona_vring_t *ring, uint64_t gpa) +static vmm_page_t * +vq_page_hold(viona_vring_t *ring, uint64_t gpa, bool writable) { ASSERT3P(ring->vr_lease, !=, NULL); - ASSERT3U(gpa & PAGEOFFSET, ==, 0); - return (vmm_drv_gpa2kva(ring->vr_lease, gpa, PAGESIZE)); + int prot = PROT_READ; + if (writable) { + prot |= PROT_WRITE; + } + + return (vmm_drv_page_hold(ring->vr_lease, gpa, prot)); +} + +/* + * Establish a hold on the page(s) which back the region of guest memory covered + * by [gpa, gpa + len). The host-kernel-virtual pointers to those pages are + * stored in the iovec array supplied in `region`, along with the chain of + * vmm_page_t entries representing the held pages. Since guest memory + * carries no guarantees of being physically contiguous (on the host), it is + * assumed that an iovec entry will be required for each PAGESIZE section + * covered by the specified `gpa` and `len` range. For each iovec entry + * successfully populated by holding a page, `vhr_idx` will be incremented so it + * references the next available iovec entry (or `vhr_niov`, if the iovec array + * is full). The responsibility for releasing the `vmm_page_t` chain (stored in + * `vhr_head` and `vhr_tail`) resides with the caller, regardless of the result. 
+ */ +static int +vq_region_hold(viona_vring_t *ring, uint64_t gpa, uint32_t len, + bool writable, vq_held_region_t *region) +{ + const uint32_t front_offset = gpa & PAGEOFFSET; + const uint32_t front_len = MIN(len, PAGESIZE - front_offset); + uint_t pages = 1; + vmm_page_t *vmp; + caddr_t buf; + + ASSERT3U(region->vhr_idx, <, region->vhr_niov); + + if (front_len < len) { + pages += P2ROUNDUP((uint64_t)(len - front_len), + PAGESIZE) / PAGESIZE; + } + if (pages > (region->vhr_niov - region->vhr_idx)) { + return (E2BIG); + } + + vmp = vq_page_hold(ring, gpa & PAGEMASK, writable); + if (vmp == NULL) { + return (EFAULT); + } + buf = (caddr_t)vmm_drv_page_readable(vmp); + + region->vhr_iov[region->vhr_idx].iov_base = buf + front_offset; + region->vhr_iov[region->vhr_idx].iov_len = front_len; + region->vhr_idx++; + gpa += front_len; + len -= front_len; + if (region->vhr_head == NULL) { + region->vhr_head = vmp; + region->vhr_tail = vmp; + } else { + vmm_drv_page_chain(region->vhr_tail, vmp); + region->vhr_tail = vmp; + } + + for (uint_t i = 1; i < pages; i++) { + ASSERT3U(gpa & PAGEOFFSET, ==, 0); + + vmp = vq_page_hold(ring, gpa, writable); + if (vmp == NULL) { + return (EFAULT); + } + buf = (caddr_t)vmm_drv_page_readable(vmp); + + const uint32_t chunk_len = MIN(len, PAGESIZE); + region->vhr_iov[region->vhr_idx].iov_base = buf; + region->vhr_iov[region->vhr_idx].iov_len = chunk_len; + region->vhr_idx++; + gpa += chunk_len; + len -= chunk_len; + vmm_drv_page_chain(region->vhr_tail, vmp); + region->vhr_tail = vmp; + } + + return (0); } static boolean_t @@ -310,14 +402,28 @@ viona_ring_map(viona_vring_t *ring) const uint_t npages = LEGACY_VQ_PAGES(qsz); ring->vr_map_pages = kmem_zalloc(npages * sizeof (void *), KM_SLEEP); + vmm_page_t *prev = NULL; + for (uint_t i = 0; i < npages; i++, pa += PAGESIZE) { - void *page = viona_hold_page(ring, pa); + vmm_page_t *vmp; - if (page == NULL) { + vmp = vq_page_hold(ring, pa, true); + if (vmp == NULL) { viona_ring_unmap(ring); return (B_FALSE); } - ring->vr_map_pages[i] = page; + + /* + * Keep the first page has the head of the chain, appending all + * subsequent pages to the tail. + */ + if (prev == NULL) { + ring->vr_map_hold = vmp; + } else { + vmm_drv_page_chain(prev, vmp); + } + prev = vmp; + ring->vr_map_pages[i] = vmm_drv_page_writable(vmp); } return (B_TRUE); @@ -330,17 +436,14 @@ viona_ring_unmap(viona_vring_t *ring) void **map = ring->vr_map_pages; if (map != NULL) { - /* - * The bhyve page-hold mechanism does not currently require a - * corresponding page-release action, given the simplicity of - * the underlying virtual memory constructs. - * - * If/when those systems become more sophisticated, more than a - * simple free of the page pointers will be required here. 
- */ const uint_t npages = LEGACY_VQ_PAGES(ring->vr_size); kmem_free(map, npages * sizeof (void *)); ring->vr_map_pages = NULL; + + vmm_drv_page_release_chain(ring->vr_map_hold); + ring->vr_map_hold = NULL; + } else { + ASSERT3P(ring->vr_map_hold, ==, NULL); } } @@ -520,14 +623,9 @@ vq_read_avail(viona_vring_t *ring, uint16_t idx) */ static int vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, - struct iovec *iov, uint_t niov, uint16_t *idxp) + vq_held_region_t *region) { - uint64_t gpa = desc->vd_addr; - uint32_t len = desc->vd_len; - uint16_t lidx = *idxp; - caddr_t buf; - - ASSERT3U(lidx, <, niov); + int err; if (desc->vd_len == 0) { VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring, @@ -536,55 +634,22 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, return (EINVAL); } - const uint32_t front_offset = desc->vd_addr & PAGEOFFSET; - const uint32_t front_len = MIN(len, PAGESIZE - front_offset); - uint_t pages = 1; - if (front_len < len) { - pages += P2ROUNDUP((uint64_t)(len - front_len), - PAGESIZE) / PAGESIZE; - } - - if (pages > (niov - lidx)) { + err = vq_region_hold(ring, desc->vd_addr, desc->vd_len, + (desc->vd_flags & VRING_DESC_F_WRITE) != 0, region); + switch (err) { + case E2BIG: VIONA_PROBE1(too_many_desc, viona_vring_t *, ring); VIONA_RING_STAT_INCR(ring, too_many_desc); - return (E2BIG); - } - - buf = viona_hold_page(ring, gpa & PAGEMASK); - if (buf == NULL) { + break; + case EFAULT: VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr); VIONA_RING_STAT_INCR(ring, bad_ring_addr); - return (EFAULT); + break; + default: + break; } - iov[lidx].iov_base = buf + front_offset; - iov[lidx].iov_len = front_len; - gpa += front_len; - len -= front_len; - lidx++; - - for (uint_t i = 1; i < pages; i++) { - ASSERT3U(gpa & PAGEOFFSET, ==, 0); - - buf = viona_hold_page(ring, gpa); - if (buf == NULL) { - VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr); - VIONA_RING_STAT_INCR(ring, bad_ring_addr); - return (EFAULT); - } - const uint32_t region_len = MIN(len, PAGESIZE); - iov[lidx].iov_base = buf; - iov[lidx].iov_len = region_len; - gpa += region_len; - len -= region_len; - lidx++; - } - - ASSERT3U(len, ==, 0); - ASSERT3U(gpa, ==, desc->vd_addr + desc->vd_len); - - *idxp = lidx; - return (0); + return (err); } /* @@ -593,7 +658,7 @@ vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, */ static int vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, - struct iovec *iov, uint_t niov, uint16_t *idxp) + vq_held_region_t *region) { const uint16_t indir_count = desc->vd_len / sizeof (struct virtio_desc); @@ -607,8 +672,10 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, } uint16_t indir_next = 0; - caddr_t buf = NULL; + const uint8_t *buf = NULL; uint64_t buf_gpa = UINT64_MAX; + vmm_page_t *vmp = NULL; + int err = 0; for (;;) { uint64_t indir_gpa = @@ -621,13 +688,18 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, * resides in, if has not already been done. 
*/ if (indir_page != buf_gpa) { - buf = viona_hold_page(ring, indir_page); - if (buf == NULL) { - VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr); + if (vmp != NULL) { + vmm_drv_page_release(vmp); + } + vmp = vq_page_hold(ring, indir_page, false); + if (vmp == NULL) { + VIONA_PROBE_BAD_RING_ADDR(ring, indir_page); VIONA_RING_STAT_INCR(ring, bad_ring_addr); - return (EFAULT); + err = EFAULT; + break; } buf_gpa = indir_page; + buf = vmm_drv_page_readable(vmp); } /* @@ -641,27 +713,30 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, if (vp.vd_flags & VRING_DESC_F_INDIRECT) { VIONA_PROBE1(indir_bad_nest, viona_vring_t *, ring); VIONA_RING_STAT_INCR(ring, indir_bad_nest); - return (EINVAL); + err = EINVAL; + break; } else if (vp.vd_len == 0) { VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring, uint32_t, vp.vd_len); VIONA_RING_STAT_INCR(ring, desc_bad_len); - return (EINVAL); + err = EINVAL; + break; } - int err = vq_map_desc_bufs(ring, &vp, iov, niov, idxp); + err = vq_map_desc_bufs(ring, &vp, region); if (err != 0) { - return (err); + break; } /* Successfully reach the end of the indir chain */ if ((vp.vd_flags & VRING_DESC_F_NEXT) == 0) { - return (0); + break; } - if (*idxp >= niov) { + if (region->vhr_idx >= region->vhr_niov) { VIONA_PROBE1(too_many_desc, viona_vring_t *, ring); VIONA_RING_STAT_INCR(ring, too_many_desc); - return (E2BIG); + err = E2BIG; + break; } indir_next = vp.vd_next; @@ -669,23 +744,31 @@ vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc, VIONA_PROBE3(indir_bad_next, viona_vring_t *, ring, uint16_t, indir_next, uint16_t, indir_count); VIONA_RING_STAT_INCR(ring, indir_bad_next); - return (EINVAL); + err = EINVAL; + break; } } - /* NOTREACHED */ - return (-1); + if (vmp != NULL) { + vmm_drv_page_release(vmp); + } + return (err); } int vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov, - uint16_t *cookie) + uint16_t *cookie, vmm_page_t **chain) { - uint16_t i, ndesc, idx, head, next; + uint16_t ndesc, idx, head, next; struct virtio_desc vdir; + vq_held_region_t region = { + .vhr_niov = niov, + .vhr_iov = iov, + }; ASSERT(iov != NULL); ASSERT(niov > 0 && niov < INT_MAX); + ASSERT(*chain == NULL); mutex_enter(&ring->vr_a_mutex); idx = ring->vr_cur_aidx; @@ -711,7 +794,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov, head = vq_read_avail(ring, idx & ring->vr_mask); next = head; - for (i = 0; i < niov; next = vdir.vd_next) { + for (region.vhr_idx = 0; region.vhr_idx < niov; next = vdir.vd_next) { if (next >= ring->vr_size) { VIONA_PROBE2(bad_idx, viona_vring_t *, ring, uint16_t, next); @@ -721,7 +804,7 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov, vq_read_desc(ring, next, &vdir); if ((vdir.vd_flags & VRING_DESC_F_INDIRECT) == 0) { - if (vq_map_desc_bufs(ring, &vdir, iov, niov, &i) != 0) { + if (vq_map_desc_bufs(ring, &vdir, ®ion) != 0) { break; } } else { @@ -738,21 +821,29 @@ vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov, break; } - if (vq_map_indir_desc_bufs(ring, &vdir, iov, niov, &i) - != 0) { + if (vq_map_indir_desc_bufs(ring, &vdir, ®ion) != 0) { break; } } if ((vdir.vd_flags & VRING_DESC_F_NEXT) == 0) { - *cookie = head; ring->vr_cur_aidx++; mutex_exit(&ring->vr_a_mutex); - return (i); + + *cookie = head; + *chain = region.vhr_head; + return (region.vhr_idx); } } mutex_exit(&ring->vr_a_mutex); + if (region.vhr_head != NULL) { + /* + * If any pages were held prior to encountering an error, we + * must release them now. 
+ */ + vmm_drv_page_release_chain(region.vhr_head); + } return (-1); } diff --git a/usr/src/uts/i86pc/io/viona/viona_rx.c b/usr/src/uts/i86pc/io/viona/viona_rx.c index dc3feb10fe..2fbf6be972 100644 --- a/usr/src/uts/i86pc/io/viona/viona_rx.c +++ b/usr/src/uts/i86pc/io/viona/viona_rx.c @@ -208,10 +208,11 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz) caddr_t buf = NULL; boolean_t end = B_FALSE; const uint32_t features = ring->vr_link->l_features; + vmm_page_t *pages = NULL; ASSERT(msz >= MIN_BUF_SIZE); - n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie); + n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages); if (n <= 0) { /* Without available buffers, the frame must be dropped. */ return (ENOSPC); @@ -279,6 +280,7 @@ viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz) } /* Release this chain */ + vmm_drv_page_release_chain(pages); vq_pushchain(ring, copied, cookie); return (0); @@ -287,6 +289,7 @@ bad_frame: mblk_t *, mp); VIONA_RING_STAT_INCR(ring, bad_rx_frame); + vmm_drv_page_release_chain(pages); vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie); return (EINVAL); } @@ -296,6 +299,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz) { struct iovec iov[VTNET_MAXSEGS]; used_elem_t uelem[VTNET_MAXSEGS]; + vmm_page_t *pages = NULL, *hdr_pages = NULL; int n, i = 0, buf_idx = 0, err = 0; uint16_t cookie; caddr_t buf; @@ -307,7 +311,7 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz) ASSERT(msz >= MIN_BUF_SIZE); - n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie); + n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages); if (n <= 0) { /* Without available buffers, the frame must be dropped. */ VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp); @@ -376,7 +380,12 @@ viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz) err = EOVERFLOW; break; } - n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie); + if (pages != NULL) { + vmm_drv_page_release_chain(pages); + pages = NULL; + } + n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, + &pages); if (n <= 0) { /* * Without more immediate space to perform the @@ -453,6 +462,13 @@ done: uint16_t, cookie, mblk_t *, mp); VIONA_RING_STAT_INCR(ring, bad_rx_frame); } + + if (hdr_pages != NULL) { + vmm_drv_page_release_chain(hdr_pages); + } + if (pages != NULL) { + vmm_drv_page_release_chain(pages); + } vq_pushchain_many(ring, buf_idx + 1, uelem); return (err); } diff --git a/usr/src/uts/i86pc/io/viona/viona_tx.c b/usr/src/uts/i86pc/io/viona/viona_tx.c index f8018692a5..424deee498 100644 --- a/usr/src/uts/i86pc/io/viona/viona_tx.c +++ b/usr/src/uts/i86pc/io/viona/viona_tx.c @@ -70,6 +70,7 @@ struct viona_desb { uint32_t d_len; uint16_t d_cookie; uchar_t *d_headers; + vmm_page_t *d_pages; }; static void viona_tx(viona_link_t *, viona_vring_t *); @@ -287,6 +288,14 @@ viona_desb_release(viona_desb_t *dp) cookie = dp->d_cookie; dp->d_len = 0; dp->d_cookie = 0; + vmm_drv_page_release_chain(dp->d_pages); + dp->d_pages = NULL; + + /* + * Ensure all other changes to the desb are visible prior to zeroing its + * refcount, signifying its readiness for reuse. 
+ */ + membar_exit(); dp->d_ref = 0; viona_tx_done(ring, len, cookie); @@ -484,12 +493,13 @@ viona_tx(viona_link_t *link, viona_vring_t *ring) viona_desb_t *dp = NULL; mac_client_handle_t link_mch = link->l_mch; const struct virtio_net_hdr *hdr; + vmm_page_t *pages = NULL; mp_head = mp_tail = NULL; ASSERT(iov != NULL); - n = vq_popchain(ring, iov, max_segs, &cookie); + n = vq_popchain(ring, iov, max_segs, &cookie, &pages); if (n == 0) { VIONA_PROBE1(tx_absent, viona_vring_t *, ring); VIONA_RING_STAT_INCR(ring, tx_absent); @@ -670,6 +680,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring) if (dp != NULL) { dp->d_len = len; + dp->d_pages = pages; mutex_enter(&ring->vr_lock); ring->vr_xfer_outstanding++; mutex_exit(&ring->vr_lock); @@ -679,6 +690,7 @@ viona_tx(viona_link_t *link, viona_vring_t *ring) * be marked as 'used' now, rather than deferring that action * until after successful packet transmission. */ + vmm_drv_page_release_chain(pages); viona_tx_done(ring, len, cookie); } @@ -731,5 +743,6 @@ drop_hook: VIONA_PROBE3(tx_drop, viona_vring_t *, ring, uint32_t, len, uint16_t, cookie); + vmm_drv_page_release_chain(pages); viona_tx_done(ring, len, cookie); } diff --git a/usr/src/uts/i86pc/io/vmm/vmm.mapfile b/usr/src/uts/i86pc/io/vmm/vmm.mapfile index 0af4f090b4..fb1c9366de 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.mapfile +++ b/usr/src/uts/i86pc/io/vmm/vmm.mapfile @@ -12,6 +12,7 @@ # # Copyright 2019 Joyent, Inc. +# Copyright 2021 Oxide Computer Company # # @@ -44,7 +45,13 @@ SYMBOL_VERSION ILLUMOSprivate { vmm_drv_lease_sign; vmm_drv_lease_break; vmm_drv_lease_expired; - vmm_drv_gpa2kva; + vmm_drv_page_hold; + vmm_drv_page_release; + vmm_drv_page_release_chain; + vmm_drv_page_readable; + vmm_drv_page_writable; + vmm_drv_page_chain; + vmm_drv_page_next; vmm_drv_ioport_hook; vmm_drv_ioport_unhook; vmm_drv_msi; diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 24dd851831..d66778c55a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -1969,33 +1969,49 @@ vmm_drv_lease_expired(vmm_lease_t *lease) return (lease->vml_expired); } -void * -vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz) +vmm_page_t * +vmm_drv_page_hold(vmm_lease_t *lease, uintptr_t gpa, int prot) { - vm_page_t *vmp; - void *res = NULL; - ASSERT(lease != NULL); - ASSERT3U(sz, ==, PAGESIZE); ASSERT0(gpa & PAGEOFFSET); - vmp = vmc_hold(lease->vml_vmclient, gpa, PROT_READ | PROT_WRITE); - /* - * Break the rules for now and just extract the pointer. This is - * nominally safe, since holding a driver lease on the VM read-locks it. - * - * A pointer which would otherwise be at risk of being a use-after-free - * vector is made safe since actions such as vmspace_unmap() require - * acquisition of the VM write-lock, (causing all driver leases to be - * broken) allowing the consumers to cease their access prior to - * modification of the vmspace. 
- */ - if (vmp != NULL) { - res = vmp_get_writable(vmp); - vmp_release(vmp); - } + return ((vmm_page_t *)vmc_hold(lease->vml_vmclient, gpa, prot)); +} - return (res); +void +vmm_drv_page_release(vmm_page_t *vmmp) +{ + vmp_release((vm_page_t *)vmmp); +} + +void +vmm_drv_page_release_chain(vmm_page_t *vmmp) +{ + vmp_release_chain((vm_page_t *)vmmp); +} + +const void * +vmm_drv_page_readable(const vmm_page_t *vmmp) +{ + return (vmp_get_readable((const vm_page_t *)vmmp)); +} + +void * +vmm_drv_page_writable(const vmm_page_t *vmmp) +{ + return (vmp_get_writable((const vm_page_t *)vmmp)); +} + +void +vmm_drv_page_chain(vmm_page_t *vmmp, vmm_page_t *to_chain) +{ + vmp_chain((vm_page_t *)vmmp, (vm_page_t *)to_chain); +} + +vmm_page_t * +vmm_drv_page_next(const vmm_page_t *vmmp) +{ + return ((vmm_page_t *)vmp_next((vm_page_t *)vmmp)); } int diff --git a/usr/src/uts/i86pc/sys/vmm_drv.h b/usr/src/uts/i86pc/sys/vmm_drv.h index 1f2b3d9254..0b7f622e53 100644 --- a/usr/src/uts/i86pc/sys/vmm_drv.h +++ b/usr/src/uts/i86pc/sys/vmm_drv.h @@ -12,7 +12,7 @@ /* * Copyright 2019 Joyent, Inc. - * Copyright 2020 Oxide Computer Company + * Copyright 2021 Oxide Computer Company */ #ifndef _VMM_DRV_H_ @@ -30,6 +30,14 @@ struct vmm_lease; typedef struct vmm_lease vmm_lease_t; /* + * This is effectively a synonym for the bhyve-internal 'struct vm_page' type. + * Use of `vmm_page_t *` instead allows us to keep those implementation details + * hidden from vmm_drv consumers. + */ +struct vmm_page; +typedef struct vmm_page vmm_page_t; + +/* * Because of tangled headers, this definitions mirrors its ioport_handler_t * counterpart in vmm_kernel.h. */ @@ -44,7 +52,14 @@ extern vmm_lease_t *vmm_drv_lease_sign(vmm_hold_t *, boolean_t (*)(void *), extern void vmm_drv_lease_break(vmm_hold_t *, vmm_lease_t *); extern boolean_t vmm_drv_lease_expired(vmm_lease_t *); -extern void *vmm_drv_gpa2kva(vmm_lease_t *, uintptr_t, size_t); +extern vmm_page_t *vmm_drv_page_hold(vmm_lease_t *, uintptr_t, int); +extern void vmm_drv_page_release(vmm_page_t *); +extern void vmm_drv_page_release_chain(vmm_page_t *); +extern const void *vmm_drv_page_readable(const vmm_page_t *); +extern void *vmm_drv_page_writable(const vmm_page_t *); +extern void vmm_drv_page_chain(vmm_page_t *, vmm_page_t *); +extern vmm_page_t *vmm_drv_page_next(const vmm_page_t *); + extern int vmm_drv_msi(vmm_lease_t *, uint64_t, uint64_t); extern int vmm_drv_ioport_hook(vmm_hold_t *, uint16_t, vmm_drv_iop_cb_t, void *, |
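
Taken together, the vmm_drv additions above form a small hold/chain/release
interface over guest pages. Below is a minimal sketch, not part of the commit,
of holding a page-aligned multi-page guest region under an existing lease and
returning it as a single releasable chain, mirroring the pattern used by
vq_region_hold() and viona_ring_map(); the function name and its callers are
hypothetical.

	static int
	example_hold_region(vmm_lease_t *lease, uintptr_t gpa, uint_t npages,
	    vmm_page_t **chainp)
	{
		vmm_page_t *head = NULL, *tail = NULL;

		ASSERT0(gpa & PAGEOFFSET);

		for (uint_t i = 0; i < npages; i++, gpa += PAGESIZE) {
			vmm_page_t *vmp = vmm_drv_page_hold(lease, gpa,
			    PROT_READ | PROT_WRITE);

			if (vmp == NULL) {
				/* Drop any holds already taken on failure. */
				if (head != NULL) {
					vmm_drv_page_release_chain(head);
				}
				return (EFAULT);
			}
			if (head == NULL) {
				head = tail = vmp;
			} else {
				/* Append to the tail, as viona_ring_map() does. */
				vmm_drv_page_chain(tail, vmp);
				tail = vmp;
			}
		}

		*chainp = head;
		return (0);
	}

	/*
	 * Kernel-virtual access to each held page is then available through
	 * vmm_drv_page_readable()/vmm_drv_page_writable(); the chain can be
	 * walked with vmm_drv_page_next() or dropped in one call with
	 * vmm_drv_page_release_chain().
	 */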