Diffstat (limited to 'usr/src/lib/libvmm/libvmm.c')
-rw-r--r--  usr/src/lib/libvmm/libvmm.c  857
1 file changed, 857 insertions, 0 deletions
diff --git a/usr/src/lib/libvmm/libvmm.c b/usr/src/lib/libvmm/libvmm.c
new file mode 100644
index 0000000000..8c8e32c9f3
--- /dev/null
+++ b/usr/src/lib/libvmm/libvmm.c
@@ -0,0 +1,857 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Library for native code to access bhyve VMs without the need to use
+ * FreeBSD compat headers.
+ */
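+
+/*
+ * Illustrative usage sketch (hypothetical, not part of the library; the
+ * VM name "myvm" is made up):
+ *
+ *	vmm_t *vmm = vmm_open_vm("myvm");
+ *
+ *	if (vmm != NULL && vmm_map(vmm, B_FALSE) == 0) {
+ *		uint64_t word;
+ *
+ *		(void) vmm_pread(vmm, &word, sizeof (word), 0x1000);
+ *		vmm_unmap(vmm);
+ *	}
+ *	if (vmm != NULL)
+ *		vmm_close_vm(vmm);
+ */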
+
+#include <sys/param.h>
+#include <sys/list.h>
+#include <sys/stddef.h>
+#include <sys/mman.h>
+#include <sys/kdi_regs.h>
+#include <sys/sysmacros.h>
+#include <sys/controlregs.h>
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include <libvmm.h>
+
+typedef struct vmm_memseg vmm_memseg_t;
+
+#define VMM_MEMSEG_DEVMEM 0x1
+
+struct vmm_memseg {
+ list_node_t vms_list;
+ int vms_segid;
+ int vms_prot;
+ int vms_flags;
+ uintptr_t vms_gpa;
+ off_t vms_segoff;
+ size_t vms_seglen;
+ size_t vms_maplen;
+ char vms_name[64];
+};
+
+struct vmm {
+ struct vmctx *vmm_ctx;
+ list_t vmm_memlist;
+ char *vmm_mem;
+ size_t vmm_memsize;
+ size_t vmm_ncpu;
+};
+
+
+/*
+ * This code relies on two assumptions:
+ * - CPUs are never removed from the "active set", not even when suspended.
+ * A CPU being active just means that it has been used by the guest OS.
+ * - The CPU numbering is consecutive.
+ */
+static void
+vmm_update_ncpu(vmm_t *vmm)
+{
+	cpuset_t cpuset;
+	int error;
+
+	/*
+	 * Keep the vm_active_cpus() call outside of assert() so that it
+	 * still runs if assertions are compiled out.
+	 */
+	error = vm_active_cpus(vmm->vmm_ctx, &cpuset);
+	assert(error == 0);
+
+ for (vmm->vmm_ncpu = 0;
+ CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
+ vmm->vmm_ncpu++)
+ ;
+}
+
+vmm_t *
+vmm_open_vm(const char *name)
+{
+ vmm_t *vmm = NULL;
+
+ vmm = malloc(sizeof (vmm_t));
+ if (vmm == NULL)
+ return (NULL);
+
+ bzero(vmm, sizeof (vmm_t));
+ vmm->vmm_mem = MAP_FAILED;
+
+ list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
+ offsetof(vmm_memseg_t, vms_list));
+
+ vmm->vmm_ctx = vm_open(name);
+ if (vmm->vmm_ctx == NULL) {
+ free(vmm);
+ return (NULL);
+ }
+
+ vmm_update_ncpu(vmm);
+
+ /*
+ * If we open a VM that has just been created we may see a state
+ * where it has no CPUs configured yet. We'll just wait for 10ms
+ * and retry until we get a non-zero CPU count.
+ */
+ if (vmm->vmm_ncpu == 0) {
+ do {
+ (void) usleep(10000);
+ vmm_update_ncpu(vmm);
+ } while (vmm->vmm_ncpu == 0);
+ }
+
+ return (vmm);
+}
+
+void
+vmm_close_vm(vmm_t *vmm)
+{
+ vmm_unmap(vmm);
+
+ list_destroy(&vmm->vmm_memlist);
+
+ if (vmm->vmm_ctx != NULL)
+ vm_close(vmm->vmm_ctx);
+
+ free(vmm);
+}
+
+static vmm_memseg_t *
+vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
+{
+ vmm_memseg_t ms, *ret;
+ int error, flags;
+
+ bzero(&ms, sizeof (vmm_memseg_t));
+ ms.vms_gpa = gpa;
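+	/*
+	 * vm_mmap_getnext() locates the first memory mapping at or above
+	 * the given guest physical address.
+	 */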
+ error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
+ &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
+ if (error)
+ return (NULL);
+
+ error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
+ ms.vms_name, sizeof (ms.vms_name));
+ if (error)
+ return (NULL);
+
+ /*
+ * Regular memory segments don't have a name, but devmem segments do.
+ * We can use that information to set the DEVMEM flag if necessary.
+ */
+ ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;
+
+ ret = malloc(sizeof (vmm_memseg_t));
+ if (ret == NULL)
+ return (NULL);
+
+ *ret = ms;
+
+ return (ret);
+}
+
+int
+vmm_map(vmm_t *vmm, boolean_t writable)
+{
+ uintptr_t last_gpa = 0;
+ vmm_memseg_t *ms;
+ int prot_write = writable ? PROT_WRITE : 0;
+
+ if (vmm->vmm_mem != MAP_FAILED) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ assert(list_is_empty(&vmm->vmm_memlist));
+
+ for (;;) {
+ ms = vmm_get_memseg(vmm, last_gpa);
+
+ if (ms == NULL)
+ break;
+
+ last_gpa = ms->vms_gpa + ms->vms_maplen;
+ list_insert_tail(&vmm->vmm_memlist, ms);
+ }
+
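+	/*
+	 * Reserve one contiguous, inaccessible region covering the whole
+	 * guest physical address space, then map each memory segment from
+	 * the vmm device over it with MAP_FIXED below.
+	 */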
+ vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
+ MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);
+
+ if (vmm->vmm_mem == MAP_FAILED)
+ goto fail;
+
+ for (ms = list_head(&vmm->vmm_memlist);
+ ms != NULL;
+ ms = list_next(&vmm->vmm_memlist, ms)) {
+ off_t mapoff = ms->vms_gpa;
+
+ if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) &&
+ vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid, &mapoff)
+ != 0)
+ goto fail;
+
+ vmm->vmm_memsize += ms->vms_maplen;
+
+ if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
+ PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
+ vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
+ goto fail;
+ }
+
+ return (0);
+
+fail:
+ vmm_unmap(vmm);
+
+ return (-1);
+}
+
+void
+vmm_unmap(vmm_t *vmm)
+{
+ while (!list_is_empty(&vmm->vmm_memlist)) {
+ vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);
+
+ if (vmm->vmm_mem != MAP_FAILED)
+ munmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen);
+ free(ms);
+ }
+
+ if (vmm->vmm_mem != MAP_FAILED)
+ munmap(vmm->vmm_mem, vmm->vmm_memsize);
+
+ vmm->vmm_mem = MAP_FAILED;
+ vmm->vmm_memsize = 0;
+}
+
+ssize_t
+vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
+{
+ ssize_t count = 0;
+ vmm_memseg_t *ms;
+ ssize_t res = len;
+
+ for (ms = list_head(&vmm->vmm_memlist);
+ ms != NULL && len != 0;
+ ms = list_next(&vmm->vmm_memlist, ms)) {
+
+ if (addr >= ms->vms_gpa &&
+ addr < ms->vms_gpa + ms->vms_maplen) {
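+			/*
+			 * res is the number of requested bytes that extend
+			 * beyond the end of this segment (zero if the
+			 * request fits); it becomes the residual length
+			 * for the next iteration.
+			 */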
+ res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
+
+ if (res < 0)
+ res = 0;
+
+ bcopy(vmm->vmm_mem + addr, buf, len - res);
+ count += len - res;
+ addr += len - res;
+ len = res;
+ }
+ }
+
+ if (res)
+ errno = EFAULT;
+ else
+ errno = 0;
+
+ return (count);
+}
+
+ssize_t
+vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
+{
+ ssize_t count = 0;
+ vmm_memseg_t *ms;
+ ssize_t res = len;
+
+	for (ms = list_head(&vmm->vmm_memlist);
+	    ms != NULL && len != 0;
+	    ms = list_next(&vmm->vmm_memlist, ms)) {
+ if (addr >= ms->vms_gpa &&
+ addr < ms->vms_gpa + ms->vms_maplen) {
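+			/* Same residual-length logic as in vmm_pread(). */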
+ res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
+
+ if (res < 0)
+ res = 0;
+
+ bcopy(buf, vmm->vmm_mem + addr, len - res);
+ count += len - res;
+ addr += len - res;
+ len = res;
+ }
+ }
+
+ if (res)
+ errno = EFAULT;
+ else
+ errno = 0;
+
+ return (count);
+}
+
+size_t
+vmm_ncpu(vmm_t *vmm)
+{
+ return (vmm->vmm_ncpu);
+}
+
+size_t
+vmm_memsize(vmm_t *vmm)
+{
+ return (vmm->vmm_memsize);
+}
+
+int
+vmm_cont(vmm_t *vmm)
+{
+ return (vm_resume_cpu(vmm->vmm_ctx, -1));
+}
+
+int
+vmm_step(vmm_t *vmm, int vcpu)
+{
+ cpuset_t cpuset;
+ int ret;
+
+ if (vcpu >= vmm->vmm_ncpu) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ ret = vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
+ if (ret != 0)
+ return (-1);
+
+	ret = vm_resume_cpu(vmm->vmm_ctx, vcpu);
+	assert(ret == 0);
+
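+	/*
+	 * Wait until the vCPU has executed one instruction (causing a
+	 * monitor trap exit) and has been moved back into the debug set.
+	 */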
+ do {
+ (void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
+ } while (!CPU_ISSET(vcpu, &cpuset));
+
+ (void) vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
+
+ return (ret);
+}
+
+int
+vmm_stop(vmm_t *vmm)
+{
+ int ret = vm_suspend_cpu(vmm->vmm_ctx, -1);
+
+ if (ret == 0)
+ vmm_update_ncpu(vmm);
+
+ return (ret);
+}
+
+/*
+ * Mapping of KDI-defined registers to vmmapi-defined registers.
+ * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
+ * causes an error in vm_{get,set}_register_set().
+ *
+ * This array must be kept in sync with the definitions in kdi_regs.h.
+ */
+static int vmm_kdi_regmap[] = {
+ VM_REG_LAST, /* KDIREG_SAVFP */
+ VM_REG_LAST, /* KDIREG_SAVPC */
+ VM_REG_GUEST_RDI, /* KDIREG_RDI */
+ VM_REG_GUEST_RSI, /* KDIREG_RSI */
+ VM_REG_GUEST_RDX, /* KDIREG_RDX */
+ VM_REG_GUEST_RCX, /* KDIREG_RCX */
+ VM_REG_GUEST_R8, /* KDIREG_R8 */
+ VM_REG_GUEST_R9, /* KDIREG_R9 */
+ VM_REG_GUEST_RAX, /* KDIREG_RAX */
+ VM_REG_GUEST_RBX, /* KDIREG_RBX */
+ VM_REG_GUEST_RBP, /* KDIREG_RBP */
+ VM_REG_GUEST_R10, /* KDIREG_R10 */
+ VM_REG_GUEST_R11, /* KDIREG_R11 */
+ VM_REG_GUEST_R12, /* KDIREG_R12 */
+ VM_REG_GUEST_R13, /* KDIREG_R13 */
+ VM_REG_GUEST_R14, /* KDIREG_R14 */
+ VM_REG_GUEST_R15, /* KDIREG_R15 */
+ VM_REG_LAST, /* KDIREG_FSBASE */
+ VM_REG_LAST, /* KDIREG_GSBASE */
+ VM_REG_LAST, /* KDIREG_KGSBASE */
+ VM_REG_GUEST_CR2, /* KDIREG_CR2 */
+ VM_REG_GUEST_CR3, /* KDIREG_CR3 */
+ VM_REG_GUEST_DS, /* KDIREG_DS */
+ VM_REG_GUEST_ES, /* KDIREG_ES */
+ VM_REG_GUEST_FS, /* KDIREG_FS */
+ VM_REG_GUEST_GS, /* KDIREG_GS */
+ VM_REG_LAST, /* KDIREG_TRAPNO */
+ VM_REG_LAST, /* KDIREG_ERR */
+ VM_REG_GUEST_RIP, /* KDIREG_RIP */
+ VM_REG_GUEST_CS, /* KDIREG_CS */
+ VM_REG_GUEST_RFLAGS, /* KDIREG_RFLAGS */
+ VM_REG_GUEST_RSP, /* KDIREG_RSP */
+ VM_REG_GUEST_SS /* KDIREG_SS */
+};
+CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);
+
+/*
+ * Mapping of libvmm-defined registers to vmmapi-defined registers.
+ *
+ * This array must be kept in sync with the definitions in libvmm.h.
+ */
+static int vmm_sys_regmap[] = {
+ VM_REG_GUEST_CR0, /* VMM_REG_CR0 */
+ VM_REG_GUEST_CR2, /* VMM_REG_CR2 */
+ VM_REG_GUEST_CR3, /* VMM_REG_CR3 */
+ VM_REG_GUEST_CR4, /* VMM_REG_CR4 */
+ VM_REG_GUEST_DR0, /* VMM_REG_DR0 */
+ VM_REG_GUEST_DR1, /* VMM_REG_DR1 */
+ VM_REG_GUEST_DR2, /* VMM_REG_DR2 */
+ VM_REG_GUEST_DR3, /* VMM_REG_DR3 */
+ VM_REG_GUEST_DR6, /* VMM_REG_DR6 */
+ VM_REG_GUEST_DR7, /* VMM_REG_DR7 */
+ VM_REG_GUEST_EFER, /* VMM_REG_EFER */
+ VM_REG_GUEST_PDPTE0, /* VMM_REG_PDPTE0 */
+ VM_REG_GUEST_PDPTE1, /* VMM_REG_PDPTE1 */
+ VM_REG_GUEST_PDPTE2, /* VMM_REG_PDPTE2 */
+ VM_REG_GUEST_PDPTE3, /* VMM_REG_PDPTE3 */
+ VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
+};
+
+/*
+ * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
+ *
+ * This array must be kept in sync with the definitions in libvmm.h.
+ */
+static int vmm_descmap[] = {
+ VM_REG_GUEST_GDTR,
+ VM_REG_GUEST_LDTR,
+ VM_REG_GUEST_IDTR,
+ VM_REG_GUEST_TR,
+ VM_REG_GUEST_CS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ VM_REG_GUEST_SS
+};
+
+static int
+vmm_mapreg(int reg)
+{
+ errno = 0;
+
+ if (reg < 0)
+ goto fail;
+
+ if (reg < KDIREG_NGREG)
+ return (vmm_kdi_regmap[reg]);
+
+ if (reg >= VMM_REG_OFFSET &&
+ reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
+ return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);
+
+fail:
+ errno = EINVAL;
+ return (VM_REG_LAST);
+}
+
+static int
+vmm_mapdesc(int desc)
+{
+ errno = 0;
+
+ if (desc >= VMM_DESC_OFFSET &&
+ desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
+ return (vmm_descmap[desc - VMM_DESC_OFFSET]);
+
+ errno = EINVAL;
+ return (VM_REG_LAST);
+}
+
+int
+vmm_getreg(vmm_t *vmm, int vcpu, int reg, uint64_t *val)
+{
+ reg = vmm_mapreg(reg);
+
+ if (reg == VM_REG_LAST)
+ return (-1);
+
+ return (vm_get_register(vmm->vmm_ctx, vcpu, reg, val));
+}
+
+int
+vmm_setreg(vmm_t *vmm, int vcpu, int reg, uint64_t val)
+{
+ reg = vmm_mapreg(reg);
+
+ if (reg == VM_REG_LAST)
+ return (-1);
+
+ return (vm_set_register(vmm->vmm_ctx, vcpu, reg, val));
+}
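+
+/*
+ * Illustrative example (hypothetical): read the instruction pointer of
+ * vCPU 0 using the KDI register numbering from <sys/kdi_regs.h>:
+ *
+ *	uint64_t rip;
+ *
+ *	if (vmm_getreg(vmm, 0, KDIREG_RIP, &rip) == 0)
+ *		(void) printf("%%rip = %lx\n", rip);
+ */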
+
+int
+vmm_get_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
+ uint64_t *regvals)
+{
+ int *vm_regnums;
+ int i;
+ int ret = -1;
+
+ vm_regnums = malloc(sizeof (int) * nregs);
+ if (vm_regnums == NULL)
+ return (ret);
+
+ for (i = 0; i != nregs; i++) {
+ vm_regnums[i] = vmm_mapreg(regnums[i]);
+ if (vm_regnums[i] == VM_REG_LAST)
+ goto fail;
+ }
+
+ ret = vm_get_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
+ regvals);
+
+fail:
+ free(vm_regnums);
+ return (ret);
+}
+
+int
+vmm_set_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
+ uint64_t *regvals)
+{
+ int *vm_regnums;
+ int i;
+ int ret = -1;
+
+ vm_regnums = malloc(sizeof (int) * nregs);
+ if (vm_regnums == NULL)
+ return (ret);
+
+ for (i = 0; i != nregs; i++) {
+ vm_regnums[i] = vmm_mapreg(regnums[i]);
+ if (vm_regnums[i] == VM_REG_LAST)
+ goto fail;
+ }
+
+ ret = vm_set_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
+ regvals);
+
+fail:
+ free(vm_regnums);
+ return (ret);
+}
+
+int
+vmm_get_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
+{
+ desc = vmm_mapdesc(desc);
+ if (desc == VM_REG_LAST)
+ return (-1);
+
+ return (vm_get_desc(vmm->vmm_ctx, vcpu, desc, &vd->vd_base, &vd->vd_lim,
+ &vd->vd_acc));
+}
+
+int
+vmm_set_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
+{
+ desc = vmm_mapdesc(desc);
+ if (desc == VM_REG_LAST)
+ return (-1);
+
+ return (vm_set_desc(vmm->vmm_ctx, vcpu, desc, vd->vd_base, vd->vd_lim,
+ vd->vd_acc));
+}
+
+/*
+ * Structure to hold MMU state during address translation.
+ * The contents of vmm_mmu_regnum[] must be kept in sync with this.
+ */
+typedef struct vmm_mmu {
+ uint64_t vm_cr0;
+ uint64_t vm_cr3;
+ uint64_t vm_cr4;
+ uint64_t vm_efer;
+} vmm_mmu_t;
+
+static const int vmm_mmu_regnum[] = {
+ VMM_REG_CR0,
+ VMM_REG_CR3,
+ VMM_REG_CR4,
+ VMM_REG_EFER
+};
+
+#define X86_PTE_P 0x001ULL
+#define X86_PTE_PS 0x080ULL
+
+#define X86_PTE_PHYSMASK 0x000ffffffffff000ULL
+#define X86_PAGE_SHIFT 12
+#define X86_PAGE_SIZE (1ULL << X86_PAGE_SHIFT)
+
+#define X86_SEG_CODE_DATA (1ULL << 4)
+#define X86_SEG_PRESENT (1ULL << 7)
+#define X86_SEG_LONG (1ULL << 13)
+#define X86_SEG_BIG (1ULL << 14)
+#define X86_SEG_GRANULARITY (1ULL << 15)
+#define X86_SEG_UNUSABLE (1ULL << 16)
+
+#define X86_SEG_USABLE (X86_SEG_PRESENT | X86_SEG_CODE_DATA)
+#define X86_SEG_USABLE_MASK (X86_SEG_UNUSABLE | X86_SEG_USABLE)
+
+/*
+ * vmm_pte2paddr:
+ *
+ * Recursively calculate the physical address from a virtual address,
+ * starting at the given PTE level using the given PTE.
+ */
+static int
+vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
+ uint64_t vaddr, uint64_t *paddr)
+{
+ int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
+ int off_bits = ia32 ? 10 : 9;
+ boolean_t hugepage = B_FALSE;
+ uint64_t offset;
+ uint64_t off_mask, off_shift;
+
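+	/*
+	 * A 4-level (long mode) walk starts with CR3 as the level-4
+	 * pseudo-PTE, which has no present bit and is therefore exempt
+	 * from this check.
+	 */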
+ if (level < 4 && (pte & X86_PTE_P) == 0) {
+ errno = EFAULT;
+ return (-1);
+ }
+
+ off_shift = X86_PAGE_SHIFT + off_bits * level;
+ off_mask = (1ULL << off_shift) - 1;
+
+ offset = vaddr & off_mask;
+
+ if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
+ hugepage = B_TRUE;
+ } else {
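+		/*
+		 * Intermediate level: reduce the offset to this level's
+		 * index bits and scale by the PTE size to get the byte
+		 * offset of the entry within the page table.
+		 */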
+ if (level > 0) {
+ offset >>= off_shift - off_bits;
+ offset <<= X86_PAGE_SHIFT - off_bits;
+ }
+ off_mask = 0xfff;
+ }
+
+ *paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;
+
+ if (level == 0 || hugepage)
+ return (0);
+
+ pte = 0;
+ if (vmm_pread(vmm, &pte, pte_size, *paddr) != pte_size)
+ return (-1);
+ return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
+}
+
+static vmm_mode_t
+vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpu, vmm_mmu_t *mmu)
+{
+ if ((mmu->vm_cr0 & CR0_PE) == 0)
+ return (VMM_MODE_REAL);
+ else if ((mmu->vm_cr4 & CR4_PAE) == 0)
+ return (VMM_MODE_PROT);
+ else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
+ return (VMM_MODE_PAE);
+ else
+ return (VMM_MODE_LONG);
+}
+
+vmm_mode_t
+vmm_vcpu_mode(vmm_t *vmm, int vcpu)
+{
+ vmm_mmu_t mmu = { 0 };
+
+ if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
+ vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
+ return (VMM_MODE_UNKNOWN);
+
+ return (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu));
+}
+
+vmm_isa_t
+vmm_vcpu_isa(vmm_t *vmm, int vcpu)
+{
+ vmm_desc_t cs;
+
+ if (vmm_get_desc(vmm, vcpu, VMM_DESC_CS, &cs) != 0)
+ return (VMM_ISA_UNKNOWN);
+
+ switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
+ case 0x0: /* 16b code segment */
+ return (VMM_ISA_16);
+ case X86_SEG_LONG: /* 64b code segment */
+ return (VMM_ISA_64);
+ case X86_SEG_BIG: /* 32b code segment */
+ return (VMM_ISA_32);
+ }
+
+ return (VMM_ISA_UNKNOWN);
+}
+
+/*
+ * vmm_vtol:
+ *
+ * Translate a virtual address to a linear address on a certain vCPU, using
+ * the specified segment register or descriptor according to the mode.
+ */
+int
+vmm_vtol(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *laddr)
+{
+ vmm_desc_t desc;
+ uint64_t limit;
+
+ if (vmm_get_desc(vmm, vcpu, seg, &desc) != 0)
+ return (-1);
+
+ switch (vmm_vcpu_mode(vmm, vcpu)) {
+ case VMM_MODE_REAL:
+ if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
+ goto fault;
+ /* FALLTHRU */
+ case VMM_MODE_PROT:
+ case VMM_MODE_PAE:
+ if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
+ /* unusable, system segment, or not present */
+ goto fault;
+
+ limit = desc.vd_lim;
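+		/* With the granularity bit set, the limit is in 4 KiB units. */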
+ if (desc.vd_acc & X86_SEG_GRANULARITY)
+ limit *= 4096;
+
+ if (vaddr > limit)
+ goto fault;
+ /* FALLTHRU */
+ case VMM_MODE_LONG:
+ *laddr = desc.vd_base + vaddr;
+ return (0);
+
+ default:
+ fault:
+ errno = EFAULT;
+ return (-1);
+ }
+}
+
+/*
+ * vmm_vtop:
+ *
+ * Translate a virtual address to a guest physical address on a certain vCPU,
+ * according to the mode the vCPU is in.
+ */
+int
+vmm_vtop(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *paddr)
+{
+ vmm_mmu_t mmu = { 0 };
+ int ret = 0;
+
+ if (vmm_vtol(vmm, vcpu, seg, vaddr, &vaddr) != 0)
+ return (-1);
+
+ if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
+ vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
+ return (-1);
+
+ if ((mmu.vm_cr0 & CR0_PG) == 0) {
+ /* no paging, physical equals virtual */
+ *paddr = vaddr;
+ return (0);
+ }
+
+ switch (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu)) {
+ case VMM_MODE_PROT:
+		/*
+		 * Protected mode, no PAE: 2-level paging, 32bit PTEs.
+		 * CR3 has no present bit, so force it on for the walk.
+		 */
+		ret = vmm_pte2paddr(vmm, mmu.vm_cr3 | X86_PTE_P, B_TRUE, 2,
+		    vaddr, paddr);
+ break;
+ case VMM_MODE_PAE:
+		/*
+		 * Protected mode with PAE: 3-level paging, 64bit PTEs.
+		 * As above, force the present bit on for CR3.
+		 */
+		ret = vmm_pte2paddr(vmm, mmu.vm_cr3 | X86_PTE_P, B_FALSE, 3,
+		    vaddr, paddr);
+ break;
+ case VMM_MODE_LONG:
+ /* long mode: 4-level paging, 64bit PTEs */
+ ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
+ break;
+ default:
+ ret = -1;
+ }
+
+ return (ret);
+}
+
+ssize_t
+vmm_vread(vmm_t *vmm, int vcpu, int seg, void *buf, size_t len, uintptr_t addr)
+{
+ ssize_t res = 0;
+ uint64_t paddr;
+ size_t plen;
+ uint64_t boundary;
+
+ while (len != 0) {
+ if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
+ errno = EFAULT;
+ return (0);
+ }
+
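+		/*
+		 * Contiguous virtual pages are not necessarily physically
+		 * contiguous, so read at most up to the next page boundary.
+		 */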
+ boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
+ if (addr + len > boundary)
+ plen = boundary - addr;
+ else
+ plen = len;
+
+ if (vmm_pread(vmm, buf, plen, paddr) != plen)
+ return (0);
+ len -= plen;
+ addr += plen;
+ buf += plen;
+ res += plen;
+ }
+
+ return (res);
+}
+
+ssize_t
+vmm_vwrite(vmm_t *vmm, int vcpu, int seg, const void *buf, size_t len,
+ uintptr_t addr)
+{
+ ssize_t res = 0;
+ uint64_t paddr;
+ size_t plen;
+ uint64_t boundary;
+
+ while (len != 0) {
+ if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
+ errno = EFAULT;
+ return (0);
+ }
+
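+		/*
+		 * Contiguous virtual pages are not necessarily physically
+		 * contiguous, so write at most up to the next page boundary.
+		 */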
+ boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
+ if (addr + len > boundary)
+ plen = boundary - addr;
+ else
+ plen = len;
+
+ if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
+ return (0);
+ len -= plen;
+ addr += plen;
+ buf += plen;
+ res += plen;
+ }
+
+ return (res);
+}