Diffstat (limited to 'usr/src/lib/libvmm/libvmm.c')
-rw-r--r-- | usr/src/lib/libvmm/libvmm.c | 857
1 file changed, 857 insertions, 0 deletions
diff --git a/usr/src/lib/libvmm/libvmm.c b/usr/src/lib/libvmm/libvmm.c
new file mode 100644
index 0000000000..8c8e32c9f3
--- /dev/null
+++ b/usr/src/lib/libvmm/libvmm.c
@@ -0,0 +1,857 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Library for native code to access bhyve VMs, without the need to use
+ * FreeBSD compat headers
+ */
+
+#include <sys/param.h>
+#include <sys/list.h>
+#include <sys/stddef.h>
+#include <sys/mman.h>
+#include <sys/kdi_regs.h>
+#include <sys/sysmacros.h>
+#include <sys/controlregs.h>
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <machine/vmm.h>
+#include <vmmapi.h>
+
+#include <libvmm.h>
+
+typedef struct vmm_memseg vmm_memseg_t;
+
+#define	VMM_MEMSEG_DEVMEM	0x1
+
+struct vmm_memseg {
+	list_node_t vms_list;
+	int vms_segid;
+	int vms_prot;
+	int vms_flags;
+	uintptr_t vms_gpa;
+	off_t vms_segoff;
+	size_t vms_seglen;
+	size_t vms_maplen;
+	char vms_name[64];
+};
+
+struct vmm {
+	struct vmctx *vmm_ctx;
+	list_t vmm_memlist;
+	char *vmm_mem;
+	size_t vmm_memsize;
+	size_t vmm_ncpu;
+};
+
+
+/*
+ * This code relies on two assumptions:
+ * - CPUs are never removed from the "active set", not even when suspended.
+ *   A CPU being active just means that it has been used by the guest OS.
+ * - The CPU numbering is consecutive.
+ */
+static void
+vmm_update_ncpu(vmm_t *vmm)
+{
+	cpuset_t cpuset;
+
+	assert(vm_active_cpus(vmm->vmm_ctx, &cpuset) == 0);
+
+	for (vmm->vmm_ncpu = 0;
+	    CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
+	    vmm->vmm_ncpu++)
+		;
+}
+
+vmm_t *
+vmm_open_vm(const char *name)
+{
+	vmm_t *vmm = NULL;
+
+	vmm = malloc(sizeof (vmm_t));
+	if (vmm == NULL)
+		return (NULL);
+
+	bzero(vmm, sizeof (vmm_t));
+	vmm->vmm_mem = MAP_FAILED;
+
+	list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
+	    offsetof(vmm_memseg_t, vms_list));
+
+	vmm->vmm_ctx = vm_open(name);
+	if (vmm->vmm_ctx == NULL) {
+		free(vmm);
+		return (NULL);
+	}
+
+	vmm_update_ncpu(vmm);
+
+	/*
+	 * If we open a VM that has just been created we may see a state
+	 * where it has no CPUs configured yet. We'll just wait for 10ms
+	 * and retry until we get a non-zero CPU count.
+	 */
+	if (vmm->vmm_ncpu == 0) {
+		do {
+			(void) usleep(10000);
+			vmm_update_ncpu(vmm);
+		} while (vmm->vmm_ncpu == 0);
+	}
+
+	return (vmm);
+}
+
+void
+vmm_close_vm(vmm_t *vmm)
+{
+	vmm_unmap(vmm);
+
+	list_destroy(&vmm->vmm_memlist);
+
+	if (vmm->vmm_ctx != NULL)
+		vm_close(vmm->vmm_ctx);
+
+	free(vmm);
+}
+
+static vmm_memseg_t *
+vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
+{
+	vmm_memseg_t ms, *ret;
+	int error, flags;
+
+	bzero(&ms, sizeof (vmm_memseg_t));
+	ms.vms_gpa = gpa;
+	error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
+	    &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
+	if (error)
+		return (NULL);
+
+	error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
+	    ms.vms_name, sizeof (ms.vms_name));
+	if (error)
+		return (NULL);
+
+	/*
+	 * Regular memory segments don't have a name, but devmem segments do.
+	 * We can use that information to set the DEVMEM flag if necessary.
+	 */
+	ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;
+
+	ret = malloc(sizeof (vmm_memseg_t));
+	if (ret == NULL)
+		return (NULL);
+
+	*ret = ms;
+
+	return (ret);
+}
+
+int
+vmm_map(vmm_t *vmm, boolean_t writable)
+{
+	uintptr_t last_gpa = 0;
+	vmm_memseg_t *ms;
+	int prot_write = writable ? PROT_WRITE : 0;
+
+	if (vmm->vmm_mem != MAP_FAILED) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	assert(list_is_empty(&vmm->vmm_memlist));
+
+	for (;;) {
+		ms = vmm_get_memseg(vmm, last_gpa);
+
+		if (ms == NULL)
+			break;
+
+		last_gpa = ms->vms_gpa + ms->vms_maplen;
+		list_insert_tail(&vmm->vmm_memlist, ms);
+	}
+
+	vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
+	    MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);
+
+	if (vmm->vmm_mem == MAP_FAILED)
+		goto fail;
+
+	for (ms = list_head(&vmm->vmm_memlist);
+	    ms != NULL;
+	    ms = list_next(&vmm->vmm_memlist, ms)) {
+		off_t mapoff = ms->vms_gpa;
+
+		if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) &&
+		    vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid, &mapoff)
+		    != 0)
+			goto fail;
+
+		vmm->vmm_memsize += ms->vms_maplen;
+
+		if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
+		    PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
+		    vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
+			goto fail;
+	}
+
+	return (0);
+
+fail:
+	vmm_unmap(vmm);
+
+	return (-1);
+}
+
+void
+vmm_unmap(vmm_t *vmm)
+{
+	while (!list_is_empty(&vmm->vmm_memlist)) {
+		vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);
+
+		if (vmm->vmm_mem != MAP_FAILED)
+			munmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen);
+		free(ms);
+	}
+
+	if (vmm->vmm_mem != MAP_FAILED)
+		munmap(vmm->vmm_mem, vmm->vmm_memsize);
+
+	vmm->vmm_mem = MAP_FAILED;
+	vmm->vmm_memsize = 0;
+}
+
+ssize_t
+vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
+{
+	ssize_t count = 0;
+	vmm_memseg_t *ms;
+	ssize_t res = len;
+
+	for (ms = list_head(&vmm->vmm_memlist);
+	    ms != NULL && len != 0;
+	    ms = list_next(&vmm->vmm_memlist, ms)) {
+
+		if (addr >= ms->vms_gpa &&
+		    addr < ms->vms_gpa + ms->vms_maplen) {
+			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
+
+			if (res < 0)
+				res = 0;
+
+			bcopy(vmm->vmm_mem + addr, buf, len - res);
+			count += len - res;
+			addr += len - res;
+			len = res;
+		}
+	}
+
+	if (res)
+		errno = EFAULT;
+	else
+		errno = 0;
+
+	return (count);
+}
+
+ssize_t
+vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
+{
+	ssize_t count = 0;
+	vmm_memseg_t *ms;
+	ssize_t res = len;
+
+	for (ms = list_head(&vmm->vmm_memlist);
+	    ms != NULL;
+	    ms = list_next(&vmm->vmm_memlist, ms)) {
+		if (addr >= ms->vms_gpa &&
+		    addr < ms->vms_gpa + ms->vms_maplen) {
+			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
+
+			if (res < 0)
+				res = 0;
+
+			bcopy(buf, vmm->vmm_mem + addr, len - res);
+			count += len - res;
+			addr += len - res;
+			len = res;
+		}
+	}
+
+	if (res)
+		errno = EFAULT;
+	else
+		errno = 0;
+
+	return (count);
+}
+
+size_t
+vmm_ncpu(vmm_t *vmm)
+{
+	return (vmm->vmm_ncpu);
+}
+
+size_t
+vmm_memsize(vmm_t *vmm)
+{
+	return (vmm->vmm_memsize);
+}
+
+int
+vmm_cont(vmm_t *vmm)
+{
+	return (vm_resume_cpu(vmm->vmm_ctx, -1));
+}
+
+int
+vmm_step(vmm_t *vmm, int vcpu)
+{
+	cpuset_t cpuset;
+	int ret;
+
+	if (vcpu >= vmm->vmm_ncpu) {
+		errno = EINVAL;
+		return (-1);
+	}
+
+	ret = vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
+	if (ret != 0)
+		return (-1);
+
+	assert(vm_resume_cpu(vmm->vmm_ctx, vcpu) == 0);
+
+	do {
+		(void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
+	} while (!CPU_ISSET(vcpu, &cpuset));
+
+	(void) vm_set_capability(vmm->vmm_ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
+
+	return (ret);
+}
+
+int
+vmm_stop(vmm_t *vmm)
+{
+	int ret = vm_suspend_cpu(vmm->vmm_ctx, -1);
+
+	if (ret == 0)
+		vmm_update_ncpu(vmm);
+
+	return (ret);
+}
+
+/*
+ * Mapping of KDI-defined registers to vmmapi-defined registers.
+ * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
+ * causes an error in vm_{get,set}_register_set().
+ *
+ * This array must be kept in sync with the definitions in kdi_regs.h.
+ */
+static int vmm_kdi_regmap[] = {
+	VM_REG_LAST,		/* KDIREG_SAVFP */
+	VM_REG_LAST,		/* KDIREG_SAVPC */
+	VM_REG_GUEST_RDI,	/* KDIREG_RDI */
+	VM_REG_GUEST_RSI,	/* KDIREG_RSI */
+	VM_REG_GUEST_RDX,	/* KDIREG_RDX */
+	VM_REG_GUEST_RCX,	/* KDIREG_RCX */
+	VM_REG_GUEST_R8,	/* KDIREG_R8 */
+	VM_REG_GUEST_R9,	/* KDIREG_R9 */
+	VM_REG_GUEST_RAX,	/* KDIREG_RAX */
+	VM_REG_GUEST_RBX,	/* KDIREG_RBX */
+	VM_REG_GUEST_RBP,	/* KDIREG_RBP */
+	VM_REG_GUEST_R10,	/* KDIREG_R10 */
+	VM_REG_GUEST_R11,	/* KDIREG_R11 */
+	VM_REG_GUEST_R12,	/* KDIREG_R12 */
+	VM_REG_GUEST_R13,	/* KDIREG_R13 */
+	VM_REG_GUEST_R14,	/* KDIREG_R14 */
+	VM_REG_GUEST_R15,	/* KDIREG_R15 */
+	VM_REG_LAST,		/* KDIREG_FSBASE */
+	VM_REG_LAST,		/* KDIREG_GSBASE */
+	VM_REG_LAST,		/* KDIREG_KGSBASE */
+	VM_REG_GUEST_CR2,	/* KDIREG_CR2 */
+	VM_REG_GUEST_CR3,	/* KDIREG_CR3 */
+	VM_REG_GUEST_DS,	/* KDIREG_DS */
+	VM_REG_GUEST_ES,	/* KDIREG_ES */
+	VM_REG_GUEST_FS,	/* KDIREG_FS */
+	VM_REG_GUEST_GS,	/* KDIREG_GS */
+	VM_REG_LAST,		/* KDIREG_TRAPNO */
+	VM_REG_LAST,		/* KDIREG_ERR */
+	VM_REG_GUEST_RIP,	/* KDIREG_RIP */
+	VM_REG_GUEST_CS,	/* KDIREG_CS */
+	VM_REG_GUEST_RFLAGS,	/* KDIREG_RFLAGS */
+	VM_REG_GUEST_RSP,	/* KDIREG_RSP */
+	VM_REG_GUEST_SS		/* KDIREG_SS */
+};
+CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);
+
+/*
+ * Mapping of libvmm-defined registers to vmmapi-defined registers.
+ *
+ * This array must be kept in sync with the definitions in libvmm.h
+ */
+static int vmm_sys_regmap[] = {
+	VM_REG_GUEST_CR0,	/* VMM_REG_CR0 */
+	VM_REG_GUEST_CR2,	/* VMM_REG_CR2 */
+	VM_REG_GUEST_CR3,	/* VMM_REG_CR3 */
+	VM_REG_GUEST_CR4,	/* VMM_REG_CR4 */
+	VM_REG_GUEST_DR0,	/* VMM_REG_DR0 */
+	VM_REG_GUEST_DR1,	/* VMM_REG_DR1 */
+	VM_REG_GUEST_DR2,	/* VMM_REG_DR2 */
+	VM_REG_GUEST_DR3,	/* VMM_REG_DR3 */
+	VM_REG_GUEST_DR6,	/* VMM_REG_DR6 */
+	VM_REG_GUEST_DR7,	/* VMM_REG_DR7 */
+	VM_REG_GUEST_EFER,	/* VMM_REG_EFER */
+	VM_REG_GUEST_PDPTE0,	/* VMM_REG_PDPTE0 */
+	VM_REG_GUEST_PDPTE1,	/* VMM_REG_PDPTE1 */
+	VM_REG_GUEST_PDPTE2,	/* VMM_REG_PDPTE2 */
+	VM_REG_GUEST_PDPTE3,	/* VMM_REG_PDPTE3 */
+	VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
+};
+
+/*
+ * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
+ *
+ * This array must be kept in sync with the definitions in libvmm.h
+ */
+static int vmm_descmap[] = {
+	VM_REG_GUEST_GDTR,
+	VM_REG_GUEST_LDTR,
+	VM_REG_GUEST_IDTR,
+	VM_REG_GUEST_TR,
+	VM_REG_GUEST_CS,
+	VM_REG_GUEST_DS,
+	VM_REG_GUEST_ES,
+	VM_REG_GUEST_FS,
+	VM_REG_GUEST_GS,
+	VM_REG_GUEST_SS
+};
+
+static int
+vmm_mapreg(int reg)
+{
+	errno = 0;
+
+	if (reg < 0)
+		goto fail;
+
+	if (reg < KDIREG_NGREG)
+		return (vmm_kdi_regmap[reg]);
+
+	if (reg >= VMM_REG_OFFSET &&
+	    reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
+		return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);
+
+fail:
+	errno = EINVAL;
+	return (VM_REG_LAST);
+}
+
+static int
+vmm_mapdesc(int desc)
+{
+	errno = 0;
+
+	if (desc >= VMM_DESC_OFFSET &&
+	    desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
+		return (vmm_descmap[desc - VMM_DESC_OFFSET]);
+
+	errno = EINVAL;
+	return (VM_REG_LAST);
+}
+
+int
+vmm_getreg(vmm_t *vmm, int vcpu, int reg, uint64_t *val)
+{
+	reg = vmm_mapreg(reg);
+
+	if (reg == VM_REG_LAST)
+		return (-1);
+
+	return (vm_get_register(vmm->vmm_ctx, vcpu, reg, val));
+}
+
+int
+vmm_setreg(vmm_t *vmm, int vcpu, int reg, uint64_t val)
+{
+	reg = vmm_mapreg(reg);
+
+	if (reg == VM_REG_LAST)
+		return (-1);
+
+	return (vm_set_register(vmm->vmm_ctx, vcpu, reg, val));
+}
+
+int
+vmm_get_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
+    uint64_t *regvals)
+{
+	int *vm_regnums;
+	int i;
+	int ret = -1;
+
+	vm_regnums = malloc(sizeof (int) * nregs);
+	if (vm_regnums == NULL)
+		return (ret);
+
+	for (i = 0; i != nregs; i++) {
+		vm_regnums[i] = vmm_mapreg(regnums[i]);
+		if (vm_regnums[i] == VM_REG_LAST)
+			goto fail;
+	}
+
+	ret = vm_get_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
+	    regvals);
+
+fail:
+	free(vm_regnums);
+	return (ret);
+}
+
+int
+vmm_set_regset(vmm_t *vmm, int vcpu, size_t nregs, const int *regnums,
+    uint64_t *regvals)
+{
+	int *vm_regnums;
+	int i;
+	int ret = -1;
+
+	vm_regnums = malloc(sizeof (int) * nregs);
+	if (vm_regnums == NULL)
+		return (ret);
+
+	for (i = 0; i != nregs; i++) {
+		vm_regnums[i] = vmm_mapreg(regnums[i]);
+		if (vm_regnums[i] == VM_REG_LAST)
+			goto fail;
+	}
+
+	ret = vm_set_register_set(vmm->vmm_ctx, vcpu, nregs, vm_regnums,
+	    regvals);
+
+fail:
+	free(vm_regnums);
+	return (ret);
+}
+
+int
+vmm_get_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
+{
+	desc = vmm_mapdesc(desc);
+	if (desc == VM_REG_LAST)
+		return (-1);
+
+	return (vm_get_desc(vmm->vmm_ctx, vcpu, desc, &vd->vd_base, &vd->vd_lim,
+	    &vd->vd_acc));
+}
+
+int
+vmm_set_desc(vmm_t *vmm, int vcpu, int desc, vmm_desc_t *vd)
+{
+	desc = vmm_mapdesc(desc);
+	if (desc == VM_REG_LAST)
+		return (-1);
+
+	return (vm_set_desc(vmm->vmm_ctx, vcpu, desc, vd->vd_base, vd->vd_lim,
+	    vd->vd_acc));
+}
+
+/*
+ * Structure to hold MMU state during address translation.
+ * The contents of vmm_mmu_regnum[] must be kept in sync with this.
+ */
+typedef struct vmm_mmu {
+	uint64_t vm_cr0;
+	uint64_t vm_cr3;
+	uint64_t vm_cr4;
+	uint64_t vm_efer;
+} vmm_mmu_t;
+
+static const int vmm_mmu_regnum[] = {
+	VMM_REG_CR0,
+	VMM_REG_CR3,
+	VMM_REG_CR4,
+	VMM_REG_EFER
+};
+
+#define	X86_PTE_P		0x001ULL
+#define	X86_PTE_PS		0x080ULL
+
+#define	X86_PTE_PHYSMASK	0x000ffffffffff000ULL
+#define	X86_PAGE_SHIFT		12
+#define	X86_PAGE_SIZE		(1ULL << X86_PAGE_SHIFT)
+
+#define	X86_SEG_CODE_DATA	(1ULL << 4)
+#define	X86_SEG_PRESENT		(1ULL << 7)
+#define	X86_SEG_LONG		(1ULL << 13)
+#define	X86_SEG_BIG		(1ULL << 14)
+#define	X86_SEG_GRANULARITY	(1ULL << 15)
+#define	X86_SEG_UNUSABLE	(1ULL << 16)
+
+#define	X86_SEG_USABLE		(X86_SEG_PRESENT | X86_SEG_CODE_DATA)
+#define	X86_SEG_USABLE_MASK	(X86_SEG_UNUSABLE | X86_SEG_USABLE)
+
+/*
+ * vmm_pte2paddr:
+ *
+ * Recursively calculate the physical address from a virtual address,
+ * starting at the given PTE level using the given PTE.
+ */
+static int
+vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
+    uint64_t vaddr, uint64_t *paddr)
+{
+	int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
+	int off_bits = ia32 ? 10 : 9;
+	boolean_t hugepage = B_FALSE;
+	uint64_t offset;
+	uint64_t off_mask, off_shift;
+
+	if (level < 4 && (pte & X86_PTE_P) == 0) {
+		errno = EFAULT;
+		return (-1);
+	}
+
+	off_shift = X86_PAGE_SHIFT + off_bits * level;
+	off_mask = (1ULL << off_shift) - 1;
+
+	offset = vaddr & off_mask;
+
+	if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
+		hugepage = B_TRUE;
+	} else {
+		if (level > 0) {
+			offset >>= off_shift - off_bits;
+			offset <<= X86_PAGE_SHIFT - off_bits;
+		}
+		off_mask = 0xfff;
+	}
+
+	*paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;
+
+	if (level == 0 || hugepage)
+		return (0);
+
+	pte = 0;
+	if (vmm_pread(vmm, &pte, pte_size, *paddr) != pte_size)
+		return (-1);
+	return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
+}
+
+static vmm_mode_t
+vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpu, vmm_mmu_t *mmu)
+{
+	if ((mmu->vm_cr0 & CR0_PE) == 0)
+		return (VMM_MODE_REAL);
+	else if ((mmu->vm_cr4 & CR4_PAE) == 0)
+		return (VMM_MODE_PROT);
+	else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
+		return (VMM_MODE_PAE);
+	else
+		return (VMM_MODE_LONG);
+}
+
+vmm_mode_t
+vmm_vcpu_mode(vmm_t *vmm, int vcpu)
+{
+	vmm_mmu_t mmu = { 0 };
+
+	if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
+	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
+		return (VMM_MODE_UNKNOWN);
+
+	return (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu));
+}
+
+vmm_isa_t
+vmm_vcpu_isa(vmm_t *vmm, int vcpu)
+{
+	vmm_desc_t cs;
+
+	if (vmm_get_desc(vmm, vcpu, VMM_DESC_CS, &cs) != 0)
+		return (VMM_ISA_UNKNOWN);
+
+	switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
+	case 0x0:		/* 16b code segment */
+		return (VMM_ISA_16);
+	case X86_SEG_LONG:	/* 64b code segment */
+		return (VMM_ISA_64);
+	case X86_SEG_BIG:	/* 32b code segment */
+		return (VMM_ISA_32);
+	}
+
+	return (VMM_ISA_UNKNOWN);
+}
+
+/*
+ * vmm_vtol:
+ *
+ * Translate a virtual address to a physical address on a certain vCPU,
+ * using the specified segment register or descriptor according to the mode.
+ *
+ */
+int
+vmm_vtol(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *laddr)
+{
+	vmm_desc_t desc;
+	uint64_t limit;
+
+	if (vmm_get_desc(vmm, vcpu, seg, &desc) != 0)
+		return (-1);
+
+	switch (vmm_vcpu_mode(vmm, vcpu)) {
+	case VMM_MODE_REAL:
+		if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
+			goto fault;
+		/* FALLTHRU */
+	case VMM_MODE_PROT:
+	case VMM_MODE_PAE:
+		if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
+			/* unusable, system segment, or not present */
+			goto fault;
+
+		limit = desc.vd_lim;
+		if (desc.vd_acc & X86_SEG_GRANULARITY)
+			limit *= 4096;
+
+		if (vaddr > limit)
+			goto fault;
+		/* FALLTHRU */
+	case VMM_MODE_LONG:
+		*laddr = desc.vd_base + vaddr;
+		return (0);
+
+	default:
+	fault:
+		errno = EFAULT;
+		return (-1);
+	}
+
+}
+
+/*
+ * vmm_vtop:
+ *
+ * Translate a virtual address to a guest physical address on a certain vCPU,
+ * according to the mode the vCPU is in.
+ */
+int
+vmm_vtop(vmm_t *vmm, int vcpu, int seg, uint64_t vaddr, uint64_t *paddr)
+{
+	vmm_mmu_t mmu = { 0 };
+	int ret = 0;
+
+	if (vmm_vtol(vmm, vcpu, seg, vaddr, &vaddr) != 0)
+		return (-1);
+
+	if (vmm_get_regset(vmm, vcpu, ARRAY_SIZE(vmm_mmu_regnum),
+	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
+		return (-1);
+
+	if ((mmu.vm_cr0 & CR0_PG) == 0) {
+		/* no paging, physical equals virtual */
+		*paddr = vaddr;
+		return (0);
+	}
+
+	switch (vmm_vcpu_mmu_mode(vmm, vcpu, &mmu)) {
+	case VMM_MODE_PROT:
+		/* protected mode, no PAE: 2-level paging, 32bit PTEs */
+		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_TRUE, 2, vaddr, paddr);
+		break;
+	case VMM_MODE_PAE:
+		/* protected mode with PAE: 3-level paging, 64bit PTEs */
+		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 3, vaddr, paddr);
+		break;
+	case VMM_MODE_LONG:
+		/* long mode: 4-level paging, 64bit PTEs */
+		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
+		break;
+	default:
+		ret = -1;
+	}
+
+	return (ret);
+}
+
+ssize_t
+vmm_vread(vmm_t *vmm, int vcpu, int seg, void *buf, size_t len, uintptr_t addr)
+{
+	ssize_t res = 0;
+	uint64_t paddr;
+	size_t plen;
+	uint64_t boundary;
+
+	while (len != 0) {
+		if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
+			errno = EFAULT;
+			return (0);
+		}
+
+		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
+		if (addr + len > boundary)
+			plen = boundary - addr;
+		else
+			plen = len;
+
+		if (vmm_pread(vmm, buf, plen, paddr) != plen)
+			return (0);
+		len -= plen;
+		addr += plen;
+		buf += plen;
+		res += plen;
+	}
+
+	return (res);
+}
+
+ssize_t
+vmm_vwrite(vmm_t *vmm, int vcpu, int seg, const void *buf, size_t len,
+    uintptr_t addr)
+{
+	ssize_t res = 0;
+	uint64_t paddr;
+	size_t plen;
+	uint64_t boundary;
+
+	while (len != 0) {
+		if (vmm_vtop(vmm, vcpu, seg, addr, &paddr) != 0) {
+			errno = EFAULT;
+			return (0);
+		}

+		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
+		if (addr + len > boundary)
+			plen = boundary - addr;
+		else
+			plen = len;
+
+		if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
+			return (0);
+		len -= plen;
+		addr += plen;
+		buf += plen;
+		res += plen;
+	}
+
+	return (res);
+}
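
To illustrate how the interfaces added by this change fit together, here is a minimal consumer sketch. It is not part of the commit: the VM name "testvm" and the read at vCPU 0's %rip are placeholders, and it assumes the program is compiled against libvmm.h and linked with the new library (presumably -lvmm).

/*
 * Hypothetical example, not part of this commit: open an existing bhyve VM,
 * map its memory read-only, and read a few bytes at vCPU 0's current %rip.
 */
#include <sys/types.h>
#include <sys/kdi_regs.h>
#include <stdint.h>
#include <stdio.h>
#include <libvmm.h>

int
main(void)
{
	/* "testvm" is a placeholder for the name of a running bhyve VM. */
	vmm_t *vmm = vmm_open_vm("testvm");
	uint64_t rip;
	char buf[16];

	if (vmm == NULL) {
		(void) fprintf(stderr, "failed to open VM\n");
		return (1);
	}

	/* Map the guest's memory segments read-only into this process. */
	if (vmm_map(vmm, B_FALSE) != 0) {
		(void) fprintf(stderr, "failed to map guest memory\n");
		vmm_close_vm(vmm);
		return (1);
	}

	(void) printf("vcpus: %zu, guest memory: %zu bytes\n",
	    vmm_ncpu(vmm), vmm_memsize(vmm));

	/*
	 * Read the instruction bytes at vCPU 0's %rip; vmm_vread() performs
	 * the segmentation and page-table walk implemented above.
	 */
	if (vmm_getreg(vmm, 0, KDIREG_RIP, &rip) == 0 &&
	    vmm_vread(vmm, 0, VMM_DESC_CS, buf, sizeof (buf), rip) ==
	    sizeof (buf))
		(void) printf("read %zu bytes at %%rip 0x%lx\n",
		    sizeof (buf), (unsigned long)rip);

	vmm_unmap(vmm);
	vmm_close_vm(vmm);
	return (0);
}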