summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Mustacchi <rm@joyent.com>2011-04-13 18:36:48 -0700
committerRobert Mustacchi <rm@joyent.com>2011-04-13 18:39:28 -0700
commitecb0a5535dd376628aff2fd3f4c9a742dd4ab0b7 (patch)
treef682a8712af5bc946e42ce4c64394e96d2f41f5a
parent69eb07e56c007c711072a478bbcf25e8fd045a52 (diff)
downloadillumos-kvm-ecb0a5535dd376628aff2fd3f4c9a742dd4ab0b7.tar.gz
HVM-52 can't use /dev/mem for mmap
HVM-53 Opening /dev/kvm should create a new minor instance HVM-56 only one vm should be supported per open of /dev/kvm
-rw-r--r--coalesced_mmio.h4
-rw-r--r--kvm.c429
-rw-r--r--kvm.h3
-rw-r--r--kvm_x86.c81
-rw-r--r--kvm_x86host.h5
5 files changed, 304 insertions, 218 deletions
diff --git a/coalesced_mmio.h b/coalesced_mmio.h
index dc16d64..045949b 100644
--- a/coalesced_mmio.h
+++ b/coalesced_mmio.h
@@ -38,7 +38,7 @@ struct kvm_coalesced_mmio_dev {
struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
};
-int kvm_coalesced_mmio_init(struct kvm *kvm, struct kvm_vcpu *vcpu);
+int kvm_coalesced_mmio_init(struct kvm *kvm);
void kvm_coalesced_mmio_free(struct kvm *kvm);
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
@@ -48,7 +48,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
#else
-static int kvm_coalesced_mmio_init(struct kvm *kvm, struct kvm_vcpu *vcpu) { return 0; }
+static int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
static void kvm_coalesced_mmio_free(struct kvm *kvm) { }
#endif
diff --git a/kvm.c b/kvm.c
index 027c040..5724637 100644
--- a/kvm.c
+++ b/kvm.c
@@ -19,6 +19,12 @@
#include <sys/spl.h>
#include <sys/cpuvar.h>
#include <sys/segments.h>
+#include <sys/cred.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/vm.h>
#include "vmx.h"
#include "msr-index.h"
@@ -38,41 +44,24 @@
#undef DEBUG
-int kvmid; /* monotonically increasing, unique per vm */
-
/*
- * Find the first cleared bit in a memory region.
+ * The entire state of the kvm device.
*/
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
- const unsigned long *p = addr;
- unsigned long result = 0;
- unsigned long tmp;
+typedef struct {
+ struct kvm *kds_kvmp;
+} kvm_devstate_t;
- while (size & ~(64-1)) {
- if (~(tmp = *(p++)))
- goto found;
- result += 64;
- size -= 64;
- }
- if (!size)
- return result;
+/*
+ * Internal driver-wide values
+ */
+static void *kvm_state; /* DDI state */
+static vmem_t *kvm_minor; /* minor number arena */
+static dev_info_t *kvm_dip; /* global devinfo hanlde */
+static minor_t kvm_base_minor; /* The only minor device that can be opened */
- tmp = (*p) | (~0UL << size);
- if (tmp == ~0UL) /* Are any bits zero? */
- return result + size; /* Nope. */
-found:
- return result + ffz(tmp);
-}
+int kvmid; /* monotonically increasing, unique per vm */
int largepages_enabled = 1;
-
-extern struct kvm *kvm_arch_create_vm(void);
-extern void kvm_arch_destroy_vm(struct kvm *kvmp);
-extern int kvm_arch_hardware_enable(void *garbage);
-extern void kvm_arch_hardware_disable(void *garbage);
-extern long kvm_vm_ioctl(struct kvm *kvmp, unsigned int ioctl, unsigned long arg, int mode);
-
static cpuset_t cpus_hardware_enabled;
static volatile uint32_t hardware_enable_failed;
static int kvm_usage_count;
@@ -81,26 +70,25 @@ kmutex_t kvm_lock;
kmem_cache_t *kvm_cache;
struct vmx_capability vmx_capability;
-/*
- * The entire state of the kvm device.
- */
-typedef struct {
- dev_info_t *dip; /* my devinfo handle */
-} kvm_devstate_t;
/*
- * An opaque handle where the kvm device state lives
+ * Driver forward declarations
*/
-static void *kvm_state;
-
static int kvm_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int kvm_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int kvm_read(dev_t dev, struct uio *uiop, cred_t *credp);
static int kvm_write(dev_t dev, struct uio *uiop, cred_t *credp);
static int kvm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
- cred_t *cred_p, int *rval_p);
+ cred_t *cred_p, int *rval_p);
static int kvm_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
- size_t len, size_t *maplen, uint_t model);
+ size_t len, size_t *maplen, uint_t model);
+static int kvm_segmap(dev_t, off_t, struct as *, caddr_t *, off_t,
+ unsigned int, unsigned int, unsigned int, cred_t *);
+static int kvm_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
+ void **result);
+static int kvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int kvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+
static struct cb_ops kvm_cb_ops = {
kvm_open,
@@ -113,18 +101,12 @@ static struct cb_ops kvm_cb_ops = {
kvm_ioctl,
kvm_devmap,
nodev, /* mmap */
- nodev, /* segmap */
+ kvm_segmap, /* segmap */
nochpoll, /* poll */
ddi_prop_op,
NULL,
- D_NEW | D_MP
+ D_NEW | D_MP | D_DEVMAP
};
-
-static int kvm_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
- void **result);
-static int kvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
-static int kvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
-
static struct dev_ops kvm_ops = {
DEVO_REV,
0,
@@ -153,6 +135,11 @@ static struct modlinkage modlinkage = {
0
};
+extern struct kvm *kvm_arch_create_vm(void);
+extern void kvm_arch_destroy_vm(struct kvm *kvmp);
+extern int kvm_arch_hardware_enable(void *garbage);
+extern void kvm_arch_hardware_disable(void *garbage);
+extern long kvm_vm_ioctl(struct kvm *kvmp, unsigned int ioctl, unsigned long arg, int mode);
static void hardware_enable(void *junk);
static void hardware_disable(void *junk);
extern struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, struct kvm_vcpu_ioc *arg,
@@ -179,16 +166,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, uint32_t msr_index, uint64_t data)
static void vmx_vcpu_run(struct kvm_vcpu *vcpu);
static void vmx_save_host_state(struct kvm_vcpu *vcpu);
-struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
-{
-#ifdef XXX_KVM_DOESNTCOMPILE
- return container_of(vcpu, struct vcpu_vmx, vcpu);
-#else
- /* assumes vcpu is first field in vcpu_vmx */
- /* because gcc with kernel flags complains about container_of */
- return (struct vcpu_vmx *)vcpu;
-#endif /*XXX*/
-}
static int vmx_handle_exit(struct kvm_vcpu *vcpu);
int vmx_interrupt_allowed(struct kvm_vcpu *vcpu);
@@ -222,6 +199,42 @@ void vmx_fpu_activate(struct kvm_vcpu *vcpu);
int get_ept_level(void);
static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
+{
+#ifdef XXX_KVM_DOESNTCOMPILE
+ return container_of(vcpu, struct vcpu_vmx, vcpu);
+#else
+ /* assumes vcpu is first field in vcpu_vmx */
+ /* because gcc with kernel flags complains about container_of */
+ return (struct vcpu_vmx *)vcpu;
+#endif /*XXX*/
+}
+
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(64-1)) {
+ if (~(tmp = *(p++)))
+ goto found;
+ result += 64;
+ size -= 64;
+ }
+ if (!size)
+ return result;
+
+ tmp = (*p) | (~0UL << size);
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found:
+ return result + ffz(tmp);
+}
+
static inline void __invvpid(int ext, uint16_t vpid, gva_t gva)
{
struct {
@@ -3151,68 +3164,52 @@ _info(struct modinfo *modinfop)
static int
kvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
- int instance;
- kvm_devstate_t *rsp;
-
- switch (cmd) {
-
- case DDI_ATTACH:
+ minor_t instance;
- instance = ddi_get_instance(dip);
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
- if (ddi_soft_state_zalloc(kvm_state, instance) != DDI_SUCCESS) {
- cmn_err(CE_CONT, "%s%d: can't allocate state\n",
- ddi_get_name(dip), instance);
- return (DDI_FAILURE);
- } else
- rsp = ddi_get_soft_state(kvm_state, instance);
+ if (kvm_dip != NULL)
+ return (DDI_FAILURE);
- kvm_cache = kmem_cache_create("kvm_cache", KVM_VM_DATA_SIZE,
- ptob(1), NULL, NULL, NULL, NULL, NULL, 0);
- list_create(&vm_list, sizeof(struct kvm), offsetof(struct kvm, vm_list));
- if (ddi_create_minor_node(dip, "kvm", S_IFCHR,
- instance, DDI_PSEUDO, 0) == DDI_FAILURE) {
- ddi_remove_minor_node(dip, NULL);
- goto attach_failed;
- }
+ instance = ddi_get_instance(dip);
+ if (ddi_create_minor_node(dip, "kvm", S_IFCHR, instance, DDI_PSEUDO, 0)
+ == DDI_FAILURE)
+ return (DDI_FAILURE);
- rsp->dip = dip;
- ddi_report_dev(dip);
+ kvm_dip = dip;
+ kvm_base_minor = instance;
- return (DDI_SUCCESS);
+ kvm_cache = kmem_cache_create("kvm_cache", KVM_VM_DATA_SIZE,
+ ptob(1), NULL, NULL, NULL, NULL, NULL, 0);
+ list_create(&vm_list, sizeof(struct kvm), offsetof(struct kvm, vm_list));
+ kvm_minor = vmem_create("kvm_minor", (void *)1, UINT32_MAX - 1, 1,
+ NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
- default:
- return (DDI_FAILURE);
- }
+ ddi_report_dev(dip);
-attach_failed:
- if (kvm_cache)
- kmem_cache_destroy(kvm_cache);
- (void) kvm_detach(dip, DDI_DETACH);
- return (DDI_FAILURE);
+ return (DDI_SUCCESS);
}
static int
kvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
int instance;
- register kvm_devstate_t *rsp;
- return (EBUSY);
-
- switch (cmd) {
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
- case DDI_DETACH:
- ddi_prop_remove_all(dip);
- instance = ddi_get_instance(dip);
- rsp = ddi_get_soft_state(kvm_state, instance);
- ddi_remove_minor_node(dip, NULL);
- ddi_soft_state_free(kvm_state, instance);
- return (DDI_SUCCESS);
+ VERIFY(kvm_dip != NULL && kvm_dip == dip);
+ instance = ddi_get_instance(dip);
+ VERIFY(instance == kvm_base_minor);
+ ddi_prop_remove_all(dip);
+ ddi_remove_minor_node(dip, NULL);
+ kmem_cache_destroy(kvm_cache);
+ list_destroy(&vm_list);
+ vmem_destroy(kvm_minor);
+ kvm_dip = NULL;
- default:
- return (DDI_FAILURE);
- }
+ return (DDI_SUCCESS);
}
/*ARGSUSED*/
@@ -3224,12 +3221,7 @@ kvm_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
- if ((rsp = ddi_get_soft_state(kvm_state,
- getminor((dev_t)arg))) != NULL) {
- *result = rsp->dip;
- error = DDI_SUCCESS;
- } else
- *result = NULL;
+ *result = kvm_dip;
break;
case DDI_INFO_DEVT2INSTANCE:
@@ -3247,14 +3239,41 @@ kvm_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
/*ARGSUSED*/
static int
-kvm_open(dev_t *devp, int flag, int otyp, cred_t *cred)
+kvm_open(dev_t *devp, int flag, int otype, cred_t *credp)
{
- if (otyp != OTYP_BLK && otyp != OTYP_CHR)
+
+ minor_t minor;
+ kvm_devstate_t *ksp;
+
+ if (flag & FEXCL || flag & FNDELAY)
+ return (EINVAL);
+
+ if (otype != OTYP_CHR)
+ return (EINVAL);
+
+ /*
+ * XXX This should be its own privilage
+ */
+ if (drv_priv(credp) != 0)
+ return (EPERM);
+
+ if (!(flag & FREAD && flag & FWRITE))
return (EINVAL);
- if (ddi_get_soft_state(kvm_state, getminor(*devp)) == NULL)
+ if (getminor(*devp) != kvm_base_minor)
return (ENXIO);
+ minor = (minor_t)(uintptr_t)vmem_alloc(kvm_minor, 1, VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(kvm_state, minor) != 0) {
+ vmem_free(kvm_minor, (void *)(uintptr_t)minor, 1);
+ return (ENXIO);
+ }
+
+ *devp = makedevice(getmajor(*devp), minor);
+ ksp = ddi_get_soft_state(kvm_state, minor);
+ VERIFY(ksp != NULL);
+
return (0);
}
@@ -3262,6 +3281,19 @@ kvm_open(dev_t *devp, int flag, int otyp, cred_t *cred)
static int
kvm_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
+ kvm_devstate_t *ksp;
+ minor_t minor = getminor(dev);
+
+ VERIFY(getminor(dev) != kvm_base_minor);
+ ksp = ddi_get_soft_state(kvm_state, minor);
+ /*
+ * XXX We need to clean up the vcpus / kvm structs we allocated.
+ */
+ if (ksp->kds_kvmp != NULL)
+ list_remove(&vm_list, ksp->kds_kvmp);
+ ddi_soft_state_free(kvm_state, minor);
+ vmem_free(kvm_minor, (void *)(uintptr_t)minor, 1);
+
return (0);
}
@@ -3639,26 +3671,26 @@ kvm_create_vm(void)
kvmp->users_count = 1;
list_insert_tail(&vm_list, kvmp);
mutex_exit(&kvm_lock);
-#ifdef KVM_MOVED
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
kvm_coalesced_mmio_init(kvmp);
#endif
-#endif /*KVM_MOVED*/
return (kvmp);
}
static int
-kvm_dev_ioctl_create_vm(intptr_t arg, int mode, int *rval_p)
+kvm_dev_ioctl_create_vm(kvm_devstate_t *ksp, intptr_t arg, int mode,
+ int *rval_p)
{
- struct kvm *kvmp;
+ if (ksp->kds_kvmp != NULL)
+ return (EINVAL);
- kvmp = kvm_create_vm();
- if (kvmp == NULL) {
+ ksp->kds_kvmp = kvm_create_vm();
+ if (ksp->kds_kvmp == NULL) {
cmn_err(CE_WARN, "Could not create new vm\n");
return (EIO);
}
- *rval_p = kvmp->kvmid;
+ *rval_p = ksp->kds_kvmp->kvmid;
return (DDI_SUCCESS);
}
@@ -4167,8 +4199,7 @@ find_kvm_id(int id)
return (kvmp);
}
-extern int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, uint32_t id,
- struct kvm_vcpu_ioc *kvm_vcpu, int *rval_p);
+extern int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, uint32_t id, int *rval_p);
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
@@ -13629,6 +13660,14 @@ static int
kvm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_p)
{
int rval = DDI_SUCCESS;
+ minor_t minor;
+ kvm_devstate_t *ksp;
+
+ minor = getminor(dev);
+ ksp = ddi_get_soft_state(kvm_state, minor);
+ if (ksp == NULL)
+ return (ENXIO);
+
union {
struct kvm_pit_state ps;
struct kvm_pit_state2 ps2;
@@ -13648,7 +13687,7 @@ kvm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_
*rval_p = KVM_API_VERSION;
break;
case KVM_CREATE_VM:
- rval = kvm_dev_ioctl_create_vm(arg, mode, rval_p);
+ rval = kvm_dev_ioctl_create_vm(ksp, arg, mode, rval_p);
break;
case KVM_CREATE_PIT:
{
@@ -14004,39 +14043,14 @@ kvm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_
}
case KVM_CREATE_VCPU: {
- struct kvm_vcpu_ioc *kvm_vcpu;
- struct kvm *kvmp;
+ uint32_t id;
- if ((kvm_vcpu = kmem_zalloc(sizeof(struct kvm_vcpu_ioc), KM_SLEEP)) == NULL) {
- rval = ENOMEM;
- break;
- }
-
- if (ddi_copyin((const void *)arg, kvm_vcpu,
- sizeof(struct kvm_vcpu_ioc), mode) != 0) {
- rval = EFAULT;
- kmem_free(kvm_vcpu, sizeof(struct kvm_vcpu_ioc));
- break;
- }
+ id = (uintptr_t)arg;
- rval = EINVAL;
- kvmp = find_kvm_id(kvm_vcpu->kvmid);
- if (kvmp == NULL) {
- kmem_free(kvm_vcpu, sizeof(struct kvm_vcpu_ioc));
- break;
- }
-
- rval = kvm_vm_ioctl_create_vcpu(kvmp, kvm_vcpu->id, kvm_vcpu, rval_p);
- if (rval != 0) {
+ rval = kvm_vm_ioctl_create_vcpu(ksp->kds_kvmp, id, rval_p);
+ if (rval != 0)
rval = EINVAL;
- kmem_free(kvm_vcpu, sizeof(struct kvm_vcpu_ioc));
- break;
- }
- if (ddi_copyout(kvm_vcpu, (void *)arg,
- sizeof(struct kvm_vcpu_ioc), mode) != 0)
- rval = EFAULT;
- kmem_free(kvm_vcpu, sizeof(struct kvm_vcpu_ioc));
break;
}
@@ -14564,7 +14578,7 @@ kvm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_
rval = EINVAL;
break;
}
- *rval_p = ptob(3); /*XXX initially 1*/
+ *rval_p = ptob(KVM_VCPU_MMAP_LENGTH);
break;
case KVM_SET_TSS_ADDR:
{
@@ -14872,9 +14886,112 @@ kvm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred_p, int *rval_
return (rval);
}
+/*
+ * mmap(2), segmap(9E), and devmap(9E)
+ *
+ * Users call mmap(2). For each call to mmap(2) there is a corresponding call to
+ * segmap(9E). segmap(9E) is responsible for making sure that the various
+ * requests in the mmap call make sense from the question of protection,
+ * offsets, lengths, etc. It then ends by calling the ddi_devmap_segmap() which
+ * is what is responsible for making all of the actual mappings.
+ *
+ * The devmap entry point is called a variable number of times. It is called a
+ * number of times until all the maplen values equal the original length of the
+ * requested mapping. This allows us to make several different mappings by not
+ * honoring the full requested mapping the first time. Each subsequent time it
+ * is called with an updated offset and length.
+ */
+
+
+/*
+ * We can only create one mapping per dhp. We know whether this is the first
+ * time or the second time in based on the requested offset / length. If we only
+ * have one page worth, then it's always looking for the shared mmio page. If it
+ * is asking for KVM_VCPU_MMAP_LENGTH pages, then it's asking for the shared
+ * vcpu pages.
+ */
static int
-kvm_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
- size_t len, size_t *maplen, uint_t model)
+kvm_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
+ size_t *maplen, uint_t model)
{
- return (ENOTSUP);
+ int res, vpi;
+ minor_t instance;
+ kvm_devstate_t *ksp;
+ kvm_vcpu_t *vcpu;
+
+ instance = getminor(dev);
+ ksp = ddi_get_soft_state(kvm_state, instance);
+ if (ksp == NULL)
+ return (ENXIO);
+
+ /*
+ * Enforce that only 64-bit guests are allowed.
+ */
+ if (ddi_model_convert_from(model) == DDI_MODEL_ILP32)
+ return (EINVAL);
+
+ if (ksp->kds_kvmp == NULL)
+ return (EINVAL);
+
+ if (len == PAGESIZE) {
+ res = devmap_umem_setup(dhp, kvm_dip, NULL,
+ ksp->kds_kvmp->mmio_cookie, 0, len, PROT_READ | PROT_WRITE |
+ PROT_USER, DEVMAP_DEFAULTS, NULL);
+ *maplen = len;
+ return (res);
+ }
+
+ vpi = btop(off) / 3;
+ VERIFY(vpi < ksp->kds_kvmp->online_vcpus);
+ vcpu = ksp->kds_kvmp->vcpus[vpi];
+ VERIFY(vcpu != NULL);
+
+ res = devmap_umem_setup(dhp, kvm_dip, NULL, vcpu->cookie, 0,
+ PAGESIZE*2, PROT_READ | PROT_WRITE | PROT_USER, DEVMAP_DEFAULTS,
+ NULL);
+
+ *maplen = PAGESIZE*2;
+
+ return (res);
+}
+
+/*
+ * We determine which vcpu we're trying to mmap in based upon the requested
+ * offset. For a given vcpu n the offset to specify it is
+ * n*KVM_VCPU_MMAP_LENGTH. Thus the first vcpu is at offset 0.
+ */
+static int
+kvm_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp, off_t len,
+ unsigned int prot, unsigned int maxprot, unsigned int flags,
+ cred_t *credp)
+{
+ kvm_devstate_t *ksp;
+ off_t poff;
+
+ if ((ksp = ddi_get_soft_state(kvm_state, getminor(dev))) == NULL)
+ return (ENXIO);
+
+ if (prot & PROT_EXEC)
+ return (EINVAL);
+
+ if (!(prot & PROT_USER))
+ return (EINVAL);
+
+ if (len != ptob(KVM_VCPU_MMAP_LENGTH))
+ return (EINVAL);
+
+ poff = btop(off);
+ if (poff % 3 != 0)
+ return (EINVAL);
+
+ /*
+ * Currently vcpus can only be turned on, they cannot be offlined. As a
+ * result we can safely check that we have a request for a valid cpu
+ * because it is within this range.
+ */
+ if (poff / 3 + 1 > ksp->kds_kvmp->online_vcpus)
+ return (EINVAL);
+
+ return (ddi_devmap_segmap(dev, off, asp, addrp, len, prot, maxprot,
+ flags, credp));
}
diff --git a/kvm.h b/kvm.h
index 9d02fd3..3869897 100644
--- a/kvm.h
+++ b/kvm.h
@@ -404,7 +404,9 @@ struct kvm_vcpu {
/*#endif*/
struct kvm_vcpu_arch arch;
+ ddi_umem_cookie_t cookie;
};
+typedef struct kvm_vcpu kvm_vcpu_t;
#define KVM_NR_SHARED_MSRS 16
@@ -1223,6 +1225,7 @@ struct kvm {
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
struct kvm_coalesced_mmio_dev *coalesced_mmio_dev;
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+ ddi_umem_cookie_t mmio_cookie;
#endif
kmutex_t irq_lock;
diff --git a/kvm_x86.c b/kvm_x86.c
index 4f31c82..d56dcb8 100644
--- a/kvm_x86.c
+++ b/kvm_x86.c
@@ -1471,11 +1471,9 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
};
int
-kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, struct kvm_vcpu_ioc *arg, unsigned id)
+kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
- page_t *page;
int r;
- caddr_t kvm_run;
mutex_init(&vcpu->mutex, NULL, MUTEX_DRIVER, 0);
vcpu->cpu = -1;
@@ -1484,44 +1482,17 @@ kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, struct kvm_vcpu_ioc *arg,
#ifdef NOTNOW
init_waitqueue_head(&vcpu->wq);
#endif
- page = alloc_page(PAGESIZE*3, KM_SLEEP);
- if (!page) {
- r = ENOMEM;
- goto fail;
- }
- vcpu->run = (struct kvm_run *)page_address(page);
- kvm_run = (caddr_t)vcpu->run;
-
- arg->kvm_run_addr =
- (hat_getpfnum(kas.a_hat, kvm_run) << PAGESHIFT) |
- ((uint64_t)kvm_run & PAGEOFFSET);
-
- vcpu->run->xxx_paddrs.xxx_pio_paddr =
- hat_getpfnum(kas.a_hat, kvm_run + PAGESIZE) << PAGESHIFT;
-
- vcpu->run->xxx_paddrs.xxx_mmio_paddr =
- hat_getpfnum(kas.a_hat, kvm_run + (2 * PAGESIZE)) << PAGESHIFT;
-
- arg->kvm_vcpu_addr = (uint64_t)vcpu;
+ vcpu->run = ddi_umem_alloc(PAGESIZE * 2, DDI_UMEM_SLEEP, &vcpu->cookie);
r = kvm_arch_vcpu_init(vcpu);
- if (r != 0)
- goto fail_free_run;
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET /*XXX moved */
- kvm_coalesced_mmio_init(kvm, vcpu);
-#endif
- return 0;
+ if (r != 0) {
+ vcpu->run = NULL;
+ ddi_umem_free(&vcpu->cookie);
+ return (r);
+ }
-fail_free_run:
-#ifdef XXX
- free_page((unsigned long)vcpu->run);
-#else
- XXX_KVM_PROBE;
-#endif /*XXX*/
- vcpu->run = 0;
-fail:
- return r;
+ return (0);
}
/*
@@ -1662,7 +1633,7 @@ out:
}
struct kvm_vcpu *
-vmx_create_vcpu(struct kvm *kvm, struct kvm_vcpu_ioc *arg, unsigned int id)
+vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
struct vcpu_vmx *vmx = kmem_cache_alloc(kvm_vcpu_cache, KM_SLEEP);
@@ -1672,7 +1643,7 @@ vmx_create_vcpu(struct kvm *kvm, struct kvm_vcpu_ioc *arg, unsigned int id)
return NULL;
allocate_vpid(vmx);
- err = kvm_vcpu_init(&vmx->vcpu, kvm, arg, id);
+ err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
if (err) {
#ifdef NOTNOW
goto free_vcpu;
@@ -1741,12 +1712,12 @@ free_vcpu:
}
struct kvm_vcpu *
-kvm_arch_vcpu_create(struct kvm *kvm, struct kvm_vcpu_ioc *arg, unsigned int id)
+kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
/* for right now, assume always on x86 */
/* later, if needed, we'll add something here */
/* to call architecture dependent routine */
- return vmx_create_vcpu(kvm, arg, id);
+ return vmx_create_vcpu(kvm, id);
}
void update_exception_bitmap(struct kvm_vcpu *vcpu)
@@ -4034,12 +4005,12 @@ void kvm_get_kvm(struct kvm *kvm)
* Creates some virtual cpus. Good luck creating more than one.
*/
int
-kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int32_t id, struct kvm_vcpu_ioc *arg, int *rval_p)
+kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int32_t id, int *rval_p)
{
int r;
struct kvm_vcpu *vcpu, *v;
- vcpu = kvm_arch_vcpu_create(kvm, arg, id);
+ vcpu = kvm_arch_vcpu_create(kvm, id);
if (vcpu == NULL)
return EINVAL;
@@ -4352,24 +4323,18 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
kmem_free(dev, sizeof(struct kvm_coalesced_mmio_dev));
}
-int kvm_coalesced_mmio_init(struct kvm *kvm, struct kvm_vcpu *vcpu)
+int
+kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct kvm_coalesced_mmio_dev *dev;
page_t *page;
int ret;
- /*
- ret = -ENOMEM;
- page = alloc_page(PAGESIZE, KM_SLEEP);
- if (!page)
- goto out_err;
- kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page);
- */
- if (!kvm->coalesced_mmio_ring)
- kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)((caddr_t)vcpu->run + (KVM_COALESCED_MMIO_PAGE_OFFSET*PAGESIZE));
+ kvm->coalesced_mmio_ring = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
+ &kvm->mmio_cookie);
ret = -ENOMEM;
- dev = kmem_zalloc(sizeof(struct kvm_coalesced_mmio_dev), KM_SLEEP);
+ dev = kmem_zalloc(sizeof (struct kvm_coalesced_mmio_dev), KM_SLEEP);
if (!dev)
goto out_free_page;
mutex_init(&dev->lock, NULL, MUTEX_DRIVER, 0);
@@ -4383,18 +4348,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm, struct kvm_vcpu *vcpu)
if (ret < 0)
goto out_free_dev;
- return ret;
+ return (ret);
out_free_dev:
- kmem_free(dev, sizeof(struct kvm_coalesced_mmio_dev));
+ kmem_free(dev, sizeof (struct kvm_coalesced_mmio_dev));
out_free_page:
#ifdef XXX
kmem_free(page, PAGESIZE);
#else
XXX_KVM_PROBE;
#endif /*XXX*/
-out_err:
- return ret;
+ ddi_umem_free(kvm->mmio_cookie);
+ return (ret);
}
void kvm_coalesced_mmio_free(struct kvm *kvm)
diff --git a/kvm_x86host.h b/kvm_x86host.h
index 4b65f4b..d40f573 100644
--- a/kvm_x86host.h
+++ b/kvm_x86host.h
@@ -31,8 +31,9 @@
#include "kvm_types.h"
#include "msr.h"
-#define KVM_PIO_PAGE_OFFSET 1
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_VCPU_MMAP_LENGTH 3
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGESIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))