Diffstat (limited to 'linux/x86/vmx.c')
-rw-r--r--  linux/x86/vmx.c  1012
1 file changed, 394 insertions(+), 618 deletions(-)
diff --git a/linux/x86/vmx.c b/linux/x86/vmx.c
index ed4cf97..e29837e 100644
--- a/linux/x86/vmx.c
+++ b/linux/x86/vmx.c
@@ -45,7 +45,6 @@
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -68,7 +67,6 @@
#include <linux/moduleparam.h>
#include <linux/ftrace_event.h>
#include <linux/slab.h>
-#include <linux/tboot.h>
#include "kvm_cache_regs.h"
#include "x86.h"
@@ -77,14 +75,12 @@
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
#include "trace.h"
#define __ex(x) __kvm_handle_fault_on_reboot(x)
-MODULE_INFO(version, "kvm-kmod-2.6.38-rc7");
+MODULE_INFO(version, "kvm-kmod-2.6.34");
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -107,12 +103,6 @@ module_param_named(unrestricted_guest,
static int __read_mostly emulate_invalid_guest_state = 0;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static int __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
-static int __read_mostly yield_on_hlt = 1;
-module_param(yield_on_hlt, bool, S_IRUGO);
-
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK \
@@ -149,8 +139,6 @@ module_param(ple_gap, int, S_IRUGO);
static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);
-#define NR_AUTOLOAD_MSRS 1
-
struct vmcs {
u32 revision_id;
u32 abort;
@@ -169,7 +157,6 @@ struct vcpu_vmx {
unsigned long host_rsp;
int launched;
u8 fail;
- u32 exit_intr_info;
u32 idt_vectoring_info;
struct shared_msr_entry *guest_msrs;
int nmsrs;
@@ -179,11 +166,6 @@ struct vcpu_vmx {
u64 msr_guest_kernel_gs_base;
#endif
struct vmcs *vmcs;
- struct msr_autoload {
- unsigned nr;
- struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
- struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
- } msr_autoload;
struct {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
@@ -199,6 +181,11 @@ struct vcpu_vmx {
u32 limit;
u32 ar;
} tr, es, ds, fs, gs;
+ struct {
+ bool pending;
+ u8 vector;
+ unsigned rip;
+ } irq;
} rmode;
int vpid;
bool emulation_required;
@@ -219,22 +206,16 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
static int init_rmode(struct kvm *kvm);
static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
-static DEFINE_PER_CPU(struct kvm_desc_ptr, host_gdt);
static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
-static bool cpu_has_load_ia32_efer;
-
static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -283,67 +264,67 @@ static u64 host_efer;
static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
/*
- * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
+ * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
* away by decrementing the array size.
*/
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
- MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+ MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
};
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
-static inline bool is_page_fault(u32 intr_info)
+static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}
-static inline bool is_no_device(u32 intr_info)
+static inline int is_no_device(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}
-static inline bool is_invalid_opcode(u32 intr_info)
+static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}
-static inline bool is_external_interrupt(u32 intr_info)
+static inline int is_external_interrupt(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}
-static inline bool is_machine_check(u32 intr_info)
+static inline int is_machine_check(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}
-static inline bool cpu_has_vmx_msr_bitmap(void)
+static inline int cpu_has_vmx_msr_bitmap(void)
{
return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
}
-static inline bool cpu_has_vmx_tpr_shadow(void)
+static inline int cpu_has_vmx_tpr_shadow(void)
{
return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
}
-static inline bool vm_need_tpr_shadow(struct kvm *kvm)
+static inline int vm_need_tpr_shadow(struct kvm *kvm)
{
return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
}
-static inline bool cpu_has_secondary_exec_ctrls(void)
+static inline int cpu_has_secondary_exec_ctrls(void)
{
return vmcs_config.cpu_based_exec_ctrl &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
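
The bool-to-int churn above lands on the helpers that decode the VM-exit interruption-information field, and they all share one mask-and-compare shape. A standalone sketch of that shape (constants follow the SDM layout: bits 7:0 vector, bits 10:8 type, bit 31 valid; the names here are illustrative, not the kernel's):

    #include <stdint.h>

    #define VECTOR_MASK  0x000000ffu    /* bits 7:0  - vector       */
    #define TYPE_MASK    0x00000700u    /* bits 10:8 - event type   */
    #define VALID_MASK   0x80000000u    /* bit 31    - info valid   */
    #define TYPE_HARD_EXCEPTION 0x300u  /* type 3: hardware exception */
    #define PF_VECTOR    14u            /* #PF */

    /* Same shape as is_page_fault() above: mask off everything except
     * vector, type and valid, then compare against the one encoding
     * that means "a valid hardware page fault". */
    static int looks_like_page_fault(uint32_t intr_info)
    {
            return (intr_info & (TYPE_MASK | VECTOR_MASK | VALID_MASK)) ==
                   (TYPE_HARD_EXCEPTION | PF_VECTOR | VALID_MASK);
    }
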
@@ -363,105 +344,84 @@ static inline bool cpu_has_vmx_flexpriority(void)
static inline bool cpu_has_vmx_ept_execute_only(void)
{
- return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
+ return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT);
}
static inline bool cpu_has_vmx_eptp_uncacheable(void)
{
- return vmx_capability.ept & VMX_EPTP_UC_BIT;
+ return !!(vmx_capability.ept & VMX_EPTP_UC_BIT);
}
static inline bool cpu_has_vmx_eptp_writeback(void)
{
- return vmx_capability.ept & VMX_EPTP_WB_BIT;
+ return !!(vmx_capability.ept & VMX_EPTP_WB_BIT);
}
static inline bool cpu_has_vmx_ept_2m_page(void)
{
- return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
+ return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
}
static inline bool cpu_has_vmx_ept_1g_page(void)
{
- return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
-}
-
-static inline bool cpu_has_vmx_ept_4levels(void)
-{
- return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
-}
-
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
- return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
+ return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
}
-static inline bool cpu_has_vmx_invept_context(void)
+static inline int cpu_has_vmx_invept_individual_addr(void)
{
- return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
+ return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
}
-static inline bool cpu_has_vmx_invept_global(void)
+static inline int cpu_has_vmx_invept_context(void)
{
- return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
+ return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT);
}
-static inline bool cpu_has_vmx_invvpid_single(void)
+static inline int cpu_has_vmx_invept_global(void)
{
- return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
+ return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT);
}
-static inline bool cpu_has_vmx_invvpid_global(void)
-{
- return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
-}
-
-static inline bool cpu_has_vmx_ept(void)
+static inline int cpu_has_vmx_ept(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_EPT;
}
-static inline bool cpu_has_vmx_unrestricted_guest(void)
+static inline int cpu_has_vmx_unrestricted_guest(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_UNRESTRICTED_GUEST;
}
-static inline bool cpu_has_vmx_ple(void)
+static inline int cpu_has_vmx_ple(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}
-static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
+static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
{
return flexpriority_enabled && irqchip_in_kernel(kvm);
}
-static inline bool cpu_has_vmx_vpid(void)
+static inline int cpu_has_vmx_vpid(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_ENABLE_VPID;
}
-static inline bool cpu_has_vmx_rdtscp(void)
+static inline int cpu_has_vmx_rdtscp(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_RDTSCP;
}
-static inline bool cpu_has_virtual_nmis(void)
+static inline int cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
}
-static inline bool cpu_has_vmx_wbinvd_exit(void)
-{
- return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_WBINVD_EXITING;
-}
-
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -519,26 +479,13 @@ static void vmcs_clear(struct vmcs *vmcs)
u8 error;
asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
- : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
+ : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
if (error)
printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
vmcs, phys_addr);
}
-static void vmcs_load(struct vmcs *vmcs)
-{
- u64 phys_addr = __pa(vmcs);
- u8 error;
-
- asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
- : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
- : "cc", "memory");
- if (error)
- printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
- vmcs, phys_addr);
-}
-
static void __vcpu_clear(void *arg)
{
struct vcpu_vmx *vmx = arg;
@@ -548,6 +495,7 @@ static void __vcpu_clear(void *arg)
vmcs_clear(vmx->vmcs);
if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
+ rdtscll(vmx->vcpu.arch.host_tsc);
list_del(&vmx->local_vcpus_link);
vmx->vcpu.cpu = -1;
vmx->launched = 0;
@@ -560,27 +508,12 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
}
-static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
{
if (vmx->vpid == 0)
return;
- if (cpu_has_vmx_invvpid_single())
- __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
-}
-
-static inline void vpid_sync_vcpu_global(void)
-{
- if (cpu_has_vmx_invvpid_global())
- __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
-}
-
-static inline void vpid_sync_context(struct vcpu_vmx *vmx)
-{
- if (cpu_has_vmx_invvpid_single())
- vpid_sync_vcpu_single(vmx);
- else
- vpid_sync_vcpu_global();
+ __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
}
static inline void ept_sync_global(void)
@@ -612,10 +545,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
static unsigned long vmcs_readl(unsigned long field)
{
- unsigned long value = 0;
+ unsigned long value;
asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
- : "+a"(value) : "d"(field) : "cc");
+ : "=a"(value) : "d"(field) : "cc");
return value;
}
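
This hunk reverts vmcs_readl() to an uninitialized output with a write-only "=a" constraint; the newer code zero-initialized the variable and used read-write "+a", presumably so a VMREAD skipped by the fault-on-reboot handler still yields a defined value. A minimal sketch of the constraint difference (illustrative only: VMREAD executes in VMX root mode, and the kernel really emits it through __ex() opcode bytes):

    static unsigned long read_field(unsigned long field)
    {
            unsigned long value;    /* no initializer needed: "=a" says the
                                     * asm only writes it; "+a" would make
                                     * it an input as well */
            asm volatile ("vmread %1, %0"
                          : "=a"(value) : "d"(field) : "cc");
            return value;
    }
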
@@ -703,69 +636,16 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
vmcs_write32(EXCEPTION_BITMAP, eb);
}
-static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
-{
- unsigned i;
- struct msr_autoload *m = &vmx->msr_autoload;
-
- if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
- vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
- vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
- return;
- }
-
- for (i = 0; i < m->nr; ++i)
- if (m->guest[i].index == msr)
- break;
-
- if (i == m->nr)
- return;
- --m->nr;
- m->guest[i] = m->guest[m->nr];
- m->host[i] = m->host[m->nr];
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
-}
-
-static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
- u64 guest_val, u64 host_val)
-{
- unsigned i;
- struct msr_autoload *m = &vmx->msr_autoload;
-
- if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
- vmcs_write64(GUEST_IA32_EFER, guest_val);
- vmcs_write64(HOST_IA32_EFER, host_val);
- vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
- vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
- return;
- }
-
- for (i = 0; i < m->nr; ++i)
- if (m->guest[i].index == msr)
- break;
-
- if (i == m->nr) {
- ++m->nr;
- vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
- vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
- }
-
- m->guest[i].index = msr;
- m->guest[i].value = guest_val;
- m->host[i].index = msr;
- m->host[i].value = host_val;
-}
-
static void reload_tss(void)
{
/*
* VT restores TR but not its size. Useless.
*/
- struct kvm_desc_ptr *gdt = &__get_cpu_var(host_gdt);
+ struct descriptor_table gdt;
struct kvm_desc_struct *descs;
- descs = (void *)gdt->address;
+ kvm_get_gdt(&gdt);
+ descs = (void *)gdt.base;
descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
load_TR_desc();
}
@@ -792,56 +672,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
guest_efer |= host_efer & ignore_bits;
vmx->guest_msrs[efer_offset].data = guest_efer;
vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
-
- clear_atomic_switch_msr(vmx, MSR_EFER);
- /* On ept, can't emulate nx, and must switch nx atomically */
- if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
- guest_efer = vmx->vcpu.arch.efer;
- if (!(guest_efer & EFER_LMA))
- guest_efer &= ~EFER_LME;
- add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
- return false;
- }
-
return true;
}
-static unsigned long segment_base(u16 selector)
-{
- struct kvm_desc_ptr *gdt = &__get_cpu_var(host_gdt);
- struct kvm_desc_struct *d;
- unsigned long table_base;
- unsigned long v;
-
- if (!(selector & ~3))
- return 0;
-
- table_base = gdt->address;
-
- if (selector & 4) { /* from ldt */
- u16 ldt_selector = kvm_read_ldt();
-
- if (!(ldt_selector & ~3))
- return 0;
-
- table_base = segment_base(ldt_selector);
- }
- d = (struct kvm_desc_struct *)(table_base + (selector & ~7));
- v = kvm_get_desc_base(d);
-#ifdef CONFIG_X86_64
- if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
- v |= ((unsigned long)((struct kvm_ldttss_desc64 *)d)->base3) << 32;
-#endif
- return v;
-}
-
-static inline unsigned long kvm_read_tr_base(void)
-{
- u16 tr;
- asm("str %0" : "=g"(tr));
- return segment_base(tr);
-}
-
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -857,7 +690,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
*/
vmx->host_state.ldt_sel = kvm_read_ldt();
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
- savesegment(fs, vmx->host_state.fs_sel);
+ vmx->host_state.fs_sel = kvm_read_fs();
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
@@ -865,7 +698,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmcs_write16(HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
- savesegment(gs, vmx->host_state.gs_sel);
+ vmx->host_state.gs_sel = kvm_read_gs();
if (!(vmx->host_state.gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
@@ -882,9 +715,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#endif
#ifdef CONFIG_X86_64
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
- if (is_long_mode(&vmx->vcpu))
+ if (is_long_mode(&vmx->vcpu)) {
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ }
#endif
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -894,32 +728,35 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
{
+ unsigned long flags;
+
if (!vmx->host_state.loaded)
return;
++vmx->vcpu.stat.host_state_reload;
vmx->host_state.loaded = 0;
-#ifdef CONFIG_X86_64
- if (is_long_mode(&vmx->vcpu))
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-#endif
+ if (vmx->host_state.fs_reload_needed)
+ kvm_load_fs(vmx->host_state.fs_sel);
if (vmx->host_state.gs_ldt_reload_needed) {
kvm_load_ldt(vmx->host_state.ldt_sel);
+ /*
+ * If we have to reload gs, we must take care to
+ * preserve our gs base.
+ */
+ local_irq_save(flags);
+ kvm_load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
- load_gs_index(vmx->host_state.gs_sel);
-#else
- loadsegment(gs, vmx->host_state.gs_sel);
+ wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
#endif
+ local_irq_restore(flags);
}
- if (vmx->host_state.fs_reload_needed)
- loadsegment(fs, vmx->host_state.fs_sel);
reload_tss();
#ifdef CONFIG_X86_64
- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ if (is_long_mode(&vmx->vcpu)) {
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ }
#endif
- if (current_thread_info()->status & TS_USEDFPU)
- clts();
- kvm_load_gdt(&__get_cpu_var(host_gdt));
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -936,47 +773,62 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+ u64 phys_addr = __pa(vmx->vmcs);
+ u64 tsc_this, delta, new_offset;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
- else if (vcpu->cpu != cpu)
+ if (vcpu->cpu != cpu) {
vcpu_clear(vmx);
+ kvm_migrate_timers(vcpu);
+ set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+ local_irq_disable();
+ list_add(&vmx->local_vcpus_link,
+ &per_cpu(vcpus_on_cpu, cpu));
+ local_irq_enable();
+ }
if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
+ u8 error;
+
per_cpu(current_vmcs, cpu) = vmx->vmcs;
- vmcs_load(vmx->vmcs);
+ asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
+ : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+ : "cc");
+ if (error)
+ printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+ vmx->vmcs, phys_addr);
}
if (vcpu->cpu != cpu) {
- struct kvm_desc_ptr *gdt = &__get_cpu_var(host_gdt);
+ struct descriptor_table dt;
unsigned long sysenter_esp;
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- local_irq_disable();
- list_add(&vmx->local_vcpus_link,
- &per_cpu(vcpus_on_cpu, cpu));
- local_irq_enable();
-
+ vcpu->cpu = cpu;
/*
* Linux uses per-cpu TSS and GDT, so set these when switching
* processors.
*/
vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
- vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */
+ kvm_get_gdt(&dt);
+ vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+
+ /*
+ * Make sure the time stamp counter is monotonic.
+ */
+ rdtscll(tsc_this);
+ if (tsc_this < vcpu->arch.host_tsc) {
+ delta = vcpu->arch.host_tsc - tsc_this;
+ new_offset = vmcs_read64(TSC_OFFSET) + delta;
+ vmcs_write64(TSC_OFFSET, new_offset);
+ }
}
}
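
The restored tail of vmx_vcpu_load() keeps the guest-visible TSC from jumping backwards when a vcpu migrates between CPUs whose TSCs are not synchronized. The arithmetic as a standalone sketch (names hypothetical):

    #include <stdint.h>

    /* Hardware computes guest_tsc = host_tsc + TSC_OFFSET.  If the new
     * CPU's TSC (tsc_now) is behind the value saved when the vcpu left
     * the old CPU, grow the offset by the difference so guest time
     * never runs backwards. */
    static uint64_t adjust_tsc_offset(uint64_t offset, uint64_t saved_tsc,
                                      uint64_t tsc_now)
    {
            if (tsc_now < saved_tsc)
                    offset += saved_tsc - tsc_now;
            return offset;
    }
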
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
__vmx_load_host_state(to_vmx(vcpu));
- if (!vmm_exclusive) {
- __vcpu_clear(to_vmx(vcpu));
- kvm_cpu_vmxoff();
- }
}
static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -1035,9 +887,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
int ret = 0;
if (interruptibility & GUEST_INTR_STATE_STI)
- ret |= KVM_X86_SHADOW_INT_STI;
+ ret |= X86_SHADOW_INT_STI;
if (interruptibility & GUEST_INTR_STATE_MOV_SS)
- ret |= KVM_X86_SHADOW_INT_MOV_SS;
+ ret |= X86_SHADOW_INT_MOV_SS;
return ret & mask;
}
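
vmx_get_interrupt_shadow() and its setter translate between the VMCS interruptibility-state bits and KVM's arch-neutral shadow flags; the hunks here only swap the flag constant names. A condensed sketch of the mapping (flag values illustrative):

    #define STI_SHADOW    (1u << 0)   /* GUEST_INTR_STATE_STI    */
    #define MOV_SS_SHADOW (1u << 1)   /* GUEST_INTR_STATE_MOV_SS */
    #define SHADOW_STI    (1u << 0)   /* X86_SHADOW_INT_STI      */
    #define SHADOW_MOV_SS (1u << 1)   /* X86_SHADOW_INT_MOV_SS   */

    static unsigned get_shadow(unsigned interruptibility, unsigned mask)
    {
            unsigned ret = 0;

            if (interruptibility & STI_SHADOW)
                    ret |= SHADOW_STI;
            if (interruptibility & MOV_SS_SHADOW)
                    ret |= SHADOW_MOV_SS;
            return ret & mask;  /* caller picks which shadows it cares about */
    }
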
@@ -1049,9 +901,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
- if (mask & KVM_X86_SHADOW_INT_MOV_SS)
+ if (mask & X86_SHADOW_INT_MOV_SS)
interruptibility |= GUEST_INTR_STATE_MOV_SS;
- else if (mask & KVM_X86_SHADOW_INT_STI)
+ if (mask & X86_SHADOW_INT_STI)
interruptibility |= GUEST_INTR_STATE_STI;
if ((interruptibility != interruptibility_old))
@@ -1070,20 +922,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vmx_set_interrupt_shadow(vcpu, 0);
}
-static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
-{
- /* Ensure that we clear the HLT state in the VMCS. We don't need to
- * explicitly skip the instruction because if the HLT state is set, then
- * the instruction is already executing and RIP has already been
- * advanced. */
- if (!yield_on_hlt &&
- vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
-}
-
static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
- bool has_error_code, u32 error_code,
- bool reinject)
+ bool has_error_code, u32 error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info = nr | INTR_INFO_VALID_MASK;
@@ -1094,8 +934,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
}
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ vmx->rmode.irq.pending = true;
+ vmx->rmode.irq.vector = nr;
+ vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+ if (kvm_exception_is_soft(nr))
+ vmx->rmode.irq.rip +=
+ vmx->vcpu.arch.event_exit_inst_len;
+ intr_info |= INTR_TYPE_SOFT_INTR;
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
@@ -1107,7 +955,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
- vmx_clear_hlt(vcpu);
}
static bool vmx_rdtscp_supported(void)
@@ -1154,10 +1001,10 @@ static void setup_msrs(struct vcpu_vmx *vmx)
if (index >= 0 && vmx->rdtscp_enabled)
move_msr_up(vmx, index, save_nmsrs++);
/*
- * MSR_STAR is only needed on long mode guests, and only
+ * MSR_K6_STAR is only needed on long mode guests, and only
* if efer.sce is enabled.
*/
- index = __find_msr_index(vmx, MSR_STAR);
+ index = __find_msr_index(vmx, MSR_K6_STAR);
if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
move_msr_up(vmx, index, save_nmsrs++);
}
@@ -1192,17 +1039,12 @@ static u64 guest_read_tsc(void)
}
/*
- * writes 'offset' into guest's timestamp counter offset register
+ * writes 'guest_tsc' into guest's timestamp counter "register"
+ * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
*/
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
-{
- vmcs_write64(TSC_OFFSET, offset);
-}
-
-static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
{
- u64 offset = vmcs_read64(TSC_OFFSET);
- vmcs_write64(TSC_OFFSET, offset + adjustment);
+ vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
}
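
The rewritten comment states the invariant; spelled out: the CPU evaluates guest_tsc = host_tsc + TSC_OFFSET on every guest RDTSC, so making the guest observe a given value right now means programming the offset to the difference, with unsigned 64-bit wraparound handling a guest TSC behind the host's:

    #include <stdint.h>

    static uint64_t tsc_offset_for(uint64_t guest_tsc, uint64_t host_tsc)
    {
            /* guest_tsc = host_tsc + offset => offset = guest_tsc - host_tsc;
             * modular arithmetic makes a "negative" offset work too. */
            return guest_tsc - host_tsc;
    }
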
/*
@@ -1275,6 +1117,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
+ u64 host_tsc;
int ret = 0;
switch (msr_index) {
@@ -1304,7 +1147,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, data);
+ rdtscll(host_tsc);
+ guest_write_tsc(data, host_tsc);
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -1373,58 +1217,37 @@ static __init int vmx_disabled_by_bios(void)
u64 msr;
rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
- if (msr & FEATURE_CONTROL_LOCKED) {
- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
- && kvm_tboot_enabled())
- return 1;
- if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
- && !kvm_tboot_enabled()) {
- printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
- " activate TXT before enabling KVM\n");
- return 1;
- }
- }
-
- return 0;
+ return (msr & (FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED))
+ == FEATURE_CONTROL_LOCKED;
/* locked but not enabled */
}
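
The simplified vmx_disabled_by_bios() packs "locked but not enabled" into a single comparison (the pre-SMX code only knew one enable bit). Unpacked, with illustrative bit positions for IA32_FEATURE_CONTROL:

    #include <stdint.h>

    #define FC_LOCKED (1ull << 0)   /* FEATURE_CONTROL_LOCKED        */
    #define FC_VMXON  (1ull << 2)   /* FEATURE_CONTROL_VMXON_ENABLED */

    static int disabled_by_bios(uint64_t msr)
    {
            /* Masking to the two bits of interest must leave exactly
             * LOCKED set: locked by the BIOS, with VMXON still off. */
            return (msr & (FC_LOCKED | FC_VMXON)) == FC_LOCKED;
    }
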
-static void kvm_cpu_vmxon(u64 addr)
-{
- asm volatile (ASM_VMX_VMXON_RAX
- : : "a"(&addr), "m"(addr)
- : "memory", "cc");
-}
-
static int hardware_enable(void *garbage)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
- u64 old, test_bits;
+ u64 old;
if (read_cr4() & X86_CR4_VMXE)
return -EBUSY;
INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-
- test_bits = FEATURE_CONTROL_LOCKED;
- test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- if (kvm_tboot_enabled())
- test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
-
- if ((old & test_bits) != test_bits) {
+ if ((old & (FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED))
+ != (FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED))
/* enable and lock */
- wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
- }
+ wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
+ FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED);
write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
+ asm volatile (ASM_VMX_VMXON_RAX
+ : : "a"(&phys_addr), "m"(phys_addr)
+ : "memory", "cc");
- if (vmm_exclusive) {
- kvm_cpu_vmxon(phys_addr);
- ept_sync_global();
- }
-
- kvm_store_gdt(&__get_cpu_var(host_gdt));
+ ept_sync_global();
return 0;
}
@@ -1446,15 +1269,13 @@ static void vmclear_local_vcpus(void)
static void kvm_cpu_vmxoff(void)
{
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+ write_cr4(read_cr4() & ~X86_CR4_VMXE);
}
static void hardware_disable(void *garbage)
{
- if (vmm_exclusive) {
- vmclear_local_vcpus();
- kvm_cpu_vmxoff();
- }
- write_cr4(read_cr4() & ~X86_CR4_VMXE);
+ vmclear_local_vcpus();
+ kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1476,14 +1297,6 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
return 0;
}
-static __init bool allow_1_setting(u32 msr, u32 ctl)
-{
- u32 vmx_msr_low, vmx_msr_high;
-
- rdmsr(msr, vmx_msr_low, vmx_msr_high);
- return vmx_msr_high & ctl;
-}
-
static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
{
u32 vmx_msr_low, vmx_msr_high;
@@ -1500,7 +1313,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_pin_based_exec_control) < 0)
return -EIO;
- min =
+ min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING |
@@ -1513,10 +1326,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING;
-
- if (yield_on_hlt)
- min |= CPU_BASED_HLT_EXITING;
-
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1598,12 +1407,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;
- cpu_has_load_ia32_efer =
- allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
- VM_ENTRY_LOAD_IA32_EFER)
- && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
- VM_EXIT_LOAD_IA32_EFER);
-
return 0;
}
@@ -1671,8 +1474,7 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_vpid())
enable_vpid = 0;
- if (!cpu_has_vmx_ept() ||
- !cpu_has_vmx_ept_4levels()) {
+ if (!cpu_has_vmx_ept()) {
enable_ept = 0;
enable_unrestricted_guest = 0;
}
@@ -1760,8 +1562,8 @@ static gva_t rmode_tss_base(struct kvm *kvm)
struct kvm_memslots *slots;
gfn_t base_gfn;
- slots = kvm_memslots(kvm);
- base_gfn = slots->memslots[0].base_gfn +
+ slots = rcu_dereference(kvm->memslots);
+ base_gfn = kvm->memslots->memslots[0].base_gfn +
kvm->memslots->memslots[0].npages - 3;
return base_gfn << PAGE_SHIFT;
}
@@ -1777,13 +1579,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
save->limit = vmcs_read32(sf->limit);
save->ar = vmcs_read32(sf->ar_bytes);
vmcs_write16(sf->selector, save->base >> 4);
- vmcs_write32(sf->base, save->base & 0xffff0);
+ vmcs_write32(sf->base, save->base & 0xfffff);
vmcs_write32(sf->limit, 0xffff);
vmcs_write32(sf->ar_bytes, 0xf3);
- if (save->base & 0xf)
- printk_once(KERN_WARNING "kvm: segment base is not paragraph"
- " aligned when entering protected mode (seg=%d)",
- seg);
}
static void enter_rmode(struct kvm_vcpu *vcpu)
@@ -1881,27 +1679,26 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
}
- vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
+ vcpu->arch.efer |= EFER_LMA;
+ vmx_set_efer(vcpu, vcpu->arch.efer);
}
static void exit_lmode(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.efer &= ~EFER_LMA;
+
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS)
& ~VM_ENTRY_IA32E_MODE);
- vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
}
#endif
static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
{
- vpid_sync_context(to_vmx(vcpu));
- if (enable_ept) {
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
- return;
+ vpid_sync_vcpu_all(to_vmx(vcpu));
+ if (enable_ept)
ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
- }
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -1912,13 +1709,6 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
}
-static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
-{
- if (enable_ept && is_paging(vcpu))
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-}
-
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -1934,20 +1724,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
return;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
- vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
- vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
- vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
+ vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
+ vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
+ vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
+ vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
}
}
static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
- vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
- vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
- vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
- vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+ vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+ vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+ vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+ vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
}
__set_bit(VCPU_EXREG_PDPTR,
@@ -1962,7 +1752,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
{
- vmx_decache_cr3(vcpu);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -2043,7 +1832,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(cr3);
vmcs_write64(EPT_POINTER, eptp);
- guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
+ guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
vcpu->kvm->arch.ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
@@ -2186,28 +1975,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
*l = (ar >> 13) & 1;
}
-static void vmx_get_idt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
- dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
- dt->address = vmcs_readl(GUEST_IDTR_BASE);
+ dt->limit = vmcs_read32(GUEST_IDTR_LIMIT);
+ dt->base = vmcs_readl(GUEST_IDTR_BASE);
}
-static void vmx_set_idt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
- vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
- vmcs_writel(GUEST_IDTR_BASE, dt->address);
+ vmcs_write32(GUEST_IDTR_LIMIT, dt->limit);
+ vmcs_writel(GUEST_IDTR_BASE, dt->base);
}
-static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
- dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
- dt->address = vmcs_readl(GUEST_GDTR_BASE);
+ dt->limit = vmcs_read32(GUEST_GDTR_LIMIT);
+ dt->base = vmcs_readl(GUEST_GDTR_BASE);
}
-static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
- vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
- vmcs_writel(GUEST_GDTR_BASE, dt->address);
+ vmcs_write32(GUEST_GDTR_LIMIT, dt->limit);
+ vmcs_writel(GUEST_GDTR_BASE, dt->base);
}
static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
@@ -2548,16 +2337,6 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
spin_unlock(&vmx_vpid_lock);
}
-static void free_vpid(struct vcpu_vmx *vmx)
-{
- if (!enable_vpid)
- return;
- spin_lock(&vmx_vpid_lock);
- if (vmx->vpid != 0)
- __clear_bit(vmx->vpid, vmx_vpid_bitmap);
- spin_unlock(&vmx_vpid_lock);
-}
-
static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
{
int f = sizeof(unsigned long);
@@ -2594,9 +2373,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
- u64 host_pat;
+ u64 host_pat, tsc_this, tsc_base;
unsigned long a;
- struct kvm_desc_ptr dt;
+ struct descriptor_table dt;
int i;
unsigned long kvm_vmx_return;
u32 exec_control;
@@ -2655,15 +2434,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
- vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */
+ vmcs_writel(HOST_CR0, read_cr0()); /* 22.2.3 */
vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */
vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
- vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
- vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
+ vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */
+ vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
@@ -2677,16 +2456,14 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- kvm_native_store_idt(&dt);
- vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
+ kvm_get_idt(&dt);
+ vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
- vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
- vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk);
vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs);
@@ -2735,34 +2512,33 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
- kvm_write_tsc(&vmx->vcpu, 0);
+ tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+ rdtscll(tsc_this);
+ if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+ tsc_base = tsc_this;
+
+ guest_write_tsc(0, tsc_base);
return 0;
}
static int init_rmode(struct kvm *kvm)
{
- int idx, ret = 0;
-
- idx = srcu_read_lock(&kvm->srcu);
if (!init_rmode_tss(kvm))
- goto exit;
+ return 0;
if (!init_rmode_identity_map(kvm))
- goto exit;
-
- ret = 1;
-exit:
- srcu_read_unlock(&kvm->srcu, idx);
- return ret;
+ return 0;
+ return 1;
}
static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 msr;
- int ret;
+ int ret, idx;
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
+ idx = kvm_srcu_read_lock(&vcpu->kvm->srcu);
if (!init_rmode(vmx->vcpu.kvm)) {
ret = -ENOMEM;
goto out;
@@ -2779,9 +2555,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
msr |= MSR_IA32_APICBASE_BSP;
kvm_set_apic_base(&vmx->vcpu, msr);
- ret = fx_init(&vmx->vcpu);
- if (ret != 0)
- goto out;
+ fx_init(&vmx->vcpu);
seg_setup(VCPU_SREG_CS);
/*
@@ -2831,7 +2605,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+ vmcs_write32(GUEST_ACTIVITY_STATE, 0);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
@@ -2864,7 +2638,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx_fpu_activate(&vmx->vcpu);
update_exception_bitmap(&vmx->vcpu);
- vpid_sync_context(vmx);
+ vpid_sync_vcpu_all(vmx);
ret = 0;
@@ -2872,6 +2646,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->emulation_required = 0;
out:
+ kvm_srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
}
@@ -2893,10 +2668,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return;
}
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
- enable_irq_window(vcpu);
- return;
- }
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
@@ -2912,8 +2683,16 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
++vcpu->stat.irq_injections;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ vmx->rmode.irq.pending = true;
+ vmx->rmode.irq.vector = irq;
+ vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+ if (vcpu->arch.interrupt.soft)
+ vmx->rmode.irq.rip +=
+ vmx->vcpu.arch.event_exit_inst_len;
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
intr = irq | INTR_INFO_VALID_MASK;
@@ -2924,7 +2703,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else
intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
- vmx_clear_hlt(vcpu);
}
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2946,13 +2724,18 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
++vcpu->stat.nmi_injections;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ vmx->rmode.irq.pending = true;
+ vmx->rmode.irq.vector = NMI_VECTOR;
+ vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+ INTR_INFO_VALID_MASK);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
- vmx_clear_hlt(vcpu);
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2961,15 +2744,16 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
return 0;
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
- | GUEST_INTR_STATE_NMI));
+ (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
}
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
if (!cpu_has_virtual_nmis())
return to_vmx(vcpu)->soft_vnmi_blocked;
- return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
+ else
+ return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ GUEST_INTR_STATE_NMI);
}
static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@ -3023,7 +2807,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
- if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
+ if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
return 1;
/*
* Forward all other exceptions that are valid in real mode.
@@ -3120,7 +2904,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}
if (is_invalid_opcode(intr_info)) {
- er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -3139,7 +2923,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (kvm_event_needs_reinjection(vcpu))
kvm_mmu_unprotect_page_virt(vcpu, cr2);
- return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+ return kvm_mmu_page_fault(vcpu, cr2, error_code);
}
if (vmx->rmode.vm86_active &&
@@ -3204,20 +2988,22 @@ static int handle_io(struct kvm_vcpu *vcpu)
int size, in, string;
unsigned port;
+ ++vcpu->stat.io_exits;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
string = (exit_qualification & 16) != 0;
- in = (exit_qualification & 8) != 0;
- ++vcpu->stat.io_exits;
-
- if (string || in)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ if (string) {
+ if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO)
+ return 0;
+ return 1;
+ }
- port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
- skip_emulated_instruction(vcpu);
+ in = (exit_qualification & 8) != 0;
+ port = exit_qualification >> 16;
- return kvm_fast_pio_out(vcpu, size, port);
+ skip_emulated_instruction(vcpu);
+ return kvm_emulate_pio(vcpu, in, size, port);
}
static void
@@ -3236,7 +3022,6 @@ static int handle_cr(struct kvm_vcpu *vcpu)
unsigned long exit_qualification, val;
int cr;
int reg;
- int err;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
cr = exit_qualification & 15;
@@ -3247,22 +3032,22 @@ static int handle_cr(struct kvm_vcpu *vcpu)
trace_kvm_cr_write(cr, val);
switch (cr) {
case 0:
- err = kvm_set_cr0(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
+ kvm_set_cr0(vcpu, val);
+ skip_emulated_instruction(vcpu);
return 1;
case 3:
- err = kvm_set_cr3(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
+ kvm_set_cr3(vcpu, val);
+ skip_emulated_instruction(vcpu);
return 1;
case 4:
- err = kvm_set_cr4(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
+ kvm_set_cr4(vcpu, val);
+ skip_emulated_instruction(vcpu);
return 1;
case 8: {
u8 cr8_prev = kvm_get_cr8(vcpu);
u8 cr8 = kvm_register_read(vcpu, reg);
- err = kvm_set_cr8(vcpu, cr8);
- kvm_complete_insn_gp(vcpu, err);
+ kvm_set_cr8(vcpu, cr8);
+ skip_emulated_instruction(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
return 1;
if (cr8_prev <= cr8)
@@ -3281,9 +3066,8 @@ static int handle_cr(struct kvm_vcpu *vcpu)
case 1: /*mov from cr*/
switch (cr) {
case 3:
- val = kvm_read_cr3(vcpu);
- kvm_register_write(vcpu, reg, val);
- trace_kvm_cr_read(cr, val);
+ kvm_register_write(vcpu, reg, vcpu->arch.cr3);
+ trace_kvm_cr_read(cr, vcpu->arch.cr3);
skip_emulated_instruction(vcpu);
return 1;
case 8:
@@ -3310,9 +3094,19 @@ static int handle_cr(struct kvm_vcpu *vcpu)
return 0;
}
+static int check_dr_alias(struct kvm_vcpu *vcpu)
+{
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return -1;
+ }
+ return 0;
+}
+
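
check_dr_alias() encodes the CR4.DE rule: with DE clear, DR4 and DR5 are legacy aliases of DR6 and DR7; with DE set, touching them must raise #UD. A compact sketch of how the switch statements below apply that rule:

    /* Returns the canonical debug register number, or -1 when the
     * access must fault (mirroring check_dr_alias() queueing #UD). */
    static int canonical_dr(int dr, int cr4_de)
    {
            if (dr == 4 || dr == 5) {
                    if (cr4_de)
                            return -1;      /* #UD */
                    dr += 2;                /* DR4 -> DR6, DR5 -> DR7 */
            }
            return dr;
    }
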
static int handle_dr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
+ unsigned long val;
int dr, reg;
/* Do not handle if the CPL > 0, will trigger GP on re-entry */
@@ -3347,20 +3141,67 @@ static int handle_dr(struct kvm_vcpu *vcpu)
dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
if (exit_qualification & TYPE_MOV_FROM_DR) {
- unsigned long val;
- if (!kvm_get_dr(vcpu, dr, &val))
- kvm_register_write(vcpu, reg, val);
- } else
- kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]);
+ switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
+ case 4:
+ if (check_dr_alias(vcpu) < 0)
+ return 1;
+ /* fall through */
+ case 6:
+ val = vcpu->arch.dr6;
+ break;
+ case 5:
+ if (check_dr_alias(vcpu) < 0)
+ return 1;
+ /* fall through */
+ default: /* 7 */
+ val = vcpu->arch.dr7;
+ break;
+ }
+ kvm_register_write(vcpu, reg, val);
+ } else {
+ val = vcpu->arch.regs[reg];
+ switch (dr) {
+ case 0 ... 3:
+ vcpu->arch.db[dr] = val;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = val;
+ break;
+ case 4:
+ if (check_dr_alias(vcpu) < 0)
+ return 1;
+ /* fall through */
+ case 6:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+ break;
+ case 5:
+ if (check_dr_alias(vcpu) < 0)
+ return 1;
+ /* fall through */
+ default: /* 7 */
+ if (val & 0xffffffff00000000ULL) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ vcpu->arch.switch_db_regs =
+ (val & DR7_BP_EN_MASK);
+ }
+ break;
+ }
+ }
skip_emulated_instruction(vcpu);
return 1;
}
-static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
-{
- vmcs_writel(GUEST_DR7, val);
-}
-
static int handle_cpuid(struct kvm_vcpu *vcpu)
{
kvm_emulate_cpuid(vcpu);
@@ -3406,7 +3247,6 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
{
- kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
@@ -3419,8 +3259,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
++vcpu->stat.irq_window_exits;
/*
@@ -3455,11 +3293,6 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_invd(struct kvm_vcpu *vcpu)
-{
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
-}
-
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3472,31 +3305,34 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
static int handle_wbinvd(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
- kvm_emulate_wbinvd(vcpu);
+ /* TODO: Add support for VT-d/pass-through device */
return 1;
}
-static int handle_xsetbv(struct kvm_vcpu *vcpu)
+static int handle_apic_access(struct kvm_vcpu *vcpu)
{
- u64 new_bv = kvm_read_edx_eax(vcpu);
- u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ unsigned long exit_qualification;
+ enum emulation_result er;
+ unsigned long offset;
- if (kvm_set_xcr(vcpu, index, new_bv) == 0)
- skip_emulated_instruction(vcpu);
- return 1;
-}
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+ offset = exit_qualification & 0xffful;
-static int handle_apic_access(struct kvm_vcpu *vcpu)
-{
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ er = emulate_instruction(vcpu, 0, 0, 0);
+
+ if (er != EMULATE_DONE) {
+ printk(KERN_ERR
+ "Fail to handle apic access vmexit! Offset is 0x%lx\n",
+ offset);
+ return -ENOEXEC;
+ }
+ return 1;
}
static int handle_task_switch(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification;
- bool has_error_code = false;
- u32 error_code = 0;
u16 tss_selector;
int reason, type, idt_v;
@@ -3519,13 +3355,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
kvm_clear_interrupt_queue(vcpu);
break;
case INTR_TYPE_HARD_EXCEPTION:
- if (vmx->idt_vectoring_info &
- VECTORING_INFO_DELIVER_CODE_MASK) {
- has_error_code = true;
- error_code =
- vmcs_read32(IDT_VECTORING_ERROR_CODE);
- }
- /* fall through */
case INTR_TYPE_SOFT_EXCEPTION:
kvm_clear_exception_queue(vcpu);
break;
@@ -3540,13 +3369,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
type != INTR_TYPE_NMI_INTR))
skip_emulated_instruction(vcpu);
- if (kvm_task_switch(vcpu, tss_selector, reason,
- has_error_code, error_code) == EMULATE_FAIL) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
+ if (!kvm_task_switch(vcpu, tss_selector, reason))
return 0;
- }
/* clear all local breakpoint enable flags */
vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
@@ -3587,7 +3411,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
trace_kvm_page_fault(gpa, exit_qualification);
- return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0);
+ return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
}
static u64 ept_rsvd_mask(u64 spte, int level)
@@ -3682,7 +3506,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
++vcpu->stat.nmi_window_exits;
- kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
}
@@ -3692,26 +3515,22 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
int ret = 1;
- u32 cpu_exec_ctrl;
- bool intr_window_requested;
-
- cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
while (!guest_state_valid(vcpu)) {
- if (intr_window_requested
- && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
- return handle_interrupt_window(&vmx->vcpu);
-
- err = emulate_instruction(vcpu, 0);
+ err = emulate_instruction(vcpu, 0, 0, 0);
if (err == EMULATE_DO_MMIO) {
ret = 0;
goto out;
}
- if (err != EMULATE_DONE)
- return 0;
+ if (err != EMULATE_DONE) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ ret = 0;
+ goto out;
+ }
if (signal_pending(current))
goto out;
@@ -3760,7 +3579,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MSR_WRITE] = handle_wrmsr,
[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
[EXIT_REASON_HLT] = handle_halt,
- [EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
[EXIT_REASON_VMCALL] = handle_vmcall,
[EXIT_REASON_VMCLEAR] = handle_vmx_insn,
@@ -3775,7 +3593,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
- [EXIT_REASON_XSETBV] = handle_xsetbv,
[EXIT_REASON_TASK_SWITCH] = handle_task_switch,
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
@@ -3788,12 +3605,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
static const int kvm_vmx_max_exit_handlers =
ARRAY_SIZE(kvm_vmx_exit_handlers);
-static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
-{
- *info1 = vmcs_readl(EXIT_QUALIFICATION);
- *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
-}
-
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -3804,18 +3615,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
- trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
+ trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
/* If guest state is invalid, start emulating */
if (vmx->emulation_required && emulate_invalid_guest_state)
return handle_invalid_guest_state(vcpu);
- if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
- vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
- vcpu->run->fail_entry.hardware_entry_failure_reason
- = exit_reason;
- return 0;
- }
+ /* CR3 accesses don't cause a VM exit in paging mode, so we need
+ * to sync with the guest's real CR3. */
+ if (enable_ept && is_paging(vcpu))
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
if (unlikely(vmx->fail)) {
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -3870,9 +3679,18 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
vmcs_write32(TPR_THRESHOLD, irr);
}
-static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
+static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
- u32 exit_intr_info = vmx->exit_intr_info;
+ u32 exit_intr_info;
+ u32 idt_vectoring_info = vmx->idt_vectoring_info;
+ bool unblock_nmi;
+ u8 vector;
+ int type;
+ bool idtv_info_valid;
+
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+ vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
/* Handle machine checks before interrupts are enabled */
if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
@@ -3882,21 +3700,10 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
/* We need to handle NMIs before interrupts are enabled */
if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
- (exit_intr_info & INTR_INFO_VALID_MASK)) {
- kvm_before_handle_nmi(&vmx->vcpu);
+ (exit_intr_info & INTR_INFO_VALID_MASK))
asm("int $2");
- kvm_after_handle_nmi(&vmx->vcpu);
- }
-}
-
-static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
-{
- u32 exit_intr_info = vmx->exit_intr_info;
- bool unblock_nmi;
- u8 vector;
- bool idtv_info_valid;
- idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+ idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
if (cpu_has_virtual_nmis()) {
unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
@@ -3918,18 +3725,6 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
} else if (unlikely(vmx->soft_vnmi_blocked))
vmx->vnmi_blocked_time +=
ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
-}
-
-static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
- u32 idt_vectoring_info,
- int instr_len_field,
- int error_code_field)
-{
- u8 vector;
- int type;
- bool idtv_info_valid;
-
- idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
vmx->vcpu.arch.nmi_injected = false;
kvm_clear_exception_queue(&vmx->vcpu);
@@ -3938,8 +3733,6 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
if (!idtv_info_valid)
return;
- kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
-
vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
@@ -3956,18 +3749,18 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
break;
case INTR_TYPE_SOFT_EXCEPTION:
vmx->vcpu.arch.event_exit_inst_len =
- vmcs_read32(instr_len_field);
+ vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
/* fall through */
case INTR_TYPE_HARD_EXCEPTION:
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
- u32 err = vmcs_read32(error_code_field);
+ u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
kvm_queue_exception_e(&vmx->vcpu, vector, err);
} else
kvm_queue_exception(&vmx->vcpu, vector);
break;
case INTR_TYPE_SOFT_INTR:
vmx->vcpu.arch.event_exit_inst_len =
- vmcs_read32(instr_len_field);
+ vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
/* fall through */
case INTR_TYPE_EXT_INTR:
kvm_queue_interrupt(&vmx->vcpu, vector,
@@ -3978,21 +3771,27 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
}
}
-static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
-{
- __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
- VM_EXIT_INSTRUCTION_LEN,
- IDT_VECTORING_ERROR_CODE);
-}
-
-static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
+/*
+ * Failure to inject an interrupt should give us the information
+ * in IDT_VECTORING_INFO_FIELD. However, if the failure occurs
+ * when fetching the interrupt redirection bitmap in the real-mode
+ * tss, this doesn't happen. So we do it ourselves.
+ */
+static void fixup_rmode_irq(struct vcpu_vmx *vmx)
{
- __vmx_complete_interrupts(to_vmx(vcpu),
- vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
- VM_ENTRY_INSTRUCTION_LEN,
- VM_ENTRY_EXCEPTION_ERROR_CODE);
-
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmx->rmode.irq.pending = 0;
+ if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip)
+ return;
+ kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip);
+ if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
+ vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
+ vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
+ return;
+ }
+ vmx->idt_vectoring_info =
+ VECTORING_INFO_VALID_MASK
+ | INTR_TYPE_EXT_INTR
+ | vmx->rmode.irq.vector;
}
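
fixup_rmode_irq() depends on the "rip - 1" trick in the real-mode injection paths above: RIP is rewound by one before entry, so an injection that never completed leaves RIP + 1 equal to the saved value, which is testable after the exit. A standalone sketch of the recovery (field encodings illustrative):

    #include <stdint.h>

    #define VECTORING_VALID (1u << 31)
    #define VECTORING_TYPE  (7u << 8)
    #define TYPE_EXT_INTR   (0u << 8)   /* external interrupt */

    static void fixup_failed_injection(uint32_t *vect_info, uint64_t *rip,
                                       uint64_t saved_rip, uint8_t vector)
    {
            if (*rip + 1 != saved_rip)
                    return;                         /* injection completed */
            *rip = saved_rip;                       /* undo the rewind */
            if (*vect_info & VECTORING_VALID) {
                    *vect_info &= ~VECTORING_TYPE;  /* requeue as external */
                    *vect_info |= TYPE_EXT_INTR;
            } else {
                    *vect_info = VECTORING_VALID | TYPE_EXT_INTR | vector;
            }
    }
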
#ifdef CONFIG_X86_64
@@ -4029,6 +3828,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);
+ /*
+ * Loading guest fpu may have cleared host cr0.ts
+ */
+ vmcs_writel(HOST_CR0, read_cr0());
+
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@@ -4119,27 +3923,23 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
: "cc", "memory"
- , R"ax", R"bx", R"di", R"si"
+ , R"bx", R"di", R"si"
#ifdef CONFIG_X86_64
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#endif
);
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
- | (1 << VCPU_EXREG_PDPTR)
- | (1 << VCPU_EXREG_CR3));
+ | (1 << VCPU_EXREG_PDPTR));
vcpu->arch.regs_dirty = 0;
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+ if (vmx->rmode.irq.pending)
+ fixup_rmode_irq(vmx);
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
- vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
- vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
- vmx_complete_atomic_exit(vmx);
- vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
@@ -4161,26 +3961,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- free_vpid(vmx);
+ spin_lock(&vmx_vpid_lock);
+ if (vmx->vpid != 0)
+ __clear_bit(vmx->vpid, vmx_vpid_bitmap);
+ spin_unlock(&vmx_vpid_lock);
vmx_free_vmcs(vcpu);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
}
-static inline void vmcs_init(struct vmcs *vmcs)
-{
- u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id()));
-
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
-
- vmcs_clear(vmcs);
-
- if (!vmm_exclusive)
- kvm_cpu_vmxoff();
-}
-
static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
{
int err;
@@ -4206,11 +3996,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx->vmcs)
goto free_msrs;
- vmcs_init(vmx->vmcs);
+ vmcs_clear(vmx->vmcs);
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
- vmx->vcpu.cpu = cpu;
err = vmx_vcpu_setup(vmx);
vmx_vcpu_put(&vmx->vcpu);
put_cpu();
@@ -4237,7 +4026,6 @@ free_msrs:
uninit_vcpu:
kvm_vcpu_uninit(&vmx->vcpu);
free_vcpu:
- free_vpid(vmx);
kmem_cache_free(kvm_vcpu_cache, vmx);
return ERR_PTR(err);
}
@@ -4341,6 +4129,11 @@ static int vmx_get_lpage_level(void)
return PT_PDPE_LEVEL;
}
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -4363,10 +4156,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
}
}
-static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
-{
-}
-
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -4394,7 +4183,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
- .decache_cr3 = vmx_decache_cr3,
.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr3 = vmx_set_cr3,
@@ -4404,7 +4192,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
- .set_dr7 = vmx_set_dr7,
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
@@ -4422,7 +4209,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_irq = vmx_inject_irq,
.set_nmi = vmx_inject_nmi,
.queue_exception = vmx_queue_exception,
- .cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
.nmi_allowed = vmx_nmi_allowed,
.get_nmi_mask = vmx_get_nmi_mask,
@@ -4435,23 +4221,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_tdp_level = get_ept_level,
.get_mt_mask = vmx_get_mt_mask,
- .get_exit_info = vmx_get_exit_info,
.exit_reasons_str = vmx_exit_reasons_str,
-
.get_lpage_level = vmx_get_lpage_level,
.cpuid_update = vmx_cpuid_update,
.rdtscp_supported = vmx_rdtscp_supported,
-
- .set_supported_cpuid = vmx_set_supported_cpuid,
-
- .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
-
- .write_tsc_offset = vmx_write_tsc_offset,
- .adjust_tsc_offset = vmx_adjust_tsc_offset,
-
- .set_tdp_cr3 = vmx_set_cr3,
};
static int __init vmx_init(void)
@@ -4499,8 +4274,7 @@ static int __init vmx_init(void)
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
- r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
+ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
if (r)
goto out3;
@@ -4513,6 +4287,8 @@ static int __init vmx_init(void)
if (enable_ept) {
bypass_guest_pf = 0;
+ kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+ VMX_EPT_WRITABLE_MASK);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();