Diffstat (limited to 'linux/x86/vmx.c')
-rw-r--r-- | linux/x86/vmx.c | 1012
1 file changed, 394 insertions(+), 618 deletions(-)
diff --git a/linux/x86/vmx.c b/linux/x86/vmx.c
index ed4cf97..e29837e 100644
--- a/linux/x86/vmx.c
+++ b/linux/x86/vmx.c
@@ -45,7 +45,6 @@
  * machines without emulation or binary translation.
  *
  * Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
@@ -68,7 +67,6 @@
 #include <linux/moduleparam.h>
 #include <linux/ftrace_event.h>
 #include <linux/slab.h>
-#include <linux/tboot.h>
 
 #include "kvm_cache_regs.h"
 #include "x86.h"
@@ -77,14 +75,12 @@
 #include <asm/vmx.h>
 #include <asm/virtext.h>
 #include <asm/mce.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
 
 #include "trace.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
-MODULE_INFO(version, "kvm-kmod-2.6.38-rc7");
+MODULE_INFO(version, "kvm-kmod-2.6.34");
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
@@ -107,12 +103,6 @@ module_param_named(unrestricted_guest,
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
-static int __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
-static int __read_mostly yield_on_hlt = 1;
-module_param(yield_on_hlt, bool, S_IRUGO);
-
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST	\
 	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK	\
@@ -149,8 +139,6 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
-#define NR_AUTOLOAD_MSRS 1
-
 struct vmcs {
 	u32 revision_id;
 	u32 abort;
@@ -169,7 +157,6 @@ struct vcpu_vmx {
 	unsigned long host_rsp;
 	int launched;
 	u8 fail;
-	u32 exit_intr_info;
 	u32 idt_vectoring_info;
 	struct shared_msr_entry *guest_msrs;
 	int nmsrs;
@@ -179,11 +166,6 @@ struct vcpu_vmx {
 	u64 msr_guest_kernel_gs_base;
 #endif
 	struct vmcs *vmcs;
-	struct msr_autoload {
-		unsigned nr;
-		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
-		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
-	} msr_autoload;
 	struct {
 		int loaded;
 		u16 fs_sel, gs_sel, ldt_sel;
@@ -199,6 +181,11 @@ struct vcpu_vmx {
 			u32 limit;
 			u32 ar;
 		} tr, es, ds, fs, gs;
+		struct {
+			bool pending;
+			u8 vector;
+			unsigned rip;
+		} irq;
 	} rmode;
 	int vpid;
 	bool emulation_required;
@@ -219,22 +206,16 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu);
-static DEFINE_PER_CPU(struct kvm_desc_ptr, host_gdt);
 
 static unsigned long *vmx_io_bitmap_a;
 static unsigned long *vmx_io_bitmap_b;
 static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
-static bool cpu_has_load_ia32_efer;
-
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -283,67 +264,67 @@ static u64 host_efer;
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 
 /*
- * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
+ * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
  * away by decrementing the array size.
  */
 static const u32 vmx_msr_index[] = {
 #ifdef CONFIG_X86_64
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
-	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+	MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
 };
 #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
-static inline bool is_page_fault(u32 intr_info)
+static inline int is_page_fault(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
 }
 
-static inline bool is_no_device(u32 intr_info)
+static inline int is_no_device(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
 }
 
-static inline bool is_invalid_opcode(u32 intr_info)
+static inline int is_invalid_opcode(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
 }
 
-static inline bool is_external_interrupt(u32 intr_info)
+static inline int is_external_interrupt(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
-static inline bool is_machine_check(u32 intr_info)
+static inline int is_machine_check(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
 }
 
-static inline bool cpu_has_vmx_msr_bitmap(void)
+static inline int cpu_has_vmx_msr_bitmap(void)
 {
 	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
 }
 
-static inline bool cpu_has_vmx_tpr_shadow(void)
+static inline int cpu_has_vmx_tpr_shadow(void)
 {
 	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
 }
 
-static inline bool vm_need_tpr_shadow(struct kvm *kvm)
+static inline int vm_need_tpr_shadow(struct kvm *kvm)
 {
 	return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm));
 }
 
-static inline bool cpu_has_secondary_exec_ctrls(void)
+static inline int cpu_has_secondary_exec_ctrls(void)
 {
 	return vmcs_config.cpu_based_exec_ctrl &
		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -363,105 +344,84 @@ static inline bool cpu_has_vmx_flexpriority(void)
 
 static inline bool cpu_has_vmx_ept_execute_only(void)
 {
-	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
+	return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT);
 }
 
 static inline bool cpu_has_vmx_eptp_uncacheable(void)
 {
-	return vmx_capability.ept & VMX_EPTP_UC_BIT;
+	return !!(vmx_capability.ept & VMX_EPTP_UC_BIT);
 }
 
 static inline bool cpu_has_vmx_eptp_writeback(void)
 {
-	return vmx_capability.ept & VMX_EPTP_WB_BIT;
+	return !!(vmx_capability.ept & VMX_EPTP_WB_BIT);
 }
 
 static inline bool cpu_has_vmx_ept_2m_page(void)
 {
-	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
+	return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
 }
 
 static inline bool cpu_has_vmx_ept_1g_page(void)
 {
-	return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
-}
-
-static inline bool cpu_has_vmx_ept_4levels(void)
-{
-	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
-}
-
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
-	return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
+	return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
 }
 
-static inline bool cpu_has_vmx_invept_context(void)
+static inline int cpu_has_vmx_invept_individual_addr(void)
 {
-	return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
 }
 
-static inline bool cpu_has_vmx_invept_global(void)
+static inline int cpu_has_vmx_invept_context(void)
 {
-	return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT);
 }
 
-static inline bool cpu_has_vmx_invvpid_single(void)
+static inline int cpu_has_vmx_invept_global(void)
 {
-	return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
+	return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT);
 }
 
-static inline bool cpu_has_vmx_invvpid_global(void)
-{
-	return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
-}
-
-static inline bool cpu_has_vmx_ept(void)
+static inline int cpu_has_vmx_ept(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_EPT;
 }
 
-static inline bool cpu_has_vmx_unrestricted_guest(void)
+static inline int cpu_has_vmx_unrestricted_guest(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_UNRESTRICTED_GUEST;
 }
 
-static inline bool cpu_has_vmx_ple(void)
+static inline int cpu_has_vmx_ple(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 }
 
-static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm)
+static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 {
 	return flexpriority_enabled && irqchip_in_kernel(kvm);
 }
 
-static inline bool cpu_has_vmx_vpid(void)
+static inline int cpu_has_vmx_vpid(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_VPID;
 }
 
-static inline bool cpu_has_vmx_rdtscp(void)
+static inline int cpu_has_vmx_rdtscp(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_RDTSCP;
 }
 
-static inline bool cpu_has_virtual_nmis(void)
+static inline int cpu_has_virtual_nmis(void)
 {
 	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
 }
 
-static inline bool cpu_has_vmx_wbinvd_exit(void)
-{
-	return vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_WBINVD_EXITING;
-}
-
 static inline bool report_flexpriority(void)
 {
 	return flexpriority_enabled;
@@ -519,26 +479,13 @@ static void vmcs_clear(struct vmcs *vmcs)
 	u8 error;
 
 	asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0"
-		      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
+		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
		      : "cc", "memory");
 	if (error)
		printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n",
		       vmcs, phys_addr);
 }
 
-static void vmcs_load(struct vmcs *vmcs)
-{
-	u64 phys_addr = __pa(vmcs);
-	u8 error;
-
-	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
-		      : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
-		      : "cc", "memory");
-	if (error)
-		printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-		       vmcs, phys_addr);
-}
-
 static void __vcpu_clear(void *arg)
 {
 	struct vcpu_vmx *vmx = arg;
@@ -548,6 +495,7 @@ static void __vcpu_clear(void *arg)
		vmcs_clear(vmx->vmcs);
 	if (per_cpu(current_vmcs, cpu) == vmx->vmcs)
		per_cpu(current_vmcs, cpu) = NULL;
+	rdtscll(vmx->vcpu.arch.host_tsc);
 	list_del(&vmx->local_vcpus_link);
 	vmx->vcpu.cpu = -1;
 	vmx->launched = 0;
@@ -560,27 +508,12 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
	smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
 }
 
-static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
+static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
 {
 	if (vmx->vpid == 0)
		return;
 
-	if (cpu_has_vmx_invvpid_single())
-		__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
-}
-
-static inline void vpid_sync_vcpu_global(void)
-{
-	if (cpu_has_vmx_invvpid_global())
-		__invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0);
-}
-
-static inline void vpid_sync_context(struct vcpu_vmx *vmx)
-{
-	if (cpu_has_vmx_invvpid_single())
-		vpid_sync_vcpu_single(vmx);
-	else
-		vpid_sync_vcpu_global();
+	__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
 }
 
 static inline void ept_sync_global(void)
@@ -612,10 +545,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
 
 static unsigned long vmcs_readl(unsigned long field)
 {
-	unsigned long value = 0;
+	unsigned long value;
 
 	asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX)
-		      : "+a"(value) : "d"(field) : "cc");
+		      : "=a"(value) : "d"(field) : "cc");
 	return value;
 }
@@ -703,69 +636,16 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
-static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
-{
-	unsigned i;
-	struct msr_autoload *m = &vmx->msr_autoload;
-
-	if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-		vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-		vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-		return;
-	}
-
-	for (i = 0; i < m->nr; ++i)
-		if (m->guest[i].index == msr)
-			break;
-
-	if (i == m->nr)
-		return;
-	--m->nr;
-	m->guest[i] = m->guest[m->nr];
-	m->host[i] = m->host[m->nr];
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
-}
-
-static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
-				  u64 guest_val, u64 host_val)
-{
-	unsigned i;
-	struct msr_autoload *m = &vmx->msr_autoload;
-
-	if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-		vmcs_write64(GUEST_IA32_EFER, guest_val);
-		vmcs_write64(HOST_IA32_EFER, host_val);
-		vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-		vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-		return;
-	}
-
-	for (i = 0; i < m->nr; ++i)
-		if (m->guest[i].index == msr)
-			break;
-
-	if (i == m->nr) {
-		++m->nr;
-		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
-		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
-	}
-
-	m->guest[i].index = msr;
-	m->guest[i].value = guest_val;
-	m->host[i].index = msr;
-	m->host[i].value = host_val;
-}
-
 static void reload_tss(void)
 {
 	/*
	 * VT restores TR but not its size. Useless.
	 */
-	struct kvm_desc_ptr *gdt = &__get_cpu_var(host_gdt);
+	struct descriptor_table gdt;
 	struct kvm_desc_struct *descs;
 
-	descs = (void *)gdt->address;
+	kvm_get_gdt(&gdt);
+	descs = (void *)gdt.base;
 	descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
 	load_TR_desc();
 }
@@ -792,56 +672,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	guest_efer |= host_efer & ignore_bits;
 	vmx->guest_msrs[efer_offset].data = guest_efer;
 	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
-
-	clear_atomic_switch_msr(vmx, MSR_EFER);
-	/* On ept, can't emulate nx, and must switch nx atomically */
-	if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
-		guest_efer = vmx->vcpu.arch.efer;
-		if (!(guest_efer & EFER_LMA))
-			guest_efer &= ~EFER_LME;
-		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
-		return false;
-	}
-
 	return true;
 }
 
-static unsigned long segment_base(u16 selector)
-{
-	struct kvm_desc_ptr *gdt = &__get_cpu_var(host_gdt);
-	struct kvm_desc_struct *d;
-	unsigned long table_base;
-	unsigned long v;
-
-	if (!(selector & ~3))
-		return 0;
-
-	table_base = gdt->address;
-
-	if (selector & 4) { /* from ldt */
-		u16 ldt_selector = kvm_read_ldt();
-
-		if (!(ldt_selector & ~3))
-			return 0;
-
-		table_base = segment_base(ldt_selector);
-	}
-	d = (struct kvm_desc_struct *)(table_base + (selector & ~7));
-	v = kvm_get_desc_base(d);
-#ifdef CONFIG_X86_64
-	if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
-		v |= ((unsigned long)((struct kvm_ldttss_desc64 *)d)->base3) << 32;
-#endif
-	return v;
-}
-
-static inline unsigned long kvm_read_tr_base(void)
-{
-	u16 tr;
-	asm("str %0" : "=g"(tr));
-	return segment_base(tr);
-}
-
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -857,7 +690,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
	 */
 	vmx->host_state.ldt_sel = kvm_read_ldt();
 	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
-	savesegment(fs, vmx->host_state.fs_sel);
+	vmx->host_state.fs_sel = kvm_read_fs();
 	if (!(vmx->host_state.fs_sel & 7)) {
		vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
		vmx->host_state.fs_reload_needed = 0;
@@ -865,7 +698,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
		vmcs_write16(HOST_FS_SELECTOR, 0);
		vmx->host_state.fs_reload_needed = 1;
	}
-	savesegment(gs, vmx->host_state.gs_sel);
+	vmx->host_state.gs_sel = kvm_read_gs();
 	if (!(vmx->host_state.gs_sel & 7))
		vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
 	else {
@@ -882,9 +715,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
-	if (is_long_mode(&vmx->vcpu))
+	if (is_long_mode(&vmx->vcpu)) {
+		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+	}
 #endif
 	for (i = 0; i < vmx->save_nmsrs; ++i)
		kvm_set_shared_msr(vmx->guest_msrs[i].index,
@@ -894,32 +728,35 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 
 static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 {
+	unsigned long flags;
+
 	if (!vmx->host_state.loaded)
		return;
 
 	++vmx->vcpu.stat.host_state_reload;
 	vmx->host_state.loaded = 0;
-#ifdef CONFIG_X86_64
-	if (is_long_mode(&vmx->vcpu))
-		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
-#endif
+	if (vmx->host_state.fs_reload_needed)
+		kvm_load_fs(vmx->host_state.fs_sel);
 	if (vmx->host_state.gs_ldt_reload_needed) {
		kvm_load_ldt(vmx->host_state.ldt_sel);
+		/*
+		 * If we have to reload gs, we must take care to
+		 * preserve our gs base.
+		 */
+		local_irq_save(flags);
+		kvm_load_gs(vmx->host_state.gs_sel);
 #ifdef CONFIG_X86_64
-		load_gs_index(vmx->host_state.gs_sel);
-#else
-		loadsegment(gs, vmx->host_state.gs_sel);
+		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
+		local_irq_restore(flags);
	}
-	if (vmx->host_state.fs_reload_needed)
-		loadsegment(fs, vmx->host_state.fs_sel);
 	reload_tss();
 #ifdef CONFIG_X86_64
-	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	if (is_long_mode(&vmx->vcpu)) {
+		rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	}
 #endif
-	if (current_thread_info()->status & TS_USEDFPU)
-		clts();
-	kvm_load_gdt(&__get_cpu_var(host_gdt));
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -936,47 +773,62 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+	u64 phys_addr = __pa(vmx->vmcs);
+	u64 tsc_this, delta, new_offset;
 
-	if (!vmm_exclusive)
-		kvm_cpu_vmxon(phys_addr);
-	else if (vcpu->cpu != cpu)
+	if (vcpu->cpu != cpu) {
		vcpu_clear(vmx);
+		kvm_migrate_timers(vcpu);
+		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+		local_irq_disable();
+		list_add(&vmx->local_vcpus_link,
+			 &per_cpu(vcpus_on_cpu, cpu));
+		local_irq_enable();
+	}
 
 	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
+		u8 error;
+
		per_cpu(current_vmcs, cpu) = vmx->vmcs;
-		vmcs_load(vmx->vmcs);
+		asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
+			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+			      : "cc");
+		if (error)
+			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+			       vmx->vmcs, phys_addr);
	}
 
 	if (vcpu->cpu != cpu) {
-		struct kvm_desc_ptr *gdt = &__get_cpu_var(host_gdt);
+		struct descriptor_table dt;
		unsigned long sysenter_esp;
 
-		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-		local_irq_disable();
-		list_add(&vmx->local_vcpus_link,
-			 &per_cpu(vcpus_on_cpu, cpu));
-		local_irq_enable();
-
+		vcpu->cpu = cpu;
		/*
		 * Linux uses per-cpu TSS and GDT, so set these when switching
		 * processors.
		 */
		vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
-		vmcs_writel(HOST_GDTR_BASE, gdt->address);   /* 22.2.4 */
+		kvm_get_gdt(&dt);
+		vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */
 
		rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
		vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+
+		/*
+		 * Make sure the time stamp counter is monotonous.
+		 */
+		rdtscll(tsc_this);
+		if (tsc_this < vcpu->arch.host_tsc) {
+			delta = vcpu->arch.host_tsc - tsc_this;
+			new_offset = vmcs_read64(TSC_OFFSET) + delta;
+			vmcs_write64(TSC_OFFSET, new_offset);
+		}
	}
 }
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	__vmx_load_host_state(to_vmx(vcpu));
-	if (!vmm_exclusive) {
-		__vcpu_clear(to_vmx(vcpu));
-		kvm_cpu_vmxoff();
-	}
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -1035,9 +887,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 	int ret = 0;
 
 	if (interruptibility & GUEST_INTR_STATE_STI)
-		ret |= KVM_X86_SHADOW_INT_STI;
+		ret |= X86_SHADOW_INT_STI;
 	if (interruptibility & GUEST_INTR_STATE_MOV_SS)
-		ret |= KVM_X86_SHADOW_INT_MOV_SS;
+		ret |= X86_SHADOW_INT_MOV_SS;
 
 	return ret & mask;
 }
@@ -1049,9 +901,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
 
 	interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
 
-	if (mask & KVM_X86_SHADOW_INT_MOV_SS)
+	if (mask & X86_SHADOW_INT_MOV_SS)
		interruptibility |= GUEST_INTR_STATE_MOV_SS;
-	else if (mask & KVM_X86_SHADOW_INT_STI)
+	if (mask & X86_SHADOW_INT_STI)
		interruptibility |= GUEST_INTR_STATE_STI;
 
 	if ((interruptibility != interruptibility_old))
@@ -1070,20 +922,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
-static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
-{
-	/* Ensure that we clear the HLT state in the VMCS.  We don't need to
-	 * explicitly skip the instruction because if the HLT state is set, then
-	 * the instruction is already executing and RIP has already been
-	 * advanced. */
-	if (!yield_on_hlt &&
-	    vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
-		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
-}
-
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-				bool has_error_code, u32 error_code,
-				bool reinject)
+				bool has_error_code, u32 error_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
@@ -1094,8 +934,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
	}
 
 	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, nr) != EMULATE_DONE)
-			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+		vmx->rmode.irq.pending = true;
+		vmx->rmode.irq.vector = nr;
+		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		if (kvm_exception_is_soft(nr))
+			vmx->rmode.irq.rip +=
+				vmx->vcpu.arch.event_exit_inst_len;
+		intr_info |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
		return;
	}
@@ -1107,7 +955,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
		intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
-	vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -1154,10 +1001,10 @@ static void setup_msrs(struct vcpu_vmx *vmx)
		if (index >= 0 && vmx->rdtscp_enabled)
			move_msr_up(vmx, index, save_nmsrs++);
		/*
-		 * MSR_STAR is only needed on long mode guests, and only
+		 * MSR_K6_STAR is only needed on long mode guests, and only
		 * if efer.sce is enabled.
		 */
-		index = __find_msr_index(vmx, MSR_STAR);
+		index = __find_msr_index(vmx, MSR_K6_STAR);
		if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
			move_msr_up(vmx, index, save_nmsrs++);
	}
@@ -1192,17 +1039,12 @@ static u64 guest_read_tsc(void)
 }
 
 /*
- * writes 'offset' into guest's timestamp counter offset register
+ * writes 'guest_tsc' into guest's timestamp counter "register"
+ * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
  */
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
-{
-	vmcs_write64(TSC_OFFSET, offset);
-}
-
-static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 {
-	u64 offset = vmcs_read64(TSC_OFFSET);
-	vmcs_write64(TSC_OFFSET, offset + adjustment);
+	vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
 }
 
 /*
@@ -1275,6 +1117,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct shared_msr_entry *msr;
+	u64 host_tsc;
 	int ret = 0;
 
 	switch (msr_index) {
@@ -1304,7 +1147,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
		vmcs_writel(GUEST_SYSENTER_ESP, data);
		break;
	case MSR_IA32_TSC:
-		kvm_write_tsc(vcpu, data);
+		rdtscll(host_tsc);
+		guest_write_tsc(data, host_tsc);
		break;
	case MSR_IA32_CR_PAT:
		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -1373,58 +1217,37 @@ static __init int vmx_disabled_by_bios(void)
 	u64 msr;
 
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
-	if (msr & FEATURE_CONTROL_LOCKED) {
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& kvm_tboot_enabled())
-			return 1;
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& !kvm_tboot_enabled()) {
-			printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
-				" activate TXT before enabling KVM\n");
-			return 1;
-		}
-	}
-
-	return 0;
+	return (msr & (FEATURE_CONTROL_LOCKED |
+		       FEATURE_CONTROL_VMXON_ENABLED))
+	    == FEATURE_CONTROL_LOCKED; /* locked but not enabled */
 }
 
-static void kvm_cpu_vmxon(u64 addr)
-{
-	asm volatile (ASM_VMX_VMXON_RAX
-			: : "a"(&addr), "m"(addr)
-			: "memory", "cc");
-}
-
 static int hardware_enable(void *garbage)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-	u64 old, test_bits;
+	u64 old;
 
 	if (read_cr4() & X86_CR4_VMXE)
		return -EBUSY;
 
 	INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
 	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-
-	test_bits = FEATURE_CONTROL_LOCKED;
-	test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
-	if (kvm_tboot_enabled())
-		test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
-
-	if ((old & test_bits) != test_bits) {
+	if ((old & (FEATURE_CONTROL_LOCKED |
+		    FEATURE_CONTROL_VMXON_ENABLED))
+	    != (FEATURE_CONTROL_LOCKED |
+		FEATURE_CONTROL_VMXON_ENABLED))
		/* enable and lock */
-		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
-	}
+		wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
+		       FEATURE_CONTROL_LOCKED |
+		       FEATURE_CONTROL_VMXON_ENABLED);
 	write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
+	asm volatile (ASM_VMX_VMXON_RAX
+		      : : "a"(&phys_addr), "m"(phys_addr)
+		      : "memory", "cc");
 
-	if (vmm_exclusive) {
-		kvm_cpu_vmxon(phys_addr);
-		ept_sync_global();
-	}
-
-	kvm_store_gdt(&__get_cpu_var(host_gdt));
+	ept_sync_global();
 
	return 0;
 }
@@ -1446,15 +1269,13 @@ static void vmclear_local_vcpus(void)
 static void kvm_cpu_vmxoff(void)
 {
	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
 static void hardware_disable(void *garbage)
 {
-	if (vmm_exclusive) {
-		vmclear_local_vcpus();
-		kvm_cpu_vmxoff();
-	}
-	write_cr4(read_cr4() & ~X86_CR4_VMXE);
+	vmclear_local_vcpus();
+	kvm_cpu_vmxoff();
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1476,14 +1297,6 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
	return 0;
 }
 
-static __init bool allow_1_setting(u32 msr, u32 ctl)
-{
-	u32 vmx_msr_low, vmx_msr_high;
-
-	rdmsr(msr, vmx_msr_low, vmx_msr_high);
-	return vmx_msr_high & ctl;
-}
-
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 {
	u32 vmx_msr_low, vmx_msr_high;
@@ -1500,7 +1313,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
				&_pin_based_exec_control) < 0)
		return -EIO;
 
-	min =
+	min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
	      CPU_BASED_CR8_LOAD_EXITING |
	      CPU_BASED_CR8_STORE_EXITING |
@@ -1513,10 +1326,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
	      CPU_BASED_MWAIT_EXITING |
	      CPU_BASED_MONITOR_EXITING |
	      CPU_BASED_INVLPG_EXITING;
-
-	if (yield_on_hlt)
-		min |= CPU_BASED_HLT_EXITING;
-
	opt = CPU_BASED_TPR_SHADOW |
	      CPU_BASED_USE_MSR_BITMAPS |
	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -1598,12 +1407,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
	vmcs_conf->vmexit_ctrl  = _vmexit_control;
	vmcs_conf->vmentry_ctrl = _vmentry_control;
 
-	cpu_has_load_ia32_efer =
-		allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
-				VM_ENTRY_LOAD_IA32_EFER)
-		&& allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
-				   VM_EXIT_LOAD_IA32_EFER);
-
	return 0;
 }
@@ -1671,8 +1474,7 @@ static __init int hardware_setup(void)
	if (!cpu_has_vmx_vpid())
		enable_vpid = 0;
 
-	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
+	if (!cpu_has_vmx_ept()) {
		enable_ept = 0;
		enable_unrestricted_guest = 0;
	}
@@ -1760,8 +1562,8 @@ static gva_t rmode_tss_base(struct kvm *kvm)
		struct kvm_memslots *slots;
		gfn_t base_gfn;
 
-		slots = kvm_memslots(kvm);
-		base_gfn = slots->memslots[0].base_gfn +
+		slots = rcu_dereference(kvm->memslots);
+		base_gfn = kvm->memslots->memslots[0].base_gfn +
			 kvm->memslots->memslots[0].npages - 3;
		return base_gfn << PAGE_SHIFT;
	}
@@ -1777,13 +1579,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
	save->limit = vmcs_read32(sf->limit);
	save->ar = vmcs_read32(sf->ar_bytes);
	vmcs_write16(sf->selector, save->base >> 4);
-	vmcs_write32(sf->base, save->base & 0xffff0);
+	vmcs_write32(sf->base, save->base & 0xfffff);
	vmcs_write32(sf->limit, 0xffff);
	vmcs_write32(sf->ar_bytes, 0xf3);
-	if (save->base & 0xf)
-		printk_once(KERN_WARNING "kvm: segment base is not paragraph"
-			    " aligned when entering protected mode (seg=%d)",
-			    seg);
 }
 
 static void enter_rmode(struct kvm_vcpu *vcpu)
@@ -1881,27 +1679,26 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
			     (guest_tr_ar & ~AR_TYPE_MASK)
			     | AR_TYPE_BUSY_64_TSS);
	}
-	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
+	vcpu->arch.efer |= EFER_LMA;
+	vmx_set_efer(vcpu, vcpu->arch.efer);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.efer &= ~EFER_LMA;
+
	vmcs_write32(VM_ENTRY_CONTROLS,
		     vmcs_read32(VM_ENTRY_CONTROLS)
		     & ~VM_ENTRY_IA32E_MODE);
-	vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
 }
 
 #endif
 
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
-	vpid_sync_context(to_vmx(vcpu));
-	if (enable_ept) {
-		if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-			return;
+	vpid_sync_vcpu_all(to_vmx(vcpu));
+	if (enable_ept)
		ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
-	}
 }
 
 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
@@ -1912,13 +1709,6 @@
	vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
 }
 
-static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
-{
-	if (enable_ept && is_paging(vcpu))
-		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
-	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-}
-
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
	ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
@@ -1934,20 +1724,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
		return;
 
	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-		vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
-		vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
-		vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
-		vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
+		vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
+		vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
+		vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
+		vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
	}
 }
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 {
	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-		vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-		vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-		vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-		vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+		vcpu->arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+		vcpu->arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+		vcpu->arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+		vcpu->arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
	}
 
	__set_bit(VCPU_EXREG_PDPTR,
@@ -1962,7 +1752,6 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
					unsigned long cr0,
					struct kvm_vcpu *vcpu)
 {
-	vmx_decache_cr3(vcpu);
	if (!(cr0 & X86_CR0_PG)) {
		/* From paging/starting to nonpaging */
		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -2043,7 +1832,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
	if (enable_ept) {
		eptp = construct_eptp(cr3);
		vmcs_write64(EPT_POINTER, eptp);
-		guest_cr3 = is_paging(vcpu) ? kvm_read_cr3(vcpu) :
+		guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
			vcpu->kvm->arch.ept_identity_map_addr;
		ept_load_pdptrs(vcpu);
	}
@@ -2186,28 +1975,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
	*l = (ar >> 13) & 1;
 }
 
-static void vmx_get_idt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
-	dt->address = vmcs_readl(GUEST_IDTR_BASE);
+	dt->limit = vmcs_read32(GUEST_IDTR_LIMIT);
+	dt->base = vmcs_readl(GUEST_IDTR_BASE);
 }
 
-static void vmx_set_idt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
-	vmcs_writel(GUEST_IDTR_BASE, dt->address);
+	vmcs_write32(GUEST_IDTR_LIMIT, dt->limit);
+	vmcs_writel(GUEST_IDTR_BASE, dt->base);
 }
 
-static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
-	dt->address = vmcs_readl(GUEST_GDTR_BASE);
+	dt->limit = vmcs_read32(GUEST_GDTR_LIMIT);
+	dt->base = vmcs_readl(GUEST_GDTR_BASE);
 }
 
-static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct kvm_desc_ptr *dt)
+static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
-	vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
-	vmcs_writel(GUEST_GDTR_BASE, dt->address);
+	vmcs_write32(GUEST_GDTR_LIMIT, dt->limit);
+	vmcs_writel(GUEST_GDTR_BASE, dt->base);
 }
 
 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
@@ -2548,16 +2337,6 @@ static void allocate_vpid(struct vcpu_vmx *vmx)
	spin_unlock(&vmx_vpid_lock);
 }
 
-static void free_vpid(struct vcpu_vmx *vmx)
-{
-	if (!enable_vpid)
-		return;
-	spin_lock(&vmx_vpid_lock);
-	if (vmx->vpid != 0)
-		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
-	spin_unlock(&vmx_vpid_lock);
-}
-
 static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
 {
	int f = sizeof(unsigned long);
@@ -2594,9 +2373,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
	u32 host_sysenter_cs, msr_low, msr_high;
	u32 junk;
-	u64 host_pat;
+	u64 host_pat, tsc_this, tsc_base;
	unsigned long a;
-	struct kvm_desc_ptr dt;
+	struct descriptor_table dt;
	int i;
	unsigned long kvm_vmx_return;
	u32 exec_control;
@@ -2655,15 +2434,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
 
-	vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
+	vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
	vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 
	vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
	vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
	vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-	vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
-	vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
+	vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs());    /* 22.2.4 */
+	vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs());    /* 22.2.4 */
	vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
	rdmsrl(MSR_FS_BASE, a);
@@ -2677,16 +2456,14 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
	vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-	kvm_native_store_idt(&dt);
-	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
+	kvm_get_idt(&dt);
+	vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */
 
	asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
%0" : "=r"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest)); rdmsr(MSR_IA32_SYSENTER_CS, host_sysenter_cs, junk); vmcs_write32(HOST_IA32_SYSENTER_CS, host_sysenter_cs); @@ -2735,34 +2512,33 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); - kvm_write_tsc(&vmx->vcpu, 0); + tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; + rdtscll(tsc_this); + if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc) + tsc_base = tsc_this; + + guest_write_tsc(0, tsc_base); return 0; } static int init_rmode(struct kvm *kvm) { - int idx, ret = 0; - - idx = srcu_read_lock(&kvm->srcu); if (!init_rmode_tss(kvm)) - goto exit; + return 0; if (!init_rmode_identity_map(kvm)) - goto exit; - - ret = 1; -exit: - srcu_read_unlock(&kvm->srcu, idx); - return ret; + return 0; + return 1; } static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 msr; - int ret; + int ret, idx; vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); + idx = kvm_srcu_read_lock(&vcpu->kvm->srcu); if (!init_rmode(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; @@ -2779,9 +2555,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) msr |= MSR_IA32_APICBASE_BSP; kvm_set_apic_base(&vmx->vcpu, msr); - ret = fx_init(&vmx->vcpu); - if (ret != 0) - goto out; + fx_init(&vmx->vcpu); seg_setup(VCPU_SREG_CS); /* @@ -2831,7 +2605,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_IDTR_BASE, 0); vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); + vmcs_write32(GUEST_ACTIVITY_STATE, 0); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); @@ -2864,7 +2638,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_fpu_activate(&vmx->vcpu); update_exception_bitmap(&vmx->vcpu); - vpid_sync_context(vmx); + vpid_sync_vcpu_all(vmx); ret = 0; @@ -2872,6 +2646,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->emulation_required = 0; out: + kvm_srcu_read_unlock(&vcpu->kvm->srcu, idx); return ret; } @@ -2893,10 +2668,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; } - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { - enable_irq_window(vcpu); - return; - } cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); @@ -2912,8 +2683,16 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ++vcpu->stat.irq_injections; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, irq) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + vmx->rmode.irq.pending = true; + vmx->rmode.irq.vector = irq; + vmx->rmode.irq.rip = kvm_rip_read(vcpu); + if (vcpu->arch.interrupt.soft) + vmx->rmode.irq.rip += + vmx->vcpu.arch.event_exit_inst_len; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); return; } intr = irq | INTR_INFO_VALID_MASK; @@ -2924,7 +2703,6 @@ static void vmx_inject_irq(struct kvm_vcpu 
	} else
		intr |= INTR_TYPE_EXT_INTR;
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
-	vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2946,13 +2724,18 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
	++vcpu->stat.nmi_injections;
	if (vmx->rmode.vm86_active) {
-		if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR) != EMULATE_DONE)
-			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+		vmx->rmode.irq.pending = true;
+		vmx->rmode.irq.vector = NMI_VECTOR;
+		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     NMI_VECTOR | INTR_TYPE_SOFT_INTR |
+			     INTR_INFO_VALID_MASK);
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
		return;
	}
	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
-	vmx_clear_hlt(vcpu);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -2961,15 +2744,16 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
		return 0;
 
	return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-			(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
-			 | GUEST_INTR_STATE_NMI));
+			(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI));
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 {
	if (!cpu_has_virtual_nmis())
		return to_vmx(vcpu)->soft_vnmi_blocked;
-	return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
+	else
+		return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+			  GUEST_INTR_STATE_NMI);
 }
 
 static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
@@ -3023,7 +2807,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
	 * Cause the #SS fault with 0 error code in VM86 mode.
	 */
	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
-		if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
+		if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
			return 1;
	/*
	 * Forward all other exceptions that are valid in real mode.
@@ -3120,7 +2904,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
	}
 
	if (is_invalid_opcode(intr_info)) {
-		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+		er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
		if (er != EMULATE_DONE)
			kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
@@ -3139,7 +2923,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
		if (kvm_event_needs_reinjection(vcpu))
			kvm_mmu_unprotect_page_virt(vcpu, cr2);
-		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+		return kvm_mmu_page_fault(vcpu, cr2, error_code);
	}
 
	if (vmx->rmode.vm86_active &&
@@ -3204,20 +2988,22 @@ static int handle_io(struct kvm_vcpu *vcpu)
	int size, in, string;
	unsigned port;
 
+	++vcpu->stat.io_exits;
	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
	string = (exit_qualification & 16) != 0;
-	in = (exit_qualification & 8) != 0;
 
-	++vcpu->stat.io_exits;
-
-	if (string || in)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	if (string) {
+		if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO)
+			return 0;
+		return 1;
+	}
 
-	port = exit_qualification >> 16;
	size = (exit_qualification & 7) + 1;
-	skip_emulated_instruction(vcpu);
+	in = (exit_qualification & 8) != 0;
+	port = exit_qualification >> 16;
 
-	return kvm_fast_pio_out(vcpu, size, port);
+	skip_emulated_instruction(vcpu);
+	return kvm_emulate_pio(vcpu, in, size, port);
 }
 
 static void
@@ -3236,7 +3022,6 @@ static int handle_cr(struct kvm_vcpu *vcpu)
	unsigned long exit_qualification, val;
	int cr;
	int reg;
-	int err;
 
	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
	cr = exit_qualification & 15;
@@ -3247,22 +3032,22 @@ static int handle_cr(struct kvm_vcpu *vcpu)
		trace_kvm_cr_write(cr, val);
		switch (cr) {
		case 0:
-			err = kvm_set_cr0(vcpu, val);
-			kvm_complete_insn_gp(vcpu, err);
+			kvm_set_cr0(vcpu, val);
+			skip_emulated_instruction(vcpu);
			return 1;
		case 3:
-			err = kvm_set_cr3(vcpu, val);
-			kvm_complete_insn_gp(vcpu, err);
+			kvm_set_cr3(vcpu, val);
+			skip_emulated_instruction(vcpu);
			return 1;
		case 4:
-			err = kvm_set_cr4(vcpu, val);
-			kvm_complete_insn_gp(vcpu, err);
+			kvm_set_cr4(vcpu, val);
+			skip_emulated_instruction(vcpu);
			return 1;
		case 8: {
			u8 cr8_prev = kvm_get_cr8(vcpu);
			u8 cr8 = kvm_register_read(vcpu, reg);
-			err = kvm_set_cr8(vcpu, cr8);
-			kvm_complete_insn_gp(vcpu, err);
+			kvm_set_cr8(vcpu, cr8);
+			skip_emulated_instruction(vcpu);
			if (irqchip_in_kernel(vcpu->kvm))
				return 1;
			if (cr8_prev <= cr8)
@@ -3281,9 +3066,8 @@ static int handle_cr(struct kvm_vcpu *vcpu)
	case 1: /*mov from cr*/
		switch (cr) {
		case 3:
-			val = kvm_read_cr3(vcpu);
-			kvm_register_write(vcpu, reg, val);
-			trace_kvm_cr_read(cr, val);
+			kvm_register_write(vcpu, reg, vcpu->arch.cr3);
+			trace_kvm_cr_read(cr, vcpu->arch.cr3);
			skip_emulated_instruction(vcpu);
			return 1;
		case 8:
@@ -3310,9 +3094,19 @@ static int handle_cr(struct kvm_vcpu *vcpu)
	return 0;
 }
 
+static int check_dr_alias(struct kvm_vcpu *vcpu)
+{
+	if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return -1;
+	}
+	return 0;
+}
+
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
	unsigned long exit_qualification;
+	unsigned long val;
	int dr, reg;
 
	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
@@ -3347,20 +3141,67 @@ static int handle_dr(struct kvm_vcpu *vcpu)
	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
	if (exit_qualification & TYPE_MOV_FROM_DR) {
-		unsigned long val;
-		if (!kvm_get_dr(vcpu, dr, &val))
-			kvm_register_write(vcpu, reg, val);
-	} else
-		kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]);
+		switch (dr) {
+		case 0 ... 3:
+			val = vcpu->arch.db[dr];
+			break;
+		case 4:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		case 6:
+			val = vcpu->arch.dr6;
+			break;
+		case 5:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		default: /* 7 */
+			val = vcpu->arch.dr7;
+			break;
+		}
+		kvm_register_write(vcpu, reg, val);
+	} else {
+		val = vcpu->arch.regs[reg];
+		switch (dr) {
+		case 0 ... 3:
+			vcpu->arch.db[dr] = val;
+			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+				vcpu->arch.eff_db[dr] = val;
+			break;
+		case 4:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		case 6:
+			if (val & 0xffffffff00000000ULL) {
+				kvm_inject_gp(vcpu, 0);
+				return 1;
+			}
+			vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+			break;
+		case 5:
+			if (check_dr_alias(vcpu) < 0)
+				return 1;
+			/* fall through */
+		default: /* 7 */
+			if (val & 0xffffffff00000000ULL) {
+				kvm_inject_gp(vcpu, 0);
+				return 1;
+			}
+			vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+				vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+				vcpu->arch.switch_db_regs =
+					(val & DR7_BP_EN_MASK);
+			}
+			break;
+		}
+	}
 
	skip_emulated_instruction(vcpu);
	return 1;
 }
 
-static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	vmcs_writel(GUEST_DR7, val);
-}
-
 static int handle_cpuid(struct kvm_vcpu *vcpu)
 {
	kvm_emulate_cpuid(vcpu);
@@ -3406,7 +3247,6 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 
 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
 {
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
	return 1;
 }
 
@@ -3419,8 +3259,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
-
	++vcpu->stat.irq_window_exits;
 
	/*
@@ -3455,11 +3293,6 @@ static int handle_vmx_insn(struct kvm_vcpu *vcpu)
	return 1;
 }
 
-static int handle_invd(struct kvm_vcpu *vcpu)
-{
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
-}
-
 static int handle_invlpg(struct kvm_vcpu *vcpu)
 {
	unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3472,31 +3305,34 @@
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
	skip_emulated_instruction(vcpu);
-	kvm_emulate_wbinvd(vcpu);
+	/* TODO: Add support for VT-d/pass-through device */
	return 1;
 }
 
-static int handle_xsetbv(struct kvm_vcpu *vcpu)
+static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
-	u64 new_bv = kvm_read_edx_eax(vcpu);
-	u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	unsigned long exit_qualification;
+	enum emulation_result er;
+	unsigned long offset;
 
-	if (kvm_set_xcr(vcpu, index, new_bv) == 0)
-		skip_emulated_instruction(vcpu);
-	return 1;
-}
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	offset = exit_qualification & 0xffful;
 
-static int handle_apic_access(struct kvm_vcpu *vcpu)
-{
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	er = emulate_instruction(vcpu, 0, 0, 0);
+
+	if (er != EMULATE_DONE) {
+		printk(KERN_ERR
+		       "Fail to handle apic access vmexit! Offset is 0x%lx\n",
+		       offset);
+		return -ENOEXEC;
+	}
+	return 1;
 }
 
 static int handle_task_switch(struct kvm_vcpu *vcpu)
 {
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long exit_qualification;
-	bool has_error_code = false;
-	u32 error_code = 0;
	u16 tss_selector;
	int reason, type, idt_v;
 
@@ -3519,13 +3355,6 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
			kvm_clear_interrupt_queue(vcpu);
			break;
		case INTR_TYPE_HARD_EXCEPTION:
-			if (vmx->idt_vectoring_info &
-			    VECTORING_INFO_DELIVER_CODE_MASK) {
-				has_error_code = true;
-				error_code =
-					vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			}
-			/* fall through */
		case INTR_TYPE_SOFT_EXCEPTION:
			kvm_clear_exception_queue(vcpu);
			break;
@@ -3540,13 +3369,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
		       type != INTR_TYPE_NMI_INTR))
		skip_emulated_instruction(vcpu);
 
-	if (kvm_task_switch(vcpu, tss_selector, reason,
-				has_error_code, error_code) == EMULATE_FAIL) {
-		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-		vcpu->run->internal.ndata = 0;
+	if (!kvm_task_switch(vcpu, tss_selector, reason))
		return 0;
-	}
 
	/* clear all local breakpoint enable flags */
	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
@@ -3587,7 +3411,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
	trace_kvm_page_fault(gpa, exit_qualification);
-	return kvm_mmu_page_fault(vcpu, gpa, exit_qualification & 0x3, NULL, 0);
+	return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
 }
 
 static u64 ept_rsvd_mask(u64 spte, int level)
@@ -3682,7 +3506,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
	++vcpu->stat.nmi_window_exits;
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
	return 1;
 }
 
@@ -3692,26 +3515,22 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	enum emulation_result err = EMULATE_DONE;
	int ret = 1;
-	u32 cpu_exec_ctrl;
-	bool intr_window_requested;
-
-	cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
 
	while (!guest_state_valid(vcpu)) {
-		if (intr_window_requested
-		    && (kvm_get_rflags(&vmx->vcpu) & X86_EFLAGS_IF))
-			return handle_interrupt_window(&vmx->vcpu);
-
-		err = emulate_instruction(vcpu, 0);
+		err = emulate_instruction(vcpu, 0, 0, 0);
 
		if (err == EMULATE_DO_MMIO) {
			ret = 0;
			goto out;
		}
 
-		if (err != EMULATE_DONE)
-			return 0;
+		if (err != EMULATE_DONE) {
+			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+			vcpu->run->internal.ndata = 0;
+			ret = 0;
+			goto out;
+		}
 
		if (signal_pending(current))
			goto out;
@@ -3760,7 +3579,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
	[EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
	[EXIT_REASON_HLT]                     = handle_halt,
-	[EXIT_REASON_INVD]                    = handle_invd,
	[EXIT_REASON_INVLPG]                  = handle_invlpg,
	[EXIT_REASON_VMCALL]                  = handle_vmcall,
	[EXIT_REASON_VMCLEAR]                 = handle_vmx_insn,
@@ -3775,7 +3593,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
-	[EXIT_REASON_XSETBV]                  = handle_xsetbv,
	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
	[EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
	[EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
@@ -3788,12 +3605,6 @@
 static const int kvm_vmx_max_exit_handlers =
	ARRAY_SIZE(kvm_vmx_exit_handlers);
 
-static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
-{
-	*info1 = vmcs_readl(EXIT_QUALIFICATION);
-	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
-}
-
 /*
  * The guest has exited.  See if we can fix it or if we need userspace
  * assistance.
@@ -3804,18 +3615,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
	u32 exit_reason = vmx->exit_reason;
	u32 vectoring_info = vmx->idt_vectoring_info;
 
-	trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
+	trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
 
	/* If guest state is invalid, start emulating */
	if (vmx->emulation_required && emulate_invalid_guest_state)
		return handle_invalid_guest_state(vcpu);
 
-	if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
-		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-		vcpu->run->fail_entry.hardware_entry_failure_reason
-			= exit_reason;
-		return 0;
-	}
+	/* Access CR3 don't cause VMExit in paging mode, so we need
+	 * to sync with guest real CR3. */
+	if (enable_ept && is_paging(vcpu))
+		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 
	if (unlikely(vmx->fail)) {
		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -3870,9 +3679,18 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
	vmcs_write32(TPR_THRESHOLD, irr);
 }
 
-static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
+static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
-	u32 exit_intr_info = vmx->exit_intr_info;
+	u32 exit_intr_info;
+	u32 idt_vectoring_info = vmx->idt_vectoring_info;
+	bool unblock_nmi;
+	u8 vector;
+	int type;
+	bool idtv_info_valid;
+
+	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+
+	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
 
	/* Handle machine checks before interrupts are enabled */
	if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
@@ -3882,21 +3700,10 @@
	/* We need to handle NMIs before interrupts are enabled */
	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
-		kvm_before_handle_nmi(&vmx->vcpu);
+	    (exit_intr_info & INTR_INFO_VALID_MASK))
		asm("int $2");
-		kvm_after_handle_nmi(&vmx->vcpu);
-	}
-}
 
-static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
-{
-	u32 exit_intr_info = vmx->exit_intr_info;
-	bool unblock_nmi;
-	u8 vector;
-	bool idtv_info_valid;
-
-	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
+	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
	if (cpu_has_virtual_nmis()) {
		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
@@ -3918,18 +3725,6 @@
	} else if (unlikely(vmx->soft_vnmi_blocked))
		vmx->vnmi_blocked_time +=
			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
-}
-
-static void __vmx_complete_interrupts(struct vcpu_vmx *vmx,
-				      u32 idt_vectoring_info,
-				      int instr_len_field,
-				      int error_code_field)
-{
-	u8 vector;
-	int type;
-	bool idtv_info_valid;
-
-	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
 
	vmx->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&vmx->vcpu);
@@ -3938,8 +3733,6 @@
	if (!idtv_info_valid)
		return;
 
-	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
-
	vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
	type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
@@ -3956,18 +3749,18 @@
		break;
	case INTR_TYPE_SOFT_EXCEPTION:
		vmx->vcpu.arch.event_exit_inst_len =
-			vmcs_read32(instr_len_field);
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
		/* fall through */
	case INTR_TYPE_HARD_EXCEPTION:
		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
-			u32 err = vmcs_read32(error_code_field);
+			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
			kvm_queue_exception_e(&vmx->vcpu, vector, err);
		} else
			kvm_queue_exception(&vmx->vcpu, vector);
		break;
	case INTR_TYPE_SOFT_INTR:
		vmx->vcpu.arch.event_exit_inst_len =
-			vmcs_read32(instr_len_field);
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
		/* fall through */
	case INTR_TYPE_EXT_INTR:
		kvm_queue_interrupt(&vmx->vcpu, vector,
@@ -3978,21 +3771,27 @@
	}
 }
 
-static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
-{
-	__vmx_complete_interrupts(vmx, vmx->idt_vectoring_info,
-				  VM_EXIT_INSTRUCTION_LEN,
-				  IDT_VECTORING_ERROR_CODE);
-}
-
-static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
+/*
+ * Failure to inject an interrupt should give us the information
+ * in IDT_VECTORING_INFO_FIELD.  However, if the failure occurs
+ * when fetching the interrupt redirection bitmap in the real-mode
+ * tss, this doesn't happen.  So we do it ourselves.
+ */
+static void fixup_rmode_irq(struct vcpu_vmx *vmx)
 {
-	__vmx_complete_interrupts(to_vmx(vcpu),
-				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
-				  VM_ENTRY_INSTRUCTION_LEN,
-				  VM_ENTRY_EXCEPTION_ERROR_CODE);
-
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
+	vmx->rmode.irq.pending = 0;
+	if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip)
+		return;
+	kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip);
+	if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
+		vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
+		vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
+		return;
+	}
+	vmx->idt_vectoring_info =
+		VECTORING_INFO_VALID_MASK
+		| INTR_TYPE_EXT_INTR
+		| vmx->rmode.irq.vector;
 }
 
 #ifdef CONFIG_X86_64
@@ -4029,6 +3828,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		vmx_set_interrupt_shadow(vcpu, 0);
 
+	/*
+	 * Loading guest fpu may have cleared host cr0.ts
+	 */
+	vmcs_writel(HOST_CR0, read_cr0());
+
	asm(
		/* Store host registers */
		"push %%"R"dx; push %%"R"bp;"
@@ -4119,27 +3923,23 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
	      , [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
	      : "cc", "memory"
-	      , R"ax", R"bx", R"di", R"si"
+	      , R"bx", R"di", R"si"
 #ifdef CONFIG_X86_64
	      , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 #endif
	      );
 
	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
-				  | (1 << VCPU_EXREG_PDPTR)
-				  | (1 << VCPU_EXREG_CR3));
+				  | (1 << VCPU_EXREG_PDPTR));
	vcpu->arch.regs_dirty = 0;
 
	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+	if (vmx->rmode.irq.pending)
+		fixup_rmode_irq(vmx);
 
	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
	vmx->launched = 1;
 
-	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-	vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-
-	vmx_complete_atomic_exit(vmx);
-	vmx_recover_nmi_blocking(vmx);
	vmx_complete_interrupts(vmx);
 }
 
@@ -4161,26 +3961,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	free_vpid(vmx);
+	spin_lock(&vmx_vpid_lock);
+	if (vmx->vpid != 0)
+		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
+	spin_unlock(&vmx_vpid_lock);
	vmx_free_vmcs(vcpu);
	kfree(vmx->guest_msrs);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
 
-static inline void vmcs_init(struct vmcs *vmcs)
-{
-	u64 phys_addr = __pa(per_cpu(vmxarea, raw_smp_processor_id()));
-
-	if (!vmm_exclusive)
-		kvm_cpu_vmxon(phys_addr);
-
-	vmcs_clear(vmcs);
-
-	if (!vmm_exclusive)
-		kvm_cpu_vmxoff();
-}
-
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
	int err;
@@ -4206,11 +3996,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
	if (!vmx->vmcs)
		goto free_msrs;
 
-	vmcs_init(vmx->vmcs);
+	vmcs_clear(vmx->vmcs);
 
	cpu = get_cpu();
	vmx_vcpu_load(&vmx->vcpu, cpu);
-	vmx->vcpu.cpu = cpu;
	err = vmx_vcpu_setup(vmx);
	vmx_vcpu_put(&vmx->vcpu);
	put_cpu();
@@ -4237,7 +4026,6 @@ free_msrs:
 uninit_vcpu:
	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
-	free_vpid(vmx);
	kmem_cache_free(kvm_vcpu_cache, vmx);
	return ERR_PTR(err);
 }
@@ -4341,6 +4129,11 @@ static int vmx_get_lpage_level(void)
		return PT_PDPE_LEVEL;
 }
 
+static inline u32 bit(int bitno)
+{
+	return 1 << (bitno & 31);
+}
+
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
	struct kvm_cpuid_entry2 *best;
@@ -4363,10 +4156,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
	}
 }
 
-static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
-{
-}
-
 static struct kvm_x86_ops vmx_x86_ops = {
	.cpu_has_kvm_support = cpu_has_kvm_support,
	.disabled_by_bios = vmx_disabled_by_bios,
@@ -4394,7 +4183,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
	.get_cpl = vmx_get_cpl,
	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
	.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
-	.decache_cr3 = vmx_decache_cr3,
	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
	.set_cr0 = vmx_set_cr0,
	.set_cr3 = vmx_set_cr3,
@@ -4404,7 +4192,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
	.set_idt = vmx_set_idt,
	.get_gdt = vmx_get_gdt,
	.set_gdt = vmx_set_gdt,
-	.set_dr7 = vmx_set_dr7,
	.cache_reg = vmx_cache_reg,
	.get_rflags = vmx_get_rflags,
	.set_rflags = vmx_set_rflags,
@@ -4422,7 +4209,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
	.set_irq = vmx_inject_irq,
	.set_nmi = vmx_inject_nmi,
	.queue_exception = vmx_queue_exception,
-	.cancel_injection = vmx_cancel_injection,
	.interrupt_allowed = vmx_interrupt_allowed,
	.nmi_allowed = vmx_nmi_allowed,
	.get_nmi_mask = vmx_get_nmi_mask,
@@ -4435,23 +4221,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
	.get_tdp_level = get_ept_level,
	.get_mt_mask = vmx_get_mt_mask,
 
-	.get_exit_info = vmx_get_exit_info,
	.exit_reasons_str = vmx_exit_reasons_str,
-
	.get_lpage_level = vmx_get_lpage_level,
 
	.cpuid_update = vmx_cpuid_update,
 
	.rdtscp_supported = vmx_rdtscp_supported,
-
-	.set_supported_cpuid = vmx_set_supported_cpuid,
-
-	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
-
-	.write_tsc_offset = vmx_write_tsc_offset,
-	.adjust_tsc_offset = vmx_adjust_tsc_offset,
-
-	.set_tdp_cr3 = vmx_set_cr3,
 };
 
 static int __init vmx_init(void)
@@ -4499,8 +4274,7 @@ static int __init vmx_init(void)
 
	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
	if (r)
		goto out3;
 
@@ -4513,6 +4287,8 @@ static int __init vmx_init(void)
 
	if (enable_ept) {
		bypass_guest_pf = 0;
+		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+			VMX_EPT_WRITABLE_MASK);
		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
			VMX_EPT_EXECUTABLE_MASK);
		kvm_enable_tdp();