diff options
-rw-r--r-- | ioapic.h | 6 | ||||
-rw-r--r-- | kvm.c | 612 | ||||
-rw-r--r-- | kvm.h | 137 | ||||
-rw-r--r-- | kvm_lapic.c | 1278 | ||||
-rw-r--r-- | kvm_lapic.h | 48 | ||||
-rw-r--r-- | kvm_x86.c | 813 |
6 files changed, 1446 insertions, 1448 deletions
@@ -60,10 +60,6 @@ static struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } -#ifdef XXX -int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode); -int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); @@ -73,5 +69,5 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -#endif /*XXX*/ + #endif @@ -37,6 +37,7 @@ #include "kvm_host.h" #include "kvm_x86host.h" #include "kvm_i8254.h" +#include "kvm_lapic.h" #include "processor-flags.h" #include "hyperv.h" #include "apicdef.h" @@ -203,20 +204,14 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu); void vmx_fpu_activate(struct kvm_vcpu *vcpu); void kvm_set_pfn_dirty(pfn_t); -extern int irqchip_in_kernel(struct kvm *kvm); extern void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); -extern void kvm_set_apic_base(struct kvm_vcpu *vcpu, uint64_t data); extern void kvm_release_pfn_dirty(pfn_t pfn); extern void kvm_release_pfn_clean(pfn_t pfn); extern void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); extern int mmu_topup_memory_caches(struct kvm_vcpu *vcpu); -extern int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, - struct kvm_lapic_irq *irq); static int hardware_enable_all(void); static void hardware_disable_all(void); extern int sigprocmask(int, const sigset_t *, sigset_t *); -extern void start_apic_timer(struct kvm_lapic *); -extern void update_divide_count(struct kvm_lapic *); extern void cli(void); extern void sti(void); static void kvm_destroy_vm(struct kvm *); @@ -823,135 +818,6 @@ vmx_fpu_deactivate(struct kvm_vcpu *vcpu) vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); } -#define MSR_EFER 0xc0000080 /* extended feature register */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ -#define X86_FEATURE_FPU (0 * 32 + 0) /* Onboard FPU */ -#define X86_FEATURE_VME (0 * 32 + 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE (0 * 32 + 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE (0 * 32 + 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC (0 * 32 + 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0 * 32 + 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE (0 * 32 + 6) /* Phys. Address Extensions */ -#define X86_FEATURE_MCE (0 * 32 + 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 (0 * 32 + 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC (0 * 32 + 9) /* Onboard APIC */ -#define X86_FEATURE_SEP (0 * 32 + 11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR (0 * 32 + 12) /* Memory Type Range Regs. */ -#define X86_FEATURE_PGE (0 * 32 + 13) /* Page Global Enable */ -#define X86_FEATURE_MCA (0 * 32 + 14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV (0 * 32 + 15) /* CMOV instructions */ - /* (+ FCMOVcc, FCOMI w/ FPU) */ -#define X86_FEATURE_PAT (0 * 32 + 16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 (0 * 32 + 17) /* 36-bit PSEs */ -#define X86_FEATURE_PN (0 * 32 + 18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0 * 32 + 19) /* "clflush" instruction */ -#define X86_FEATURE_DS (0 * 32 + 21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI (0 * 32 + 22) /* ACPI via MSR */ -#define X86_FEATURE_MMX (0 * 32 + 23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR (0 * 32 + 24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ -#define X86_FEATURE_XMM (0 * 32 + 25) /* "sse" */ -#define X86_FEATURE_XMM2 (0 * 32 + 26) /* "sse2" */ -#define X86_FEATURE_SELFSNOOP (0 * 32 + 27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT (0 * 32 + 28) /* Hyper-Threading */ -#define X86_FEATURE_ACC (0 * 32 + 29) /* "tm" Auto. clock control */ -#define X86_FEATURE_IA64 (0 * 32 + 30) /* IA-64 processor */ -#define X86_FEATURE_PBE (0 * 32 + 31) /* Pending Break Enable */ - -/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ -/* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL (1 * 32 + 11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP (1 * 32 + 19) /* MP Capable. */ -#define X86_FEATURE_NX (1 * 32 + 20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT (1 * 32 + 22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT (1 * 32 + 25) /* FXSAVE/FXRSTOR optimiztns */ -#define X86_FEATURE_GBPAGES (1 * 32 + 26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP (1 * 32 + 27) /* RDTSCP */ -#define X86_FEATURE_LM (1 * 32 + 29) /* Long Mode (x86-64) */ -#define X86_FEATURE_3DNOWEXT (1 * 32 + 30) /* AMD 3DNow! extensions */ -#define X86_FEATURE_3DNOW (1 * 32 + 31) /* 3DNow! */ - -/* cpu types for specific tunings: */ -#define X86_FEATURE_K8 (3 * 32 + 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3 * 32 + 5) /* "" Athlon */ -#define X86_FEATURE_P3 (3 * 32 + 6) /* "" P3 */ -#define X86_FEATURE_P4 (3 * 32 + 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC (3 * 32 + 8) /* TSC ticks at constant rate */ -#define X86_FEATURE_UP (3 * 32 + 9) /* smp kernel running on up */ -#define X86_FEATURE_FXSAVE_LEAK (3 * 32 + 10) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_FEATURE_ARCH_PERFMON (3 * 32 + 11) /* Intel Arch. PerfMon */ -#define X86_FEATURE_PEBS (3 * 32 + 12) /* Precise-Event Based Smplng */ -#define X86_FEATURE_BTS (3 * 32 + 13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3 * 32 + 14) /* syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3 * 32 + 15) /* sysenter in ia32 userspace */ -#define X86_FEATURE_REP_GOOD (3 * 32 + 16) /* rep microcode works well */ -#define X86_FEATURE_MFENCE_RDTSC (3 * 32 + 17) /* Mfence synchronizes RDTSC */ -#define X86_FEATURE_LFENCE_RDTSC (3 * 32 + 18) /* Lfence synchronizes RDTSC */ -#define X86_FEATURE_11AP (3 * 32 + 19) /* Bad local APIC aka 11AP */ -#define X86_FEATURE_NOPL (3 * 32 + 20) /* NOPL (0F 1F) instructions */ -#define X86_FEATURE_AMDC1E (3 * 32 + 21) /* AMD C1E detected */ -#define X86_FEATURE_XTOPOLOGY (3 * 32 + 22) /* topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE (3 * 32 + 23) /* TSC is reliable */ -#define X86_FEATURE_NONSTOP_TSC (3 * 32 + 24) /* TSC continues in C states */ -#define X86_FEATURE_CLFLUSH_MONITOR (3 * 32 + 25) /* clflush reqd w/ monitor */ -#define X86_FEATURE_EXTD_APICID (3 * 32 + 26) /* extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM (3 * 32 + 27) /* multi-node processor */ -#define X86_FEATURE_APERFMPERF (3 * 32 + 28) /* APERFMPERF */ - -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ -#define X86_FEATURE_XMM3 (4 * 32 + 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ (4 * 32 + 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 (4 * 32 + 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT (4 * 32 + 3) /* "monitor" Monitor/Mwait */ -#define X86_FEATURE_DSCPL (4 * 32 + 4) /* ds_cpl CPL Qual Debug Str */ -#define X86_FEATURE_VMX (4 * 32 + 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX (4 * 32 + 6) /* Safer mode */ -#define X86_FEATURE_EST (4 * 32 + 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 (4 * 32 + 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 (4 * 32 + 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID (4 * 32 + 10) /* Context ID */ -#define X86_FEATURE_FMA (4 * 32 + 12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 (4 * 32 + 13) /* CMPXCHG16B */ -#define X86_FEATURE_XTPR (4 * 32 + 14) /* Send Task Priority Msgs */ -#define X86_FEATURE_PDCM (4 * 32 + 15) /* Performance Capabilities */ -#define X86_FEATURE_DCA (4 * 32 + 18) /* Direct Cache Access */ -#define X86_FEATURE_XMM4_1 (4 * 32 + 19) /* "sse4_1" SSE-4.1 */ -#define X86_FEATURE_XMM4_2 (4 * 32 + 20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC (4 * 32 + 21) /* x2APIC */ -#define X86_FEATURE_MOVBE (4 * 32 + 22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT (4 * 32 + 23) /* POPCNT instruction */ -#define X86_FEATURE_AES (4 * 32 + 25) /* AES instructions */ -#define X86_FEATURE_XSAVE (4 * 32 + 26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define X86_FEATURE_OSXSAVE (4 * 32 + 27) /* "" XSAVE enabled in the OS */ -#define X86_FEATURE_AVX (4 * 32 + 28) /* Advanced Vector Extensions */ -#define X86_FEATURE_HYPERVISOR (4 * 32 + 31) /* Running on a hypervisor */ - -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM (6 * 32 + 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY (6 * 32 + 1) /* HyperThreading invalid */ -#define X86_FEATURE_SVM (6 * 32 + 2) /* Secure virtual machine */ -#define X86_FEATURE_EXTAPIC (6 * 32 + 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY (6 * 32 + 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM (6 * 32 + 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A (6 * 32 + 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE (6 * 32 + 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH (6 * 32 + 8) /* 3DNow prefetch */ -#define X86_FEATURE_OSVW (6 * 32 + 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS (6 * 32 + 10) /* Instruction Based Sampling */ -#define X86_FEATURE_SSE5 (6 * 32 + 11) /* SSE-5 */ -#define X86_FEATURE_SKINIT (6 * 32 + 12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT (6 * 32 + 13) /* Watchdog timer */ -#define X86_FEATURE_NODEID_MSR (6 * 32 + 19) /* NodeId MSR */ - -/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY (2 * 32 + 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN (2 * 32 + 1) /* Longrun power control */ -#define X86_FEATURE_LRTI (2 * 32 + 3) /* LongRun table interface */ - - -struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function, uint32_t index); - static inline uint32_t bit(int bitno) { @@ -5383,25 +5249,6 @@ kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, uint32_t index) /* 14 is the version for Xeon and Pentium 8.4.8 */ #define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) -extern void apic_set_reg(struct kvm_lapic *apic, int reg_off, uint32_t val); - -void -kvm_apic_set_version(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - struct kvm_cpuid_entry2 *feat; - uint32_t v = APIC_VERSION; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); - if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) - v |= APIC_LVR_DIRECTED_EOI; - apic_set_reg(apic, APIC_LVR, v); -} - - static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid) { @@ -5744,22 +5591,6 @@ kvm_read_cr0(struct kvm_vcpu *vcpu) return (kvm_read_cr0_bits(vcpu, ~0UL)); } -extern inline uint32_t apic_get_reg(struct kvm_lapic *apic, int reg_off); - -uint64_t -kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - uint64_t tpr; - - if (apic == NULL) - return (0); - - tpr = (uint64_t)apic_get_reg(apic, APIC_TASKPRI); - - return ((tpr & 0xf0) >> 4); -} - unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { @@ -5770,8 +5601,6 @@ kvm_get_cr8(struct kvm_vcpu *vcpu) } } -extern uint64_t kvm_get_apic_base(struct kvm_vcpu *vcpu); - int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -5947,94 +5776,6 @@ vmx_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) vmcs_write32(TPR_THRESHOLD, irr); } -static int -fls(int x) -{ - int r = 32; - - if (!x) - return (0); - - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - - return (r); -} - -static int -find_highest_vector(void *bitmap) -{ - uint32_t *word = bitmap; - int word_offset = MAX_APIC_VECTOR >> 5; - - while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) - continue; - - if (!word_offset && !word[0]) - return (-1); - else - return (fls(word[word_offset << 2]) - 1 + (word_offset << 5)); -} - -static inline int -apic_search_irr(struct kvm_lapic *apic) -{ - return (find_highest_vector((void *)((uintptr_t)apic->regs + - APIC_IRR))); -} - -static inline int -apic_find_highest_irr(struct kvm_lapic *apic) -{ - int result; - - if (!apic->irr_pending) - return (-1); - - result = apic_search_irr(apic); - ASSERT(result == -1 || result >= 16); - - return (result); -} - -int -kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - int highest_irr; - - /* - * This may race with setting of irr in __apic_accept_irq() and - * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq - * will cause vmexit immediately and the value will be recalculated - * on the next vmentry. - */ - if (!apic) - return (0); - - highest_irr = apic_find_highest_irr(apic); - - return (highest_irr); -} - static void update_cr8_intercept(struct kvm_vcpu *vcpu) { @@ -6209,7 +5950,6 @@ is_protmode(struct kvm_vcpu *vcpu) return (kvm_read_cr0_bits(vcpu, X86_CR0_PE)); } - int kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { @@ -6649,88 +6389,6 @@ set_msr_mtrr(struct kvm_vcpu *vcpu, uint32_t msr, uint64_t data) return (0); } -static inline int -apic_x2apic_mode(struct kvm_lapic *apic) -{ - return (apic->vcpu->arch.apic_base & X2APIC_ENABLE); -} - -extern int apic_reg_write(struct kvm_lapic *apic, uint32_t reg, uint32_t val); - -int -kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, uint32_t msr, uint64_t data) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - uint32_t reg = (msr - APIC_BASE_MSR) << 4; - - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) - return (1); - - /* if this is ICR write vector before command */ - if (msr == 0x830) - apic_reg_write(apic, APIC_ICR2, (uint32_t)(data >> 32)); - - return (apic_reg_write(apic, reg, (uint32_t)data)); -} - -extern int apic_reg_read(struct kvm_lapic *apic, - uint32_t offset, int len, void *data); - -int -kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, uint32_t msr, uint64_t *data) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - uint32_t reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; - - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) - return (1); - - if (apic_reg_read(apic, reg, 4, &low)) - return (1); - - if (msr == 0x830) - apic_reg_read(apic, APIC_ICR2, 4, &high); - - *data = (((uint64_t)high) << 32) | low; - - return (0); -} - -int -kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, uint32_t reg, uint64_t data) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return (1); - - /* if this is ICR write vector before command */ - if (reg == APIC_ICR) - apic_reg_write(apic, APIC_ICR2, (uint32_t)(data >> 32)); - - return (apic_reg_write(apic, reg, (uint32_t)data)); -} - -int -kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, uint32_t reg, uint64_t *data) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - uint32_t low, high = 0; - - if (!irqchip_in_kernel(vcpu->kvm)) - return (1); - - if (apic_reg_read(apic, reg, 4, &low)) - return (1); - - if (reg == APIC_ICR) - apic_reg_read(apic, APIC_ICR2, 4, &high); - - *data = (((uint64_t)high) << 32) | low; - - return (0); -} - int clear_user(void *addr, unsigned long size) { @@ -11103,74 +10761,6 @@ handle_invalid_op(struct kvm_vcpu *vcpu) return (1); } -inline int -apic_find_highest_isr(struct kvm_lapic *apic) -{ - int ret; - - ret = find_highest_vector((void *)((uintptr_t)apic->regs + APIC_ISR)); - ASSERT(ret == -1 || ret >= 16); - - return (ret); -} - -void -apic_update_ppr(struct kvm_lapic *apic) -{ - uint32_t tpr, isrv, ppr; - int isr; - - tpr = apic_get_reg(apic, APIC_TASKPRI); - isr = apic_find_highest_isr(apic); - isrv = (isr != -1) ? isr : 0; - - if ((tpr & 0xf0) >= (isrv & 0xf0)) - ppr = tpr & 0xff; - else - ppr = isrv & 0xf0; - - apic_set_reg(apic, APIC_PROCPRI, ppr); -} - -extern inline int apic_enabled(struct kvm_lapic *apic); - -int -kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - int highest_irr; - - if (!apic || !apic_enabled(apic)) - return (-1); - - apic_update_ppr(apic); - highest_irr = apic_find_highest_irr(apic); - if ((highest_irr == -1) || - ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) - return (-1); - - return (highest_irr); -} - -extern inline int apic_hw_enabled(struct kvm_lapic *apic); - -int -kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) -{ - uint32_t lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); - int r = 0; - - if (kvm_vcpu_is_bsp(vcpu)) { - if (!apic_hw_enabled(vcpu->arch.apic)) - r = 1; - if ((lvt0 & APIC_LVT_MASKED) == 0 && - GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) - r = 1; - } - - return (r); -} - /* * check if there is pending interrupt without intack. */ @@ -11192,34 +10782,6 @@ kvm_cpu_has_interrupt(struct kvm_vcpu *v) return (1); } -extern inline void apic_set_vector(int vec, caddr_t bitmap); -extern inline void apic_clear_vector(int vec, caddr_t bitmap); - -static inline void -apic_clear_irr(int vec, struct kvm_lapic *apic) -{ - apic->irr_pending = 0; - apic_clear_vector(vec, (void *)((uintptr_t)apic->regs + APIC_IRR)); - if (apic_search_irr(apic) != -1) - apic->irr_pending = 1; -} - -int -kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) -{ - int vector = kvm_apic_has_interrupt(vcpu); - struct kvm_lapic *apic = vcpu->arch.apic; - - if (vector == -1) - return (-1); - - apic_set_vector(vector, (void *)((uintptr_t)apic->regs + APIC_ISR)); - apic_update_ppr(apic); - apic_clear_irr(vector, apic); - - return (vector); -} - static int handle_interrupt_window(struct kvm_vcpu *vcpu) { @@ -11429,8 +10991,6 @@ kvm_mmu_unload(struct kvm_vcpu *vcpu) mmu_free_roots(vcpu); } -extern void apic_set_tpr(struct kvm_lapic *apic, uint32_t tpr); - /* * Often times we have pages that correspond to addresses that are in a users * virtual address space. Rather than trying to constantly map them in and out @@ -11445,74 +11005,6 @@ page_address(page_t *page) } void -kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) -{ - uint32_t data; - void *vapic; - - if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) - return; - - vapic = page_address(vcpu->arch.apic->vapic_page); - - data = *(uint32_t *)((uintptr_t)vapic + - offset_in_page(vcpu->arch.apic->vapic_addr)); -#ifdef XXX - kunmap_atomic(vapic, KM_USER0); -#else - XXX_KVM_PROBE; -#endif - - apic_set_tpr(vcpu->arch.apic, data & 0xff); -} - -void -kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) -{ - uint32_t data, tpr; - int max_irr, max_isr; - struct kvm_lapic *apic; - void *vapic; - - if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) - return; - - apic = vcpu->arch.apic; - tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; - max_irr = apic_find_highest_irr(apic); - if (max_irr < 0) - max_irr = 0; - max_isr = apic_find_highest_isr(apic); - if (max_isr < 0) - max_isr = 0; - data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); - - vapic = page_address(vcpu->arch.apic->vapic_page); - - *(uint32_t *)((uintptr_t)vapic + - offset_in_page(vcpu->arch.apic->vapic_addr)) = data; -#ifdef XXX - kunmap_atomic(vapic, KM_USER0); -#else - XXX_KVM_PROBE; -#endif -} - -extern inline int apic_sw_enabled(struct kvm_lapic *apic); - -int -kvm_apic_present(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic)); -} - -int -kvm_lapic_enabled(struct kvm_vcpu *vcpu) -{ - return (kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic)); -} - -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; @@ -12210,8 +11702,6 @@ vapic_enter(struct kvm_vcpu *vcpu) vcpu->arch.apic->vapic_page = page; } -extern int kvm_apic_id(struct kvm_lapic *apic); - static void vapic_exit(struct kvm_vcpu *vcpu) { @@ -12234,71 +11724,6 @@ vapic_exit(struct kvm_vcpu *vcpu) #endif } -void -kvm_lapic_reset(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic; - int i; - - ASSERT(vcpu); - apic = vcpu->arch.apic; - ASSERT(apic != NULL); - -#ifdef XXX - /* Stop the timer in case it's a reset to an active apic */ - hrtimer_cancel(&apic->lapic_timer.timer); -#else - mutex_enter(&cpu_lock); - if (apic->lapic_timer.active) { - cyclic_remove(apic->lapic_timer.kvm_cyclic_id); - apic->lapic_timer.active = 0; - } - mutex_exit(&cpu_lock); - XXX_KVM_PROBE; -#endif - - apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); - kvm_apic_set_version(apic->vcpu); - - for (i = 0; i < APIC_LVT_NUM; i++) - apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); - - apic_set_reg(apic, APIC_LVT0, - SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); - - apic_set_reg(apic, APIC_DFR, 0xffffffffU); - apic_set_reg(apic, APIC_SPIV, 0xff); - apic_set_reg(apic, APIC_TASKPRI, 0); - apic_set_reg(apic, APIC_LDR, 0); - apic_set_reg(apic, APIC_ESR, 0); - apic_set_reg(apic, APIC_ICR, 0); - apic_set_reg(apic, APIC_ICR2, 0); - apic_set_reg(apic, APIC_TDCR, 0); - apic_set_reg(apic, APIC_TMICT, 0); - for (i = 0; i < 8; i++) { - apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); - apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); - apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); - } - apic->irr_pending = 0; - update_divide_count(apic); -#ifdef XXX - atomic_set(&apic->lapic_timer.pending, 0); -#else - apic->lapic_timer.pending = 0; - XXX_KVM_PROBE; -#endif - if (kvm_vcpu_is_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; - apic_update_ppr(apic); - - vcpu->arch.apic_arb_prio = 0; - - cmn_err(CE_NOTE, "%s: vcpu=%p, id=%d, base_msr= %lx PRIx64 " - "base_address=%lx\n", __func__, vcpu, kvm_apic_id(apic), - vcpu->arch.apic_base, apic->base_address); -} - static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) { @@ -12969,8 +12394,6 @@ ioapic_deliver(struct kvm_ioapic *ioapic, int irq) return (kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe)); } -extern int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); - static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { @@ -13717,28 +13140,6 @@ kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, mutex_exit(&kvm->irq_lock); } -void -kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - apic->base_address = vcpu->arch.apic_base & - MSR_IA32_APICBASE_BASE; - kvm_apic_set_version(vcpu); - - apic_update_ppr(apic); - - mutex_enter(&cpu_lock); - if (apic->lapic_timer.active) - cyclic_remove(apic->lapic_timer.kvm_cyclic_id); - apic->lapic_timer.active = 0; - mutex_exit(&cpu_lock); - - update_divide_count(apic); - start_apic_timer(apic); - apic->irr_pending = 1; -} - static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { @@ -13918,17 +13319,6 @@ kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) } static int -kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, struct kvm_vapic_addr *addr) -{ - if (!irqchip_in_kernel(vcpu->kvm)) - return (EINVAL); - - vcpu->arch.apic->vapic_addr = addr->vapic_addr; - - return (0); -} - -static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, uint64_t *mcg_capp) { int rval; @@ -1866,6 +1866,130 @@ typedef struct kvm_irq_mask_notifier { struct list_node link; } kvm_irq_mask_notifier_t; +/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ +#define X86_FEATURE_FPU (0 * 32 + 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0 * 32 + 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0 * 32 + 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0 * 32 + 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0 * 32 + 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0 * 32 + 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE (0 * 32 + 6) /* Phys. Address Extensions */ +#define X86_FEATURE_MCE (0 * 32 + 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 (0 * 32 + 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0 * 32 + 9) /* Onboard APIC */ +#define X86_FEATURE_SEP (0 * 32 + 11) /* SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR (0 * 32 + 12) /* Memory Type Range Regs. */ +#define X86_FEATURE_PGE (0 * 32 + 13) /* Page Global Enable */ +#define X86_FEATURE_MCA (0 * 32 + 14) /* Machine Check Architecture */ +#define X86_FEATURE_CMOV (0 * 32 + 15) /* CMOV instructions */ + /* (+ FCMOVcc, FCOMI w/ FPU) */ +#define X86_FEATURE_PAT (0 * 32 + 16) /* Page Attribute Table */ +#define X86_FEATURE_PSE36 (0 * 32 + 17) /* 36-bit PSEs */ +#define X86_FEATURE_PN (0 * 32 + 18) /* Processor serial number */ +#define X86_FEATURE_CLFLSH (0 * 32 + 19) /* "clflush" instruction */ +#define X86_FEATURE_DS (0 * 32 + 21) /* "dts" Debug Store */ +#define X86_FEATURE_ACPI (0 * 32 + 22) /* ACPI via MSR */ +#define X86_FEATURE_MMX (0 * 32 + 23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0 * 32 + 24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_XMM (0 * 32 + 25) /* "sse" */ +#define X86_FEATURE_XMM2 (0 * 32 + 26) /* "sse2" */ +#define X86_FEATURE_SELFSNOOP (0 * 32 + 27) /* "ss" CPU self snoop */ +#define X86_FEATURE_HT (0 * 32 + 28) /* Hyper-Threading */ +#define X86_FEATURE_ACC (0 * 32 + 29) /* "tm" Auto. clock control */ +#define X86_FEATURE_IA64 (0 * 32 + 30) /* IA-64 processor */ +#define X86_FEATURE_PBE (0 * 32 + 31) /* Pending Break Enable */ + +/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ +/* Don't duplicate feature flags which are redundant with Intel! */ +#define X86_FEATURE_SYSCALL (1 * 32 + 11) /* SYSCALL/SYSRET */ +#define X86_FEATURE_MP (1 * 32 + 19) /* MP Capable. */ +#define X86_FEATURE_NX (1 * 32 + 20) /* Execute Disable */ +#define X86_FEATURE_MMXEXT (1 * 32 + 22) /* AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT (1 * 32 + 25) /* FXSAVE/FXRSTOR optimiztns */ +#define X86_FEATURE_GBPAGES (1 * 32 + 26) /* "pdpe1gb" GB pages */ +#define X86_FEATURE_RDTSCP (1 * 32 + 27) /* RDTSCP */ +#define X86_FEATURE_LM (1 * 32 + 29) /* Long Mode (x86-64) */ +#define X86_FEATURE_3DNOWEXT (1 * 32 + 30) /* AMD 3DNow! extensions */ +#define X86_FEATURE_3DNOW (1 * 32 + 31) /* 3DNow! */ + +/* cpu types for specific tunings: */ +#define X86_FEATURE_K8 (3 * 32 + 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3 * 32 + 5) /* "" Athlon */ +#define X86_FEATURE_P3 (3 * 32 + 6) /* "" P3 */ +#define X86_FEATURE_P4 (3 * 32 + 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC (3 * 32 + 8) /* TSC ticks at constant rate */ +#define X86_FEATURE_UP (3 * 32 + 9) /* smp kernel running on up */ +#define X86_FEATURE_FXSAVE_LEAK (3 * 32 + 10) /* FXSAVE leaks FOP/FIP/FOP */ +#define X86_FEATURE_ARCH_PERFMON (3 * 32 + 11) /* Intel Arch. PerfMon */ +#define X86_FEATURE_PEBS (3 * 32 + 12) /* Precise-Event Based Smplng */ +#define X86_FEATURE_BTS (3 * 32 + 13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 (3 * 32 + 14) /* syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3 * 32 + 15) /* sysenter in ia32 userspace */ +#define X86_FEATURE_REP_GOOD (3 * 32 + 16) /* rep microcode works well */ +#define X86_FEATURE_MFENCE_RDTSC (3 * 32 + 17) /* Mfence synchronizes RDTSC */ +#define X86_FEATURE_LFENCE_RDTSC (3 * 32 + 18) /* Lfence synchronizes RDTSC */ +#define X86_FEATURE_11AP (3 * 32 + 19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL (3 * 32 + 20) /* NOPL (0F 1F) instructions */ +#define X86_FEATURE_AMDC1E (3 * 32 + 21) /* AMD C1E detected */ +#define X86_FEATURE_XTOPOLOGY (3 * 32 + 22) /* topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE (3 * 32 + 23) /* TSC is reliable */ +#define X86_FEATURE_NONSTOP_TSC (3 * 32 + 24) /* TSC continues in C states */ +#define X86_FEATURE_CLFLUSH_MONITOR (3 * 32 + 25) /* clflush reqd w/ monitor */ +#define X86_FEATURE_EXTD_APICID (3 * 32 + 26) /* extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM (3 * 32 + 27) /* multi-node processor */ +#define X86_FEATURE_APERFMPERF (3 * 32 + 28) /* APERFMPERF */ + +/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ +#define X86_FEATURE_XMM3 (4 * 32 + 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ (4 * 32 + 1) /* PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 (4 * 32 + 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT (4 * 32 + 3) /* "monitor" Monitor/Mwait */ +#define X86_FEATURE_DSCPL (4 * 32 + 4) /* ds_cpl CPL Qual Debug Str */ +#define X86_FEATURE_VMX (4 * 32 + 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX (4 * 32 + 6) /* Safer mode */ +#define X86_FEATURE_EST (4 * 32 + 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 (4 * 32 + 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 (4 * 32 + 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_CID (4 * 32 + 10) /* Context ID */ +#define X86_FEATURE_FMA (4 * 32 + 12) /* Fused multiply-add */ +#define X86_FEATURE_CX16 (4 * 32 + 13) /* CMPXCHG16B */ +#define X86_FEATURE_XTPR (4 * 32 + 14) /* Send Task Priority Msgs */ +#define X86_FEATURE_PDCM (4 * 32 + 15) /* Performance Capabilities */ +#define X86_FEATURE_DCA (4 * 32 + 18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_1 (4 * 32 + 19) /* "sse4_1" SSE-4.1 */ +#define X86_FEATURE_XMM4_2 (4 * 32 + 20) /* "sse4_2" SSE-4.2 */ +#define X86_FEATURE_X2APIC (4 * 32 + 21) /* x2APIC */ +#define X86_FEATURE_MOVBE (4 * 32 + 22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT (4 * 32 + 23) /* POPCNT instruction */ +#define X86_FEATURE_AES (4 * 32 + 25) /* AES instructions */ +#define X86_FEATURE_XSAVE (4 * 32 + 26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define X86_FEATURE_OSXSAVE (4 * 32 + 27) /* "" XSAVE enabled in the OS */ +#define X86_FEATURE_AVX (4 * 32 + 28) /* Advanced Vector Extensions */ +#define X86_FEATURE_HYPERVISOR (4 * 32 + 31) /* Running on a hypervisor */ + +/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ +#define X86_FEATURE_LAHF_LM (6 * 32 + 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY (6 * 32 + 1) /* HyperThreading invalid */ +#define X86_FEATURE_SVM (6 * 32 + 2) /* Secure virtual machine */ +#define X86_FEATURE_EXTAPIC (6 * 32 + 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY (6 * 32 + 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM (6 * 32 + 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A (6 * 32 + 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE (6 * 32 + 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH (6 * 32 + 8) /* 3DNow prefetch */ +#define X86_FEATURE_OSVW (6 * 32 + 9) /* OS Visible Workaround */ +#define X86_FEATURE_IBS (6 * 32 + 10) /* Instruction Based Sampling */ +#define X86_FEATURE_SSE5 (6 * 32 + 11) /* SSE-5 */ +#define X86_FEATURE_SKINIT (6 * 32 + 12) /* SKINIT/STGI instructions */ +#define X86_FEATURE_WDT (6 * 32 + 13) /* Watchdog timer */ +#define X86_FEATURE_NODEID_MSR (6 * 32 + 19) /* NodeId MSR */ + +/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ +#define X86_FEATURE_RECOVERY (2 * 32 + 0) /* CPU in recovery mode */ +#define X86_FEATURE_LONGRUN (2 * 32 + 1) /* Longrun power control */ +#define X86_FEATURE_LRTI (2 * 32 + 3) /* LongRun table interface */ + + #ifdef __KVM_HAVE_IOAPIC void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, union kvm_ioapic_redirect_entry *entry, @@ -2039,7 +2163,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); -extern unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot); +unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot); int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, @@ -2183,6 +2307,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, int mask); +extern int irqchip_in_kernel(struct kvm *kvm); +extern void kvm_inject_nmi(struct kvm_vcpu *); + int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); @@ -2191,6 +2318,12 @@ int kvm_assign_device(struct kvm *kvm, int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); +extern unsigned long kvm_rip_read(struct kvm_vcpu *); +extern int kvm_vcpu_is_bsp(struct kvm_vcpu *); + +extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, + uint32_t function, uint32_t index); + #define for_each_unsync_children(bitmap, idx) \ for (idx = bt_getlowbit(bitmap, 0, 512); \ idx < 512; \ @@ -2200,5 +2333,7 @@ int kvm_deassign_device(struct kvm *kvm, #define BITS_PER_LONG (sizeof (unsigned long) * 8) +#define MSR_EFER 0xc0000080 /* extended feature register */ + #endif #endif /*SOLARIS_KVM_H*/ diff --git a/kvm_lapic.c b/kvm_lapic.c index 54db1e6..545b5cb 100644 --- a/kvm_lapic.c +++ b/kvm_lapic.c @@ -31,20 +31,1027 @@ #include "iodev.h" #include "kvm.h" #include "apicdef.h" +#include "ioapic.h" #include "irq.h" -extern uint32_t apic_get_reg(struct kvm_lapic *, int); -extern int apic_enabled(struct kvm_lapic *); -extern int apic_hw_enabled(struct kvm_lapic *); -extern int __apic_accept_irq(struct kvm_lapic *, int, int, int, int); +int __apic_accept_irq(struct kvm_lapic *, int, int, int, int); extern caddr_t page_address(page_t *); +#define LVT_MASK \ + (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) + +#define LINT_MASK \ + (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ + APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) + +#define VEC_POS(v) ((v) & (32 - 1)) +#define REG_POS(v) (((v) >> 5) << 4) + + +inline uint32_t +apic_get_reg(struct kvm_lapic *apic, int reg_off) +{ + return (*((uint32_t *)((uintptr_t)apic->regs + reg_off))); +} + +void +apic_set_reg(struct kvm_lapic *apic, int reg_off, uint32_t val) +{ + *((uint32_t *)((uintptr_t)apic->regs + reg_off)) = val; +} + +static inline int +apic_test_and_set_vector(int vec, caddr_t bitmap) +{ + return (test_and_set_bit(VEC_POS(vec), (unsigned long *)(bitmap + + REG_POS(vec)))); +} + +static inline int +apic_test_and_clear_vector(int vec, caddr_t bitmap) +{ + return (test_and_clear_bit(VEC_POS(vec), + (unsigned long *)(bitmap + REG_POS(vec)))); +} + +inline void +apic_set_vector(int vec, caddr_t bitmap) +{ + set_bit(VEC_POS(vec), (unsigned long *)(bitmap + REG_POS(vec))); +} + +inline void +apic_clear_vector(int vec, caddr_t bitmap) +{ + clear_bit(VEC_POS(vec), (unsigned long *)(bitmap + REG_POS(vec))); +} + +inline int +apic_hw_enabled(struct kvm_lapic *apic) +{ + return ((apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE); +} + +inline int +apic_sw_enabled(struct kvm_lapic *apic) +{ + return (apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED); +} + +inline int +apic_enabled(struct kvm_lapic *apic) +{ + return (apic_sw_enabled(apic) && apic_hw_enabled(apic)); +} + +int +kvm_apic_id(struct kvm_lapic *apic) +{ + return ((apic_get_reg(apic, APIC_ID) >> 24) & 0xff); +} + static int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) { return (!(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED)); } +static int +apic_lvtt_period(struct kvm_lapic *apic) +{ + return (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC); +} + +static inline int +apic_lvt_nmi_mode(uint32_t lvt_val) +{ + return ((lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI); +} + +void +kvm_apic_set_version(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + struct kvm_cpuid_entry2 *feat; + uint32_t v = APIC_VERSION; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); + if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) + v |= APIC_LVR_DIRECTED_EOI; + apic_set_reg(apic, APIC_LVR, v); +} + +static inline int +apic_x2apic_mode(struct kvm_lapic *apic) +{ + return (apic->vcpu->arch.apic_base & X2APIC_ENABLE); +} + +static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { + LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ + LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ + LVT_MASK | APIC_MODE_MASK, /* LVTPC */ + LINT_MASK, LINT_MASK, /* LVT0-1 */ + LVT_MASK /* LVTERR */ +}; + +static int +fls(int x) +{ + int r = 32; + + if (!x) + return (0); + + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + + return (r); +} + +static int +find_highest_vector(void *bitmap) +{ + uint32_t *word = bitmap; + int word_offset = MAX_APIC_VECTOR >> 5; + + while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) + continue; + + if (!word_offset && !word[0]) + return (-1); + else + return (fls(word[word_offset << 2]) - 1 + (word_offset << 5)); +} + +static inline int +apic_test_and_set_irr(int vec, struct kvm_lapic *apic) +{ + apic->irr_pending = 1; + return (apic_test_and_set_vector(vec, (void *)((uintptr_t)apic->regs + + APIC_IRR))); +} + +static inline int +apic_search_irr(struct kvm_lapic *apic) +{ + return (find_highest_vector((void *)((uintptr_t)apic->regs + + APIC_IRR))); +} + +static inline int +apic_find_highest_irr(struct kvm_lapic *apic) +{ + int result; + + if (!apic->irr_pending) + return (-1); + + result = apic_search_irr(apic); + ASSERT(result == -1 || result >= 16); + + return (result); +} + +static inline void +apic_clear_irr(int vec, struct kvm_lapic *apic) +{ + apic->irr_pending = 0; + apic_clear_vector(vec, (void *)((uintptr_t)apic->regs + APIC_IRR)); + if (apic_search_irr(apic) != -1) + apic->irr_pending = 1; +} + +int +kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int highest_irr; + + /* + * This may race with setting of irr in __apic_accept_irq() and + * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq + * will cause vmexit immediately and the value will be recalculated + * on the next vmentry. + */ + if (!apic) + return (0); + + highest_irr = apic_find_highest_irr(apic); + + return (highest_irr); +} + +int +kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + return (__apic_accept_irq(apic, irq->delivery_mode, irq->vector, + irq->level, irq->trig_mode)); +} + +inline int +apic_find_highest_isr(struct kvm_lapic *apic) +{ + int ret; + + ret = find_highest_vector((void *)((uintptr_t)apic->regs + APIC_ISR)); + ASSERT(ret == -1 || ret >= 16); + + return (ret); +} + +void +apic_update_ppr(struct kvm_lapic *apic) +{ + uint32_t tpr, isrv, ppr; + int isr; + + tpr = apic_get_reg(apic, APIC_TASKPRI); + isr = apic_find_highest_isr(apic); + isrv = (isr != -1) ? isr : 0; + + if ((tpr & 0xf0) >= (isrv & 0xf0)) + ppr = tpr & 0xff; + else + ppr = isrv & 0xf0; + + apic_set_reg(apic, APIC_PROCPRI, ppr); +} + +void +apic_set_tpr(struct kvm_lapic *apic, uint32_t tpr) +{ + apic_set_reg(apic, APIC_TASKPRI, tpr); + apic_update_ppr(apic); +} + +int +kvm_apic_match_physical_addr(struct kvm_lapic *apic, uint16_t dest) +{ + return (dest == 0xff || kvm_apic_id(apic) == dest); +} + +int +kvm_apic_match_logical_addr(struct kvm_lapic *apic, uint8_t mda) +{ + int result = 0; + uint32_t logical_id; + + if (apic_x2apic_mode(apic)) { + logical_id = apic_get_reg(apic, APIC_LDR); + return (logical_id & mda); + } + + logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); + + switch (apic_get_reg(apic, APIC_DFR)) { + case APIC_DFR_FLAT: + if (logical_id & mda) + result = 1; + break; + case APIC_DFR_CLUSTER: + if (((logical_id >> 4) == (mda >> 0x4)) && + (logical_id & mda & 0xf)) + result = 1; + break; + default: + cmn_err(CE_WARN, "Bad DFR vcpu %d: %08x\n", + apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); + break; + } + + return (result); +} + +int +kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode) +{ + int result = 0; + struct kvm_lapic *target = vcpu->arch.apic; + + /* XXX - debug */ + cmn_err(CE_NOTE, "target %p, source %p, dest 0x%x, dest_mode 0x%x, " + "short_hand 0x%x\n", target, source, dest, dest_mode, short_hand); + + ASSERT(target != NULL); + switch (short_hand) { + case APIC_DEST_NOSHORT: + if (dest_mode == 0) + /* Physical mode. */ + result = kvm_apic_match_physical_addr(target, dest); + else + /* Logical mode. */ + result = kvm_apic_match_logical_addr(target, dest); + break; + case APIC_DEST_SELF: + result = (target == source); + break; + case APIC_DEST_ALLINC: + result = 1; + break; + case APIC_DEST_ALLBUT: + result = (target != source); + break; + default: + cmn_err(CE_WARN, "Bad dest shorthand value %x\n", short_hand); + break; + } + + return (result); +} + +/* + * Add a pending IRQ into lapic. + * Return 1 if successfully added and 0 if discarded. + */ +int +__apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode) +{ + int result = 0; + struct kvm_vcpu *vcpu = apic->vcpu; + + switch (delivery_mode) { + case APIC_DM_LOWEST: + vcpu->arch.apic_arb_prio++; + case APIC_DM_FIXED: + /* FIXME add logic for vcpu on reset */ + if (!apic_enabled(apic)) + break; + + if (trig_mode) { + apic_set_vector(vector, (void *)((uintptr_t)apic->regs + + APIC_TMR)); + } else + apic_clear_vector(vector, + (void *)((uintptr_t)apic->regs + APIC_TMR)); + + result = !apic_test_and_set_irr(vector, apic); + if (!result) { + break; + } + + kvm_vcpu_kick(vcpu); + break; + + case APIC_DM_REMRD: + break; + + case APIC_DM_SMI: + break; + + case APIC_DM_NMI: + result = 1; + kvm_inject_nmi(vcpu); + kvm_vcpu_kick(vcpu); + break; + + case APIC_DM_INIT: + if (level) { + result = 1; + vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; + kvm_vcpu_kick(vcpu); + } + break; + + case APIC_DM_STARTUP: + if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + result = 1; + vcpu->arch.sipi_vector = vector; + vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; + kvm_vcpu_kick(vcpu); + } + break; + + case APIC_DM_EXTINT: + /* + * Should only be called by kvm_apic_local_deliver() with LVT0, + * before NMI watchdog was enabled. Already handled by + * kvm_apic_accept_pic_intr(). + */ + break; + + default: + break; + } + + return (result); +} + +int +kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) +{ + return (vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio); +} + +static void +apic_set_eoi(struct kvm_lapic *apic) +{ + int vector = apic_find_highest_isr(apic); + int trigger_mode; + /* + * Not every write EOI will has corresponding ISR, + * one example is when Kernel check timer on setup_IO_APIC + */ + if (vector == -1) + return; + + apic_clear_vector(vector, (void *)((uintptr_t)apic->regs + APIC_ISR)); + apic_update_ppr(apic); + + if (apic_test_and_clear_vector(vector, (void *)((uintptr_t)apic->regs + + APIC_TMR))) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) + kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); +} + +static void +apic_send_ipi(struct kvm_lapic *apic) +{ + uint32_t icr_low = apic_get_reg(apic, APIC_ICR); + uint32_t icr_high = apic_get_reg(apic, APIC_ICR2); + struct kvm_lapic_irq irq; + + irq.vector = icr_low & APIC_VECTOR_MASK; + irq.delivery_mode = icr_low & APIC_MODE_MASK; + irq.dest_mode = icr_low & APIC_DEST_MASK; + irq.level = icr_low & APIC_INT_ASSERT; + irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; + irq.shorthand = icr_low & APIC_SHORT_MASK; + if (apic_x2apic_mode(apic)) + irq.dest_id = icr_high; + else + irq.dest_id = GET_APIC_DEST_FIELD(icr_high); + +#ifdef XXX_KVM_TRACE + trace_kvm_apic_ipi(icr_low, irq.dest_id); +#endif + + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); +} + +static uint32_t +apic_get_tmcct(struct kvm_lapic *apic) +{ + hrtime_t now, remaining, elapsed; + uint32_t tmcct; + + VERIFY(apic != NULL); + + /* if initial count is 0, current count should also be 0 */ + if (apic_get_reg(apic, APIC_TMICT) == 0) + return (0); + + now = gethrtime(); + elapsed = now - apic->lapic_timer.start - + apic->lapic_timer.period * apic->lapic_timer.intervals; + remaining = apic->lapic_timer.period - elapsed; + + if (remaining < 0) + remaining = 0; + + remaining = remaining % apic->lapic_timer.period; + tmcct = remaining / (APIC_BUS_CYCLE_NS * apic->divide_count); + + return (tmcct); +} + +static void +__report_tpr_access(struct kvm_lapic *apic, int write) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + struct kvm_run *run = vcpu->run; + + set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); + run->tpr_access.rip = kvm_rip_read(vcpu); + run->tpr_access.is_write = write; +} + +static inline void +report_tpr_access(struct kvm_lapic *apic, int write) +{ + if (apic->vcpu->arch.tpr_access_reporting) + __report_tpr_access(apic, write); +} + +static uint32_t +__apic_read(struct kvm_lapic *apic, unsigned int offset) +{ + uint32_t val = 0; + + if (offset >= LAPIC_MMIO_LENGTH) + return (0); + + switch (offset) { + case APIC_ID: + if (apic_x2apic_mode(apic)) + val = kvm_apic_id(apic); + else + val = kvm_apic_id(apic) << 24; + break; + case APIC_ARBPRI: + cmn_err(CE_WARN, "Access APIC ARBPRI register " + "which is for P6\n"); + break; + + case APIC_TMCCT: /* Timer CCR */ + val = apic_get_tmcct(apic); + break; + + case APIC_TASKPRI: + report_tpr_access(apic, 0); + /* fall thru */ + default: + apic_update_ppr(apic); + val = apic_get_reg(apic, offset); + break; + } + + return (val); +} + +static inline struct kvm_lapic * +to_lapic(struct kvm_io_device *dev) +{ +#ifdef XXX + return (container_of(dev, struct kvm_lapic, dev)); +#else + XXX_KVM_PROBE; + return (dev->lapic); +#endif +} + +int +apic_reg_read(struct kvm_lapic *apic, uint32_t offset, int len, void *data) +{ + unsigned char alignment = offset & 0xf; + uint32_t result; + /* this bitmask has a bit cleared for each reserver register */ + static const uint64_t rmask = 0x43ff01ffffffe70cULL; + + if ((alignment + len) > 4) { + return (1); + } + + if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) { + return (1); + } + + result = __apic_read(apic, offset & ~0xf); +#ifdef XXX_KVM_TRACE + trace_kvm_apic_read(offset, result); +#endif + + switch (len) { + case 1: + case 2: + case 4: + memcpy(data, (char *)&result + alignment, len); + break; + default: + cmn_err(CE_WARN, "Local APIC read with len = %x, " + "should be 1,2, or 4 instead\n", len); + break; + } + + return (0); +} + +static int +apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) +{ + return (apic_hw_enabled(apic) && + addr >= apic->base_address && + addr < apic->base_address + LAPIC_MMIO_LENGTH); +} + +static int +apic_mmio_read(struct kvm_io_device *this, gpa_t address, int len, void *data) +{ + struct kvm_lapic *apic = to_lapic(this); + uint32_t offset = address - apic->base_address; + + if (!apic_mmio_in_range(apic, address)) + return (-EOPNOTSUPP); + + apic_reg_read(apic, offset, len, data); + + return (0); +} + +void +update_divide_count(struct kvm_lapic *apic) +{ + uint32_t tmp1, tmp2, tdcr; + + tdcr = apic_get_reg(apic, APIC_TDCR); + tmp1 = tdcr & 0xf; + tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; + apic->divide_count = 0x1 << (tmp2 & 0x7); +} + +void +start_apic_timer(struct kvm_lapic *apic) +{ + hrtime_t now = gethrtime(); + + apic->lapic_timer.period = (uint64_t)apic_get_reg(apic, APIC_TMICT) * + APIC_BUS_CYCLE_NS * apic->divide_count; + + if (!apic->lapic_timer.period) + return; + + mutex_enter(&cpu_lock); + + apic->lapic_timer.start = gethrtime(); + + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. + * + * If it is a one shot, we want to program it differently. + */ + if (apic_lvtt_period(apic)) { + if (apic->lapic_timer.period < NSEC_PER_MSEC / 2) + apic->lapic_timer.period = NSEC_PER_MSEC / 2; + apic->lapic_timer.kvm_cyc_when.cyt_when = 0; + apic->lapic_timer.kvm_cyc_when.cyt_interval = + apic->lapic_timer.period; + } else { + apic->lapic_timer.kvm_cyc_when.cyt_when = + apic->lapic_timer.start + apic->lapic_timer.period; + apic->lapic_timer.kvm_cyc_when.cyt_interval = CY_INFINITY; + } + + apic->lapic_timer.kvm_cyclic_id = + cyclic_add(&apic->lapic_timer.kvm_cyc_handler, + &apic->lapic_timer.kvm_cyc_when); + apic->lapic_timer.active = 1; + apic->lapic_timer.intervals = 0; + mutex_exit(&cpu_lock); +} + +static void +apic_manage_nmi_watchdog(struct kvm_lapic *apic, uint32_t lvt0_val) +{ + int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); + + if (apic_lvt_nmi_mode(lvt0_val)) { + if (!nmi_wd_enabled) { + /* XXX - debug */ + cmn_err(CE_NOTE, "Receive NMI setting on APIC_LVT0 " + "for cpu %d\n", apic->vcpu->vcpu_id); + apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + } + } else if (nmi_wd_enabled) + apic->vcpu->kvm->arch.vapics_in_nmi_mode--; +} + +int +apic_reg_write(struct kvm_lapic *apic, uint32_t reg, uint32_t val) +{ + int ret = 0; + +#ifdef XXX_KVM_TRACE + trace_kvm_apic_write(reg, val); +#endif + + switch (reg) { + case APIC_ID: /* Local APIC ID */ + if (!apic_x2apic_mode(apic)) + apic_set_reg(apic, APIC_ID, val); + else + ret = 1; + break; + + case APIC_TASKPRI: + report_tpr_access(apic, 1); + apic_set_tpr(apic, val & 0xff); + break; + + case APIC_EOI: + apic_set_eoi(apic); + break; + + case APIC_LDR: + if (!apic_x2apic_mode(apic)) + apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); + else + ret = 1; + break; + + case APIC_DFR: + if (!apic_x2apic_mode(apic)) + apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); + else + ret = 1; + break; + + case APIC_SPIV: { + uint32_t mask = 0x3ff; + if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) + mask |= APIC_SPIV_DIRECTED_EOI; + apic_set_reg(apic, APIC_SPIV, val & mask); + if (!(val & APIC_SPIV_APIC_ENABLED)) { + int i; + uint32_t lvt_val; + + for (i = 0; i < APIC_LVT_NUM; i++) { + lvt_val = apic_get_reg(apic, + APIC_LVTT + 0x10 * i); + apic_set_reg(apic, APIC_LVTT + 0x10 * i, + lvt_val | APIC_LVT_MASKED); + } + /* XXX pending needs protection? */ + apic->lapic_timer.pending = 0; + } + break; + } + case APIC_ICR: + /* No delay here, so we always clear the pending bit */ + apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); + apic_send_ipi(apic); + break; + + case APIC_ICR2: + if (!apic_x2apic_mode(apic)) + val &= 0xff000000; + apic_set_reg(apic, APIC_ICR2, val); + break; + + case APIC_LVT0: + apic_manage_nmi_watchdog(apic, val); + case APIC_LVTT: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT1: + case APIC_LVTERR: + /* TODO: Check vector */ + if (!apic_sw_enabled(apic)) + val |= APIC_LVT_MASKED; + + val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + apic_set_reg(apic, reg, val); + + break; + + case APIC_TMICT: + mutex_enter(&cpu_lock); + if (apic->lapic_timer.active) { + cyclic_remove(apic->lapic_timer.kvm_cyclic_id); + apic->lapic_timer.active = 0; + } + mutex_exit(&cpu_lock); + + apic_set_reg(apic, APIC_TMICT, val); + start_apic_timer(apic); + break; + + case APIC_TDCR: + if (val & 4) + cmn_err(CE_WARN, "KVM_WRITE:TDCR %x\n", val); + apic_set_reg(apic, APIC_TDCR, val); + update_divide_count(apic); + break; + + case APIC_ESR: + if (apic_x2apic_mode(apic) && val != 0) { + cmn_err(CE_WARN, "KVM_WRITE:ESR not zero %x\n", val); + ret = 1; + } + break; + + case APIC_SELF_IPI: + if (apic_x2apic_mode(apic)) { + apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); + } else + ret = 1; + break; + default: + ret = 1; + break; + } + + return (ret); +} + +static int +apic_mmio_write(struct kvm_io_device *this, + gpa_t address, int len, const void *data) +{ + struct kvm_lapic *apic = to_lapic(this); + unsigned int offset = address - apic->base_address; + uint32_t val; + + if (!apic_mmio_in_range(apic, address)) + return (-EOPNOTSUPP); + + /* + * APIC register must be aligned on 128-bits boundary. + * 32/64/128 bits registers must be accessed thru 32 bits. + * Refer SDM 8.4.1 + */ + if (len != 4 || (offset & 0xf)) { + /* Don't shout loud, $infamous_os would cause only noise. */ + return (0); + } + + val = *(uint32_t *)data; + + apic_reg_write(apic, offset & 0xff0, val); + + return (0); +} + +void +kvm_free_lapic(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + if (apic == NULL) + return; + + mutex_enter(&cpu_lock); + if (apic->lapic_timer.active) + cyclic_remove(apic->lapic_timer.kvm_cyclic_id); + mutex_exit(&cpu_lock); + + if (apic->regs) + kmem_free(apic->regs, PAGESIZE); + + kmem_free(vcpu->arch.apic, sizeof (struct kvm_lapic)); +} + +/* + * Local APIC interface. + */ +void +kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (!apic) + return; + + apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | + (apic_get_reg(apic, APIC_TASKPRI) & 4)); +} + +uint64_t +kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + uint64_t tpr; + + if (apic == NULL) + return (0); + + tpr = (uint64_t)apic_get_reg(apic, APIC_TASKPRI); + + return ((tpr & 0xf0) >> 4); +} + +void +kvm_lapic_set_base(struct kvm_vcpu *vcpu, uint64_t value) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (!apic) { + value |= MSR_IA32_APICBASE_BSP; + vcpu->arch.apic_base = value; + return; + } + + if (!kvm_vcpu_is_bsp(apic->vcpu)) + value &= ~MSR_IA32_APICBASE_BSP; + + vcpu->arch.apic_base = value; + if (apic_x2apic_mode(apic)) { + uint32_t id = kvm_apic_id(apic); + uint32_t ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); + apic_set_reg(apic, APIC_LDR, ldr); + } + + apic->base_address = apic->vcpu->arch.apic_base & + MSR_IA32_APICBASE_BASE; +} + +void +kvm_lapic_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic; + int i; + + ASSERT(vcpu); + apic = vcpu->arch.apic; + ASSERT(apic != NULL); + +#ifdef XXX + /* Stop the timer in case it's a reset to an active apic */ + hrtimer_cancel(&apic->lapic_timer.timer); +#else + mutex_enter(&cpu_lock); + if (apic->lapic_timer.active) { + cyclic_remove(apic->lapic_timer.kvm_cyclic_id); + apic->lapic_timer.active = 0; + } + mutex_exit(&cpu_lock); + XXX_KVM_PROBE; +#endif + + apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); + kvm_apic_set_version(apic->vcpu); + + for (i = 0; i < APIC_LVT_NUM; i++) + apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + + apic_set_reg(apic, APIC_LVT0, + SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); + + apic_set_reg(apic, APIC_DFR, 0xffffffffU); + apic_set_reg(apic, APIC_SPIV, 0xff); + apic_set_reg(apic, APIC_TASKPRI, 0); + apic_set_reg(apic, APIC_LDR, 0); + apic_set_reg(apic, APIC_ESR, 0); + apic_set_reg(apic, APIC_ICR, 0); + apic_set_reg(apic, APIC_ICR2, 0); + apic_set_reg(apic, APIC_TDCR, 0); + apic_set_reg(apic, APIC_TMICT, 0); + for (i = 0; i < 8; i++) { + apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); + apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); + apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); + } + apic->irr_pending = 0; + update_divide_count(apic); +#ifdef XXX + atomic_set(&apic->lapic_timer.pending, 0); +#else + apic->lapic_timer.pending = 0; + XXX_KVM_PROBE; +#endif + if (kvm_vcpu_is_bsp(vcpu)) + vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + apic_update_ppr(apic); + + vcpu->arch.apic_arb_prio = 0; + + cmn_err(CE_NOTE, "%s: vcpu=%p, id=%d, base_msr= %lx PRIx64 " + "base_address=%lx\n", __func__, vcpu, kvm_apic_id(apic), + vcpu->arch.apic_base, apic->base_address); +} + +int +kvm_apic_present(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic)); +} + +int +kvm_lapic_enabled(struct kvm_vcpu *vcpu) +{ + return (kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic)); +} + +/* + * APIC timer interface + */ +static int +lapic_is_periodic(struct kvm_timer *ktimer) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)((caddr_t)ktimer - + offsetof(struct kvm_lapic, lapic_timer)); + + return (apic_lvtt_period(apic)); +} + int apic_has_pending_timer(struct kvm_vcpu *vcpu) { @@ -78,6 +1085,100 @@ kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) } void +kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic) + kvm_apic_local_deliver(apic, APIC_LVT0); +} + +static struct kvm_timer_ops lapic_timer_ops = { + .is_periodic = lapic_is_periodic, +}; + +static const struct kvm_io_device_ops apic_mmio_ops = { + .read = apic_mmio_read, + .write = apic_mmio_write, +}; + +int +kvm_create_lapic(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic; + + ASSERT(vcpu != NULL); + + apic = kmem_zalloc(sizeof (*apic), KM_SLEEP); + if (!apic) + goto nomem; + + vcpu->arch.apic = apic; + + apic->regs = kmem_zalloc(PAGESIZE, KM_SLEEP); + memset(apic->regs, 0, PAGESIZE); + apic->vcpu = vcpu; + + apic->lapic_timer.kvm_cyc_handler.cyh_func = kvm_timer_fire; + apic->lapic_timer.kvm_cyc_handler.cyh_arg = &apic->lapic_timer; + apic->lapic_timer.kvm_cyc_handler.cyh_level = CY_LOW_LEVEL; + apic->lapic_timer.active = 0; + + apic->lapic_timer.t_ops = &lapic_timer_ops; + apic->lapic_timer.kvm = vcpu->kvm; + apic->lapic_timer.vcpu = vcpu; + + apic->base_address = APIC_DEFAULT_PHYS_BASE; + vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; + + kvm_lapic_reset(vcpu); + kvm_iodevice_init(&apic->dev, &apic_mmio_ops); + apic->dev.lapic = apic; + + return (0); +nomem_free_apic: + if (apic) + kmem_free(apic, sizeof (struct kvm_lapic)); +nomem: + return (-ENOMEM); +} + +int +kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + int highest_irr; + + if (!apic || !apic_enabled(apic)) + return (-1); + + apic_update_ppr(apic); + highest_irr = apic_find_highest_irr(apic); + if ((highest_irr == -1) || + ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) + return (-1); + + return (highest_irr); +} + +int +kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) +{ + uint32_t lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); + int r = 0; + + if (kvm_vcpu_is_bsp(vcpu)) { + if (!apic_hw_enabled(vcpu->arch.apic)) + r = 1; + if ((lvt0 & APIC_LVT_MASKED) == 0 && + GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) + r = 1; + } + + return (r); +} + +void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -96,31 +1197,42 @@ kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) #endif } -void -kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) +int +kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { + int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; - if (apic) - kvm_apic_local_deliver(apic, APIC_LVT0); + if (vector == -1) + return (-1); + + apic_set_vector(vector, (void *)((uintptr_t)apic->regs + APIC_ISR)); + apic_update_ppr(apic); + apic_clear_irr(vector, apic); + + return (vector); } void -kvm_free_lapic(struct kvm_vcpu *vcpu) +kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic == NULL) - return; + + apic->base_address = vcpu->arch.apic_base & + MSR_IA32_APICBASE_BASE; + kvm_apic_set_version(vcpu); + + apic_update_ppr(apic); mutex_enter(&cpu_lock); if (apic->lapic_timer.active) cyclic_remove(apic->lapic_timer.kvm_cyclic_id); + apic->lapic_timer.active = 0; mutex_exit(&cpu_lock); - if (apic->regs) - kmem_free(apic->regs, PAGESIZE); - - kmem_free(vcpu->arch.apic, sizeof (struct kvm_lapic)); + update_divide_count(apic); + start_apic_timer(apic); + apic->irr_pending = 1; } void @@ -140,3 +1252,139 @@ __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) XXX_KVM_PROBE; #endif } + +void +kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) +{ + uint32_t data; + void *vapic; + + if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) + return; + + vapic = page_address(vcpu->arch.apic->vapic_page); + + data = *(uint32_t *)((uintptr_t)vapic + + offset_in_page(vcpu->arch.apic->vapic_addr)); +#ifdef XXX + kunmap_atomic(vapic, KM_USER0); +#else + XXX_KVM_PROBE; +#endif + + apic_set_tpr(vcpu->arch.apic, data & 0xff); +} + +void +kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) +{ + uint32_t data, tpr; + int max_irr, max_isr; + struct kvm_lapic *apic; + void *vapic; + + if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) + return; + + apic = vcpu->arch.apic; + tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; + max_irr = apic_find_highest_irr(apic); + if (max_irr < 0) + max_irr = 0; + max_isr = apic_find_highest_isr(apic); + if (max_isr < 0) + max_isr = 0; + data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); + + vapic = page_address(vcpu->arch.apic->vapic_page); + + *(uint32_t *)((uintptr_t)vapic + + offset_in_page(vcpu->arch.apic->vapic_addr)) = data; +#ifdef XXX + kunmap_atomic(vapic, KM_USER0); +#else + XXX_KVM_PROBE; +#endif +} + +int +kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, struct kvm_vapic_addr *addr) +{ + if (!irqchip_in_kernel(vcpu->kvm)) + return (EINVAL); + + vcpu->arch.apic->vapic_addr = addr->vapic_addr; + + return (0); +} + +int +kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, uint32_t msr, uint64_t data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + uint32_t reg = (msr - APIC_BASE_MSR) << 4; + + if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) + return (1); + + /* if this is ICR write vector before command */ + if (msr == 0x830) + apic_reg_write(apic, APIC_ICR2, (uint32_t)(data >> 32)); + + return (apic_reg_write(apic, reg, (uint32_t)data)); +} + +int +kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, uint32_t msr, uint64_t *data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + uint32_t reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; + + if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) + return (1); + + if (apic_reg_read(apic, reg, 4, &low)) + return (1); + + if (msr == 0x830) + apic_reg_read(apic, APIC_ICR2, 4, &high); + + *data = (((uint64_t)high) << 32) | low; + + return (0); +} + +int +kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, uint32_t reg, uint64_t data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return (1); + + /* if this is ICR write vector before command */ + if (reg == APIC_ICR) + apic_reg_write(apic, APIC_ICR2, (uint32_t)(data >> 32)); + + return (apic_reg_write(apic, reg, (uint32_t)data)); +} + +int +kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, uint32_t reg, uint64_t *data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + uint32_t low, high = 0; + + if (!irqchip_in_kernel(vcpu->kvm)) + return (1); + + if (apic_reg_read(apic, reg, 4, &low)) + return (1); + + if (reg == APIC_ICR) + apic_reg_read(apic, APIC_ICR2, 4, &high); + + *data = (((uint64_t)high) << 32) | low; + + return (0); +} diff --git a/kvm_lapic.h b/kvm_lapic.h index 65d89d3..6f356a7 100644 --- a/kvm_lapic.h +++ b/kvm_lapic.h @@ -1,11 +1,53 @@ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H +struct kvm_vapic_addr; + +extern int kvm_create_lapic(struct kvm_vcpu *); +extern void kvm_lapic_reset(struct kvm_vcpu *); +extern void kvm_free_lapic(struct kvm_vcpu *); + +extern void kvm_apic_set_version(struct kvm_vcpu *); +extern int kvm_apic_present(struct kvm_vcpu *vcpu); + +extern void kvm_lapic_sync_from_vapic(struct kvm_vcpu *); +extern void kvm_lapic_sync_to_vapic(struct kvm_vcpu *); + +extern int kvm_apic_has_interrupt(struct kvm_vcpu *); +extern int kvm_apic_accept_pic_intr(struct kvm_vcpu *); +extern int kvm_get_apic_interrupt(struct kvm_vcpu *); +extern int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode); + +extern int kvm_lapic_enabled(struct kvm_vcpu *vcpu); +extern uint64_t kvm_lapic_get_cr8(struct kvm_vcpu *); +extern int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); +extern int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); +extern int kvm_apic_compare_prio(struct kvm_vcpu *, struct kvm_vcpu *); + +extern void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); +extern void kvm_lapic_set_base(struct kvm_vcpu *vcpu, uint64_t value); +extern int kvm_lapic_set_vapic_addr(struct kvm_vcpu *, struct kvm_vapic_addr *); + +extern int kvm_x2apic_msr_write(struct kvm_vcpu *, uint32_t, uint64_t); +extern int kvm_x2apic_msr_read(struct kvm_vcpu *, uint32_t, uint64_t *); + +extern int kvm_hv_vapic_msr_write(struct kvm_vcpu *, uint32_t, uint64_t); +extern int kvm_hv_vapic_msr_read(struct kvm_vcpu *, uint32_t, uint64_t *); + + + +extern uint64_t kvm_get_apic_base(struct kvm_vcpu *vcpu); +extern void kvm_set_apic_base(struct kvm_vcpu *vcpu, uint64_t data); + +extern int kvm_irq_delivery_to_apic(struct kvm *, + struct kvm_lapic *, struct kvm_lapic_irq *); +extern void kvm_apic_post_state_restore(struct kvm_vcpu *); + /* - * XXX - * Need to do proper header files at some point, sigh. + * XXX: needs to be in vmx */ +extern int vm_need_virtualize_apic_accesses(struct kvm *kvm); -void kvm_free_lapic(struct kvm_vcpu *); #endif @@ -55,10 +55,6 @@ extern int native_write_msr_safe(unsigned int msr, unsigned low, unsigned high); extern unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); -extern int kvm_apic_present(struct kvm_vcpu *vcpu); -extern int kvm_lapic_enabled(struct kvm_vcpu *vcpu); -extern void kvm_lapic_reset(struct kvm_vcpu *vcpu); -extern int vm_need_virtualize_apic_accesses(struct kvm *kvm); extern uint32_t vmcs_read32(unsigned long field); extern uint16_t vmcs_read16(unsigned long field); extern ulong kvm_read_cr4(struct kvm_vcpu *vcpu); @@ -403,7 +399,6 @@ vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) vmcs_writel(GUEST_RFLAGS, rflags); } - extern void kvm_shared_msr_cpu_online(void); int @@ -621,251 +616,14 @@ kvm_mmu_create(struct kvm_vcpu *vcpu) return (alloc_mmu_pages(vcpu)); } -inline uint32_t -apic_get_reg(struct kvm_lapic *apic, int reg_off) -{ - return (*((uint32_t *)((uintptr_t)apic->regs + reg_off))); -} - -void -apic_set_reg(struct kvm_lapic *apic, int reg_off, uint32_t val) -{ - *((uint32_t *)((uintptr_t)apic->regs + reg_off)) = val; -} - static inline int apic_x2apic_mode(struct kvm_lapic *apic) { return (apic->vcpu->arch.apic_base & X2APIC_ENABLE); } -static uint32_t -apic_get_tmcct(struct kvm_lapic *apic) -{ - hrtime_t now, remaining, elapsed; - uint32_t tmcct; - - VERIFY(apic != NULL); - - /* if initial count is 0, current count should also be 0 */ - if (apic_get_reg(apic, APIC_TMICT) == 0) - return (0); - - now = gethrtime(); - elapsed = now - apic->lapic_timer.start - - apic->lapic_timer.period * apic->lapic_timer.intervals; - remaining = apic->lapic_timer.period - elapsed; - - if (remaining < 0) - remaining = 0; - - remaining = remaining % apic->lapic_timer.period; - tmcct = remaining / (APIC_BUS_CYCLE_NS * apic->divide_count); - - return (tmcct); -} - extern unsigned long kvm_rip_read(struct kvm_vcpu *vcpu); -static void -__report_tpr_access(struct kvm_lapic *apic, int write) -{ - struct kvm_vcpu *vcpu = apic->vcpu; - struct kvm_run *run = vcpu->run; - - set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); - run->tpr_access.rip = kvm_rip_read(vcpu); - run->tpr_access.is_write = write; -} - -static inline void -report_tpr_access(struct kvm_lapic *apic, int write) -{ - if (apic->vcpu->arch.tpr_access_reporting) - __report_tpr_access(apic, write); -} - -extern void apic_update_ppr(struct kvm_lapic *apic); - -int -kvm_apic_id(struct kvm_lapic *apic) -{ - return ((apic_get_reg(apic, APIC_ID) >> 24) & 0xff); -} - -static uint32_t -__apic_read(struct kvm_lapic *apic, unsigned int offset) -{ - uint32_t val = 0; - - if (offset >= LAPIC_MMIO_LENGTH) - return (0); - - switch (offset) { - case APIC_ID: - if (apic_x2apic_mode(apic)) - val = kvm_apic_id(apic); - else - val = kvm_apic_id(apic) << 24; - break; - case APIC_ARBPRI: - cmn_err(CE_WARN, "Access APIC ARBPRI register " - "which is for P6\n"); - break; - - case APIC_TMCCT: /* Timer CCR */ - val = apic_get_tmcct(apic); - break; - - case APIC_TASKPRI: - report_tpr_access(apic, 0); - /* fall thru */ - default: - apic_update_ppr(apic); - val = apic_get_reg(apic, offset); - break; - } - - return (val); -} - -static inline struct kvm_lapic * -to_lapic(struct kvm_io_device *dev) -{ -#ifdef XXX - return (container_of(dev, struct kvm_lapic, dev)); -#else - XXX_KVM_PROBE; - return (dev->lapic); -#endif -} - -int -apic_reg_read(struct kvm_lapic *apic, uint32_t offset, int len, void *data) -{ - unsigned char alignment = offset & 0xf; - uint32_t result; - /* this bitmask has a bit cleared for each reserver register */ - static const uint64_t rmask = 0x43ff01ffffffe70cULL; - - if ((alignment + len) > 4) { - return (1); - } - - if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) { - return (1); - } - - result = __apic_read(apic, offset & ~0xf); -#ifdef XXX_KVM_TRACE - trace_kvm_apic_read(offset, result); -#endif - - switch (len) { - case 1: - case 2: - case 4: - memcpy(data, (char *)&result + alignment, len); - break; - default: - cmn_err(CE_WARN, "Local APIC read with len = %x, " - "should be 1,2, or 4 instead\n", len); - break; - } - - return (0); -} - -inline int -apic_hw_enabled(struct kvm_lapic *apic) -{ - return ((apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE); -} - -static int -apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) -{ - return (apic_hw_enabled(apic) && - addr >= apic->base_address && - addr < apic->base_address + LAPIC_MMIO_LENGTH); -} - -static int -apic_mmio_read(struct kvm_io_device *this, gpa_t address, int len, void *data) -{ - struct kvm_lapic *apic = to_lapic(this); - uint32_t offset = address - apic->base_address; - - if (!apic_mmio_in_range(apic, address)) - return (-EOPNOTSUPP); - - apic_reg_read(apic, offset, len, data); - - return (0); -} - -#define LVT_MASK \ - (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) - -#define LINT_MASK \ - (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ - APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) - -static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { - LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ - LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ - LVT_MASK | APIC_MODE_MASK, /* LVTPC */ - LINT_MASK, LINT_MASK, /* LVT0-1 */ - LVT_MASK /* LVTERR */ -}; - -static int apic_lvtt_period(struct kvm_lapic *apic) -{ - return (apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC); -} - -void -start_apic_timer(struct kvm_lapic *apic) -{ - hrtime_t now = gethrtime(); - - apic->lapic_timer.period = (uint64_t)apic_get_reg(apic, APIC_TMICT) * - APIC_BUS_CYCLE_NS * apic->divide_count; - - if (!apic->lapic_timer.period) - return; - - mutex_enter(&cpu_lock); - - apic->lapic_timer.start = gethrtime(); - - /* - * Do not allow the guest to program periodic timers with small - * interval, since the hrtimers are not throttled by the host - * scheduler. - * - * If it is a one shot, we want to program it differently. - */ - if (apic_lvtt_period(apic)) { - if (apic->lapic_timer.period < NSEC_PER_MSEC / 2) - apic->lapic_timer.period = NSEC_PER_MSEC / 2; - apic->lapic_timer.kvm_cyc_when.cyt_when = 0; - apic->lapic_timer.kvm_cyc_when.cyt_interval = - apic->lapic_timer.period; - } else { - apic->lapic_timer.kvm_cyc_when.cyt_when = - apic->lapic_timer.start + apic->lapic_timer.period; - apic->lapic_timer.kvm_cyc_when.cyt_interval = CY_INFINITY; - } - - apic->lapic_timer.kvm_cyclic_id = - cyclic_add(&apic->lapic_timer.kvm_cyc_handler, - &apic->lapic_timer.kvm_cyc_when); - apic->lapic_timer.active = 1; - apic->lapic_timer.intervals = 0; - mutex_exit(&cpu_lock); -} - inline static int kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) { @@ -877,223 +635,12 @@ kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) #endif } -#define VEC_POS(v) ((v) & (32 - 1)) -#define REG_POS(v) (((v) >> 5) << 4) - -inline void -apic_clear_vector(int vec, caddr_t bitmap) -{ - clear_bit(VEC_POS(vec), (unsigned long *)(bitmap + REG_POS(vec))); -} - void kvm_inject_nmi(struct kvm_vcpu *vcpu) { vcpu->arch.nmi_pending = 1; } -inline void -apic_set_vector(int vec, caddr_t bitmap) -{ - set_bit(VEC_POS(vec), (unsigned long *)(bitmap + REG_POS(vec))); -} - -static inline int -apic_test_and_set_vector(int vec, caddr_t bitmap) -{ - return (test_and_set_bit(VEC_POS(vec), (unsigned long *)(bitmap + - REG_POS(vec)))); -} - - -static inline int -apic_test_and_set_irr(int vec, struct kvm_lapic *apic) -{ - apic->irr_pending = 1; - return (apic_test_and_set_vector(vec, (void *)((uintptr_t)apic->regs + - APIC_IRR))); -} - -inline int -apic_sw_enabled(struct kvm_lapic *apic) -{ - return (apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED); -} - -inline int -apic_enabled(struct kvm_lapic *apic) -{ - return (apic_sw_enabled(apic) && apic_hw_enabled(apic)); -} - -/* - * Add a pending IRQ into lapic. - * Return 1 if successfully added and 0 if discarded. - */ -int -__apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, - int vector, int level, int trig_mode) -{ - int result = 0; - struct kvm_vcpu *vcpu = apic->vcpu; - - switch (delivery_mode) { - case APIC_DM_LOWEST: - vcpu->arch.apic_arb_prio++; - case APIC_DM_FIXED: - /* FIXME add logic for vcpu on reset */ - if (!apic_enabled(apic)) - break; - - if (trig_mode) { - apic_set_vector(vector, (void *)((uintptr_t)apic->regs + - APIC_TMR)); - } else - apic_clear_vector(vector, - (void *)((uintptr_t)apic->regs + APIC_TMR)); - - result = !apic_test_and_set_irr(vector, apic); - if (!result) { - break; - } - - kvm_vcpu_kick(vcpu); - break; - - case APIC_DM_REMRD: - break; - - case APIC_DM_SMI: - break; - - case APIC_DM_NMI: - result = 1; - kvm_inject_nmi(vcpu); - kvm_vcpu_kick(vcpu); - break; - - case APIC_DM_INIT: - if (level) { - result = 1; - vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; - kvm_vcpu_kick(vcpu); - } - break; - - case APIC_DM_STARTUP: - if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { - result = 1; - vcpu->arch.sipi_vector = vector; - vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; - kvm_vcpu_kick(vcpu); - } - break; - - case APIC_DM_EXTINT: - /* - * Should only be called by kvm_apic_local_deliver() with LVT0, - * before NMI watchdog was enabled. Already handled by - * kvm_apic_accept_pic_intr(). - */ - break; - - default: - break; - } - return (result); -} - -int -kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - return (__apic_accept_irq(apic, irq->delivery_mode, irq->vector, - irq->level, irq->trig_mode)); -} - -int -kvm_apic_match_physical_addr(struct kvm_lapic *apic, uint16_t dest) -{ - return (dest == 0xff || kvm_apic_id(apic) == dest); -} - -int -kvm_apic_match_logical_addr(struct kvm_lapic *apic, uint8_t mda) -{ - int result = 0; - uint32_t logical_id; - - if (apic_x2apic_mode(apic)) { - logical_id = apic_get_reg(apic, APIC_LDR); - return (logical_id & mda); - } - - logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); - - switch (apic_get_reg(apic, APIC_DFR)) { - case APIC_DFR_FLAT: - if (logical_id & mda) - result = 1; - break; - case APIC_DFR_CLUSTER: - if (((logical_id >> 4) == (mda >> 0x4)) && - (logical_id & mda & 0xf)) - result = 1; - break; - default: - cmn_err(CE_WARN, "Bad DFR vcpu %d: %08x\n", - apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); - break; - } - - return (result); -} - -int -kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, - int short_hand, int dest, int dest_mode) -{ - int result = 0; - struct kvm_lapic *target = vcpu->arch.apic; - - /* XXX - debug */ - cmn_err(CE_NOTE, "target %p, source %p, dest 0x%x, dest_mode 0x%x, " - "short_hand 0x%x\n", target, source, dest, dest_mode, short_hand); - - ASSERT(target != NULL); - switch (short_hand) { - case APIC_DEST_NOSHORT: - if (dest_mode == 0) - /* Physical mode. */ - result = kvm_apic_match_physical_addr(target, dest); - else - /* Logical mode. */ - result = kvm_apic_match_logical_addr(target, dest); - break; - case APIC_DEST_SELF: - result = (target == source); - break; - case APIC_DEST_ALLINC: - result = 1; - break; - case APIC_DEST_ALLBUT: - result = (target != source); - break; - default: - cmn_err(CE_WARN, "Bad dest shorthand value %x\n", short_hand); - break; - } - - return (result); -} - -int -kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) -{ - return (vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio); -} - int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq) @@ -1137,49 +684,6 @@ kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, return (r); } -static void -apic_send_ipi(struct kvm_lapic *apic) -{ - uint32_t icr_low = apic_get_reg(apic, APIC_ICR); - uint32_t icr_high = apic_get_reg(apic, APIC_ICR2); - struct kvm_lapic_irq irq; - - irq.vector = icr_low & APIC_VECTOR_MASK; - irq.delivery_mode = icr_low & APIC_MODE_MASK; - irq.dest_mode = icr_low & APIC_DEST_MASK; - irq.level = icr_low & APIC_INT_ASSERT; - irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; - irq.shorthand = icr_low & APIC_SHORT_MASK; - if (apic_x2apic_mode(apic)) - irq.dest_id = icr_high; - else - irq.dest_id = GET_APIC_DEST_FIELD(icr_high); - -#ifdef XXX_KVM_TRACE - trace_kvm_apic_ipi(icr_low, irq.dest_id); -#endif - - kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); -} - -void -update_divide_count(struct kvm_lapic *apic) -{ - uint32_t tmp1, tmp2, tdcr; - - tdcr = apic_get_reg(apic, APIC_TDCR); - tmp1 = tdcr & 0xf; - tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; - apic->divide_count = 0x1 << (tmp2 & 0x7); -} - -void -apic_set_tpr(struct kvm_lapic *apic, uint32_t tpr) -{ - apic_set_reg(apic, APIC_TASKPRI, tpr); - apic_update_ppr(apic); -} - static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) { @@ -1263,288 +767,8 @@ __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, int trigger_mode) } } -extern void kvm_ioapic_update_eoi(struct kvm *kvm, - int vector, int trigger_mode); -extern inline int apic_find_highest_isr(struct kvm_lapic *apic); - -static inline int -apic_test_and_clear_vector(int vec, caddr_t bitmap) -{ -#ifndef XXX - return (test_and_clear_bit(VEC_POS(vec), - (unsigned long *)(bitmap + REG_POS(vec)))); -#else - if (BT_TEST((bitmap) + REG_POS(vec), VEC_POS(vec))) { - BT_CLEAR((bitmap) + REG_POS(vec), VEC_POS(vec)); - return (1); - } else - return (0); -#endif -} - -static inline int -apic_lvt_nmi_mode(uint32_t lvt_val) -{ - return ((lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI); -} - -static void -apic_manage_nmi_watchdog(struct kvm_lapic *apic, uint32_t lvt0_val) -{ - int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); - - if (apic_lvt_nmi_mode(lvt0_val)) { - if (!nmi_wd_enabled) { - /* XXX - debug */ - cmn_err(CE_NOTE, "Receive NMI setting on APIC_LVT0 " - "for cpu %d\n", apic->vcpu->vcpu_id); - apic->vcpu->kvm->arch.vapics_in_nmi_mode++; - } - } else if (nmi_wd_enabled) - apic->vcpu->kvm->arch.vapics_in_nmi_mode--; -} - -static void -apic_set_eoi(struct kvm_lapic *apic) -{ - int vector = apic_find_highest_isr(apic); - int trigger_mode; - /* - * Not every write EOI will has corresponding ISR, - * one example is when Kernel check timer on setup_IO_APIC - */ - if (vector == -1) - return; - - apic_clear_vector(vector, (void *)((uintptr_t)apic->regs + APIC_ISR)); - apic_update_ppr(apic); - - if (apic_test_and_clear_vector(vector, (void *)((uintptr_t)apic->regs + - APIC_TMR))) - trigger_mode = IOAPIC_LEVEL_TRIG; - else - trigger_mode = IOAPIC_EDGE_TRIG; - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) - kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); -} - -int -apic_reg_write(struct kvm_lapic *apic, uint32_t reg, uint32_t val) -{ - int ret = 0; - -#ifdef XXX_KVM_TRACE - trace_kvm_apic_write(reg, val); -#endif - - switch (reg) { - case APIC_ID: /* Local APIC ID */ - if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_ID, val); - else - ret = 1; - break; - - case APIC_TASKPRI: - report_tpr_access(apic, 1); - apic_set_tpr(apic, val & 0xff); - break; - - case APIC_EOI: - apic_set_eoi(apic); - break; - - case APIC_LDR: - if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); - else - ret = 1; - break; - - case APIC_DFR: - if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - else - ret = 1; - break; - - case APIC_SPIV: { - uint32_t mask = 0x3ff; - if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) - mask |= APIC_SPIV_DIRECTED_EOI; - apic_set_reg(apic, APIC_SPIV, val & mask); - if (!(val & APIC_SPIV_APIC_ENABLED)) { - int i; - uint32_t lvt_val; - - for (i = 0; i < APIC_LVT_NUM; i++) { - lvt_val = apic_get_reg(apic, - APIC_LVTT + 0x10 * i); - apic_set_reg(apic, APIC_LVTT + 0x10 * i, - lvt_val | APIC_LVT_MASKED); - } - /* XXX pending needs protection? */ - apic->lapic_timer.pending = 0; - } - break; - } - case APIC_ICR: - /* No delay here, so we always clear the pending bit */ - apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); - apic_send_ipi(apic); - break; - - case APIC_ICR2: - if (!apic_x2apic_mode(apic)) - val &= 0xff000000; - apic_set_reg(apic, APIC_ICR2, val); - break; - - case APIC_LVT0: - apic_manage_nmi_watchdog(apic, val); - case APIC_LVTT: - case APIC_LVTTHMR: - case APIC_LVTPC: - case APIC_LVT1: - case APIC_LVTERR: - /* TODO: Check vector */ - if (!apic_sw_enabled(apic)) - val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; - apic_set_reg(apic, reg, val); - - break; - - case APIC_TMICT: - mutex_enter(&cpu_lock); - if (apic->lapic_timer.active) { - cyclic_remove(apic->lapic_timer.kvm_cyclic_id); - apic->lapic_timer.active = 0; - } - mutex_exit(&cpu_lock); - - apic_set_reg(apic, APIC_TMICT, val); - start_apic_timer(apic); - break; - - case APIC_TDCR: - if (val & 4) - cmn_err(CE_WARN, "KVM_WRITE:TDCR %x\n", val); - apic_set_reg(apic, APIC_TDCR, val); - update_divide_count(apic); - break; - - case APIC_ESR: - if (apic_x2apic_mode(apic) && val != 0) { - cmn_err(CE_WARN, "KVM_WRITE:ESR not zero %x\n", val); - ret = 1; - } - break; - - case APIC_SELF_IPI: - if (apic_x2apic_mode(apic)) { - apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); - } else - ret = 1; - break; - default: - ret = 1; - break; - } - - return (ret); -} - -static int -apic_mmio_write(struct kvm_io_device *this, - gpa_t address, int len, const void *data) -{ - struct kvm_lapic *apic = to_lapic(this); - unsigned int offset = address - apic->base_address; - uint32_t val; - - if (!apic_mmio_in_range(apic, address)) - return (-EOPNOTSUPP); - - /* - * APIC register must be aligned on 128-bits boundary. - * 32/64/128 bits registers must be accessed thru 32 bits. - * Refer SDM 8.4.1 - */ - if (len != 4 || (offset & 0xf)) { - /* Don't shout loud, $infamous_os would cause only noise. */ - return (0); - } - - val = *(uint32_t *)data; - - apic_reg_write(apic, offset & 0xff0, val); - - return (0); -} - -static const struct kvm_io_device_ops apic_mmio_ops = { - .read = apic_mmio_read, - .write = apic_mmio_write, -}; - -static int -lapic_is_periodic(struct kvm_timer *ktimer) -{ - struct kvm_lapic *apic = (struct kvm_lapic *)((caddr_t)ktimer - - offsetof(struct kvm_lapic, lapic_timer)); - - return (apic_lvtt_period(apic)); -} - -static struct kvm_timer_ops lapic_timer_ops = { - .is_periodic = lapic_is_periodic, -}; - extern void kvm_timer_fire(void *); -int -kvm_create_lapic(struct kvm_vcpu *vcpu) -{ - struct kvm_lapic *apic; - - ASSERT(vcpu != NULL); - - apic = kmem_zalloc(sizeof (*apic), KM_SLEEP); - if (!apic) - goto nomem; - - vcpu->arch.apic = apic; - - apic->regs = kmem_zalloc(PAGESIZE, KM_SLEEP); - memset(apic->regs, 0, PAGESIZE); - apic->vcpu = vcpu; - - apic->lapic_timer.kvm_cyc_handler.cyh_func = kvm_timer_fire; - apic->lapic_timer.kvm_cyc_handler.cyh_arg = &apic->lapic_timer; - apic->lapic_timer.kvm_cyc_handler.cyh_level = CY_LOW_LEVEL; - apic->lapic_timer.active = 0; - - apic->lapic_timer.t_ops = &lapic_timer_ops; - apic->lapic_timer.kvm = vcpu->kvm; - apic->lapic_timer.vcpu = vcpu; - - apic->base_address = APIC_DEFAULT_PHYS_BASE; - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; - - kvm_lapic_reset(vcpu); - kvm_iodevice_init(&apic->dev, &apic_mmio_ops); - apic->dev.lapic = apic; - - return (0); -nomem_free_apic: - if (apic) - kmem_free(apic, sizeof (struct kvm_lapic)); -nomem: - return (-ENOMEM); -} - extern int kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); int @@ -2008,31 +1232,6 @@ update_exception_bitmap(struct kvm_vcpu *vcpu) } -void -kvm_lapic_set_base(struct kvm_vcpu *vcpu, uint64_t value) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!apic) { - value |= MSR_IA32_APICBASE_BSP; - vcpu->arch.apic_base = value; - return; - } - - if (!kvm_vcpu_is_bsp(apic->vcpu)) - value &= ~MSR_IA32_APICBASE_BSP; - - vcpu->arch.apic_base = value; - if (apic_x2apic_mode(apic)) { - uint32_t id = kvm_apic_id(apic); - uint32_t ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); - apic_set_reg(apic, APIC_LDR, ldr); - } - - apic->base_address = apic->vcpu->arch.apic_base & - MSR_IA32_APICBASE_BASE; -} - uint64_t kvm_get_apic_base(struct kvm_vcpu *vcpu) { @@ -2053,18 +1252,6 @@ kvm_set_apic_base(struct kvm_vcpu *vcpu, uint64_t data) } void -kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!apic) - return; - - apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | - (apic_get_reg(apic, APIC_TASKPRI) & 4)); -} - -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) { |