-rw-r--r--  bitops.h       |  473
-rw-r--r--  kvm            |  Bin 358152 -> 363368 bytes
-rw-r--r--  kvm.c          |   70
-rw-r--r--  kvm.h          |    6
-rw-r--r--  kvm_host.h     |    2
-rw-r--r--  kvm_x86.c      |  160
-rw-r--r--  kvm_x86host.h  |    2
-rw-r--r--  paging_tmpl.h  |    2
8 files changed, 622 insertions, 93 deletions
diff --git a/bitops.h b/bitops.h
new file mode 100644
index 0000000..dbcdff0
--- /dev/null
+++ b/bitops.h
@@ -0,0 +1,473 @@
+#ifndef _ASM_X86_BITOPS_H
+#define _ASM_X86_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ *
+ * Note: inlines with more than a single statement should be marked
+ * __always_inline to avoid problems with older gcc's inlining heuristics.
+ */
+
+#ifdef XXX
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <asm/alternative.h>
+#endif /*XXX*/
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, 8 * sizeof(long))
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+ versions. */
+#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
+#else
+#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#endif
+
+#define ADDR BITOP_ADDR(addr)
+
+/*
+ * We do the locked ops that don't return the old value as
+ * a mask operation on a byte.
+ */
+#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
+#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK(nr) (1 << ((nr) & 7))
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered. See __set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non x86 architectures, so if you are writing portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void
+set_bit(unsigned int nr, volatile unsigned long *addr)
+{
+ if (IS_IMMEDIATE(nr)) {
+ __asm__ volatile("lock orb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((uint8_t)CONST_MASK(nr))
+ : "memory");
+ } else {
+ __asm__ volatile("lock bts %1,%0"
+ : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ }
+}
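
A worked example of the two paths above (illustrative only, not part of the commit): when nr is a compile-time constant, IS_IMMEDIATE() is true and set_bit() collapses to a byte-wide "lock orb" on the byte that holds the bit; otherwise it falls back to "lock bts".

    static inline void
    const_mask_example(volatile unsigned long *addr, int nr)
    {
            /* nr == 10 (constant): byte offset 10 >> 3 == 1, mask 1 << (10 & 7) == 0x04 */
            set_bit(10, addr);      /* emits roughly: lock orb $0x4, 1(addr) */

            /* nr known only at run time: takes the "lock bts" path instead */
            set_bit(nr, addr);
    }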
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+ __asm__ volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static inline void
+clear_bit(int nr, volatile unsigned long *addr)
+{
+ if (IS_IMMEDIATE(nr)) {
+ __asm__ volatile("lock andb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((uint8_t)~CONST_MASK(nr)));
+ } else {
+ __asm__ volatile("lock btr %1,%0"
+ : BITOP_ADDR(addr)
+ : "Ir" (nr));
+ }
+}
+
+/*
+ * clear_bit_unlock - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and implies release semantics before the memory
+ * operation. It can be used for an unlock.
+ */
+static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+{
+ barrier();
+ clear_bit(nr, addr);
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+ __asm__ volatile("btr %1,%0" : ADDR : "Ir" (nr));
+}
+
+
+/*
+ * __clear_bit_unlock - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * __clear_bit() is non-atomic and implies release semantics before the memory
+ * operation. It can be used for an unlock if no other CPUs can concurrently
+ * modify other bits in the word.
+ *
+ * No memory barrier is required here, because x86 cannot reorder stores past
+ * older loads. Same principle as spin_unlock.
+ */
+static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+{
+ barrier();
+ __clear_bit(nr, addr);
+}
+
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+ __asm__ volatile("btc %1,%0" : ADDR : "Ir" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+ if (IS_IMMEDIATE(nr)) {
+ __asm__ volatile("lock xorb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((uint8_t)CONST_MASK(nr)));
+ } else {
+ __asm__ volatile("lock btc %1,%0"
+ : BITOP_ADDR(addr)
+ : "Ir" (nr));
+ }
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__ volatile("lock bts %2,%1\n\t"
+ "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/**
+ * test_and_set_bit_lock - Set a bit and return its old value for lock
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This is the same as test_and_set_bit on x86.
+ */
+static inline int
+test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+{
+ return test_and_set_bit(nr, addr);
+}
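
The lock/unlock pair above is enough to build a simple bit spinlock. The following sketch is illustrative only (the helper names are hypothetical) and assumes the caller never recursively takes the same bit.

    static inline void
    bit_lock(int nr, volatile unsigned long *addr)
    {
            /* spin until the previous owner calls bit_unlock() */
            while (test_and_set_bit_lock(nr, addr))
                    ;
    }

    static inline void
    bit_unlock(int nr, volatile unsigned long *addr)
    {
            /* barrier() + atomic clear gives the release semantics described above */
            clear_bit_unlock(nr, addr);
    }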
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__("bts %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr));
+ return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__ volatile("lock btr %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__ volatile("btr %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr));
+ return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__ volatile("btc %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR
+ : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ */
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__ volatile("lock btc %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+{
+ return ((1UL << (nr % 64)) &
+ (((unsigned long *)addr)[nr / 64])) != 0;
+}
+
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+{
+ int oldbit;
+
+ __asm__ volatile("bt %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+ return oldbit;
+}
+
+#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static int test_bit(int nr, const volatile unsigned long *addr);
+#endif
+
+#define test_bit(nr, addr) \
+ (__builtin_constant_p((nr)) \
+ ? constant_test_bit((nr), (addr)) \
+ : variable_test_bit((nr), (addr)))
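
A minimal usage sketch for the helpers above (names here are hypothetical): BITS_TO_LONGS() sizes the backing array, and test_bit() dispatches to constant_test_bit() or variable_test_bit() depending on whether nr is a compile-time constant.

    #define MY_NBITS        128
    static unsigned long my_bitmap[BITS_TO_LONGS(MY_NBITS)];

    static void
    bitmap_example(int nr)
    {
            set_bit(3, my_bitmap);                  /* atomic, constant-nr fast path */
            if (test_bit(3, my_bitmap))             /* constant nr: constant_test_bit() */
                    __clear_bit(3, my_bitmap);      /* non-atomic; caller provides locking */
            if (test_bit(nr, my_bitmap))            /* variable nr: variable_test_bit() */
                    clear_bit(nr, my_bitmap);       /* atomic clear */
    }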
+
+/**
+ * __ffs - find first set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+ __asm__("bsf %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
+
+/**
+ * ffz - find first zero bit in word
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+ __asm__("bsf %1,%0"
+ : "=r" (word)
+ : "r" (~word));
+ return word;
+}
+
+/*
+ * __fls: find last set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+ __asm__("bsr %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
+
+#ifdef __KERNEL__
+/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs
+ * routines, therefore differs in spirit from the other bitops.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first
+ * set bit if value is nonzero. The first (least significant) bit
+ * is at position 1.
+ */
+static inline int ffs(int x)
+{
+ int r;
+#ifdef CONFIG_X86_CMOV
+ __asm__("bsfl %1,%0\n\t"
+ "cmovzl %2,%0"
+ : "=r" (r) : "rm" (x), "r" (-1));
+#else
+ __asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "rm" (x));
+#endif
+ return r + 1;
+}
+
+/**
+ * fls - find last set bit in word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffs, but returns the position of the most significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 32.
+ */
+static inline int fls(int x)
+{
+ int r;
+#ifdef CONFIG_X86_CMOV
+ __asm__("bsrl %1,%0\n\t"
+ "cmovzl %2,%0"
+ : "=&r" (r) : "rm" (x), "rm" (-1));
+#else
+ __asm__("bsrl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "rm" (x));
+#endif
+ return r + 1;
+}
+#endif /* __KERNEL__ */
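
Worked values for the scan helpers above (illustrative only; ffs()/fls() are compiled only when __KERNEL__ is defined, and ASSERT() is assumed here purely for illustration). For word == 0x58 == 0b1011000, bits 3, 4 and 6 are set.

    static void
    bitscan_example(void)
    {
            ASSERT(__ffs(0x58) == 3);       /* lowest set bit, 0-based */
            ASSERT(__fls(0x58) == 6);       /* highest set bit, 0-based */
            ASSERT(ffz(0x58) == 0);         /* lowest clear bit, 0-based */
            ASSERT(ffs(0x58) == 4);         /* 1-based; ffs(0) == 0 */
            ASSERT(fls(0x58) == 7);         /* 1-based; fls(0) == 0 */
    }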
+
+#undef ADDR
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/sched.h>
+
+#define ARCH_HAS_FAST_MULTIPLIER 1
+
+#include <asm-generic/bitops/hweight.h>
+
+#endif /* __KERNEL__ */
+
+#ifdef XXX
+#include <asm-generic/bitops/fls64.h>
+#endif /*XXX*/
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/ext2-non-atomic.h>
+
+#define ext2_set_bit_atomic(lock, nr, addr) \
+ test_and_set_bit((nr), (unsigned long *)(addr))
+#define ext2_clear_bit_atomic(lock, nr, addr) \
+ test_and_clear_bit((nr), (unsigned long *)(addr))
+
+#include <asm-generic/bitops/minix.h>
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_X86_BITOPS_H */
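
How the rest of this change consumes the header (a sketch; the struct layout is assumed from later hunks in this diff, e.g. test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) in __vcpu_run() and make_all_cpus_request()):

    static void
    post_request(struct kvm_vcpu *vcpu, int req)
    {
            /* atomic: the vcpu thread may be consuming requests concurrently */
            set_bit(req, &vcpu->requests);
    }

    static int
    consume_request(struct kvm_vcpu *vcpu, int req)
    {
            /* returns nonzero at most once per posted request */
            return (test_and_clear_bit(req, &vcpu->requests));
    }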
diff --git a/kvm b/kvm
index beec04e..31ac5e6 100644
--- a/kvm
+++ b/kvm
Binary files differ
diff --git a/kvm.c b/kvm.c
index 52698e8..f664577 100644
--- a/kvm.c
+++ b/kvm.c
@@ -63,11 +63,7 @@ found:
return result + ffz(tmp);
}
-#ifdef XXX
int largepages_enabled = 1;
-#else
-int largepages_enabled = 0;
-#endif /*XXX*/
extern struct kvm *kvm_arch_create_vm(void);
extern void kvm_arch_destroy_vm(struct kvm *kvmp);
@@ -2026,9 +2022,7 @@ int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
void kvm_reload_remote_mmus(struct kvm *kvm)
{
-#ifdef XXX
make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
-#endif
}
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
@@ -2333,7 +2327,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
root_gfn = 0;
if (mmu_check_root(vcpu, root_gfn))
return 1;
- sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+ sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL);
#ifdef XXX
@@ -3460,9 +3454,6 @@ kvm_create_vm(void)
return (NULL);
}
- list_create(&kvmp->arch.active_mmu_pages, sizeof (struct kvm_mmu_page),
- offsetof(struct kvm_mmu_page, link));
-
rw_init(&kvmp->kvm_rwlock, NULL, RW_DRIVER, NULL);
for (i = 0; i < KVM_NR_BUSES; i++) {
@@ -3949,11 +3940,7 @@ skip_lpage:
kvm_arch_commit_memory_region(kvmp, mem, old, user_alloc);
kvm_free_physmem_slot(&old, &new);
-#ifdef NOTNOW
- /* XXX this needs to be here, but I'm getting kernel heap corruption */
- /* panics with someone writing to a buffer after it is freed */
kmem_free(old_memslots, sizeof (struct kvm_memslots));
-#endif /*NOTNOW*/
if (flush_shadow)
kvm_arch_flush_shadow(kvmp);
@@ -5586,7 +5573,7 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
return NULL;
}
-static inline unsigned long bad_hva(void)
+inline unsigned long bad_hva(void)
{
return PAGEOFFSET;
}
@@ -6089,6 +6076,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
unsigned long addr;
addr = gfn_to_hva(kvm, gfn);
+ cmn_err(CE_NOTE, "kvm_write_guest_page: gfn = %lx, hva = %lx\n", gfn, addr);
if (kvm_is_error_hva(addr))
return -EFAULT;
/* XXX - addr could be user or kernel */
@@ -6518,17 +6506,19 @@ inline void get_page(caddr_t page)
{
}
-extern caddr_t pfn_to_page(pfn_t pfn);
+extern caddr_t pfn_to_page(struct kvm *kvm, pfn_t pfn);
-inline int kvm_is_mmio_pfn(pfn_t pfn)
+inline int kvm_is_mmio_pfn(struct kvm *kvm, pfn_t pfn)
{
#ifdef XXX
if (pfn_valid(pfn)) {
- struct page *page = compound_head(pfn_to_page(pfn));
+ struct page *page = compound_head(pfn_to_page(kvm, pfn));
return PageReserved(page);
}
-#endif
return 1;
+#else
+ return 0;
+#endif /*XXX*/
}
caddr_t gfn_to_page(struct kvm *kvm, gfn_t gfn)
@@ -6537,8 +6527,8 @@ caddr_t gfn_to_page(struct kvm *kvm, gfn_t gfn)
pfn = gfn_to_pfn(kvm, gfn);
- if (!kvm_is_mmio_pfn(pfn))
- return pfn_to_page(pfn);
+ if (!kvm_is_mmio_pfn(kvm, pfn))
+ return pfn_to_page(kvm, pfn);
get_page(bad_page);
return (caddr_t)bad_page;
@@ -7348,6 +7338,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+
/* Record the guest's net vcpu time for enforced NMI injections. */
#ifdef XXX
if (!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)
@@ -8348,6 +8339,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, uint32_t error_code)
enum emulation_result er;
r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+ cmn_err(CE_NOTE, "kvm_mmu_page_fault: %p(%p, %lx, %x) returned %x\n",
+ vcpu->arch.mmu.page_fault, vcpu, cr2, error_code, r);
if (r < 0)
goto out;
@@ -8355,12 +8348,14 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, uint32_t error_code)
r = 1;
goto out;
}
-
+ cmn_err(CE_CONT, "kvm_mmu_page_fault: topping up memory caches\n");
r = mmu_topup_memory_caches(vcpu);
if (r)
goto out;
er = emulate_instruction(vcpu, cr2, error_code, 0);
+ cmn_err(CE_CONT, "kvm_mmu_page_fault: emulate_instruction returned %x\n", er);
+
switch (er) {
case EMULATE_DONE:
@@ -8379,6 +8374,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, uint32_t error_code)
cmn_err(CE_PANIC, "kvm_mmu_page_fault: unknown return from emulate_instruction: %x\n", er);
}
out:
+ cmn_err(CE_NOTE, "kvm_mmu_page_fault: returns %d\n", r);
return r;
}
@@ -10295,6 +10291,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
unsigned long exit_qualification;
gpa_t gpa;
int gla_validity;
+ int rval;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -10319,7 +10316,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
#ifdef XXX
trace_kvm_page_fault(gpa, exit_qualification);
#else
- return kvm_mmu_page_fault(vcpu, gpa & PAGEMASK, 0);
+ rval = kvm_mmu_page_fault(vcpu, gpa & PAGEMASK, 0);
+ cmn_err(CE_NOTE, "handle_ept_violation: returns %d\n", rval);
+ return rval;
#endif /*XXX*/
}
@@ -10569,7 +10568,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
uint32_t exit_reason = vmx->exit_reason;
uint32_t vectoring_info = vmx->idt_vectoring_info;
+ int rval;
+ cmn_err(CE_NOTE, "vmx_handle_exit: exit_reason = %d, vectoring_info = %x\n", exit_reason, vectoring_info);
/* If guest state is invalid, start emulating */
if (vmx->emulation_required && emulate_invalid_guest_state)
return handle_invalid_guest_state(vcpu);
@@ -10583,6 +10584,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read16(VM_INSTRUCTION_ERROR)&0xff;
+ cmn_err(CE_NOTE, "vmx_handle_exit: fail = %x, failure reason = %x\n",
+ vmx->fail, (unsigned int)vcpu->run->fail_entry.hardware_entry_failure_reason&0xff);
+
return 0;
}
@@ -10613,9 +10617,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
if (exit_reason < kvm_vmx_max_exit_handlers
- && kvm_vmx_exit_handlers[exit_reason])
- return kvm_vmx_exit_handlers[exit_reason](vcpu);
- else {
+ && kvm_vmx_exit_handlers[exit_reason]) {
+ rval = kvm_vmx_exit_handlers[exit_reason](vcpu);
+ cmn_err(CE_NOTE, "vmx_handle_exit: returning %d from kvm_vmx_exit_handlers[%d]\n",
+ rval, exit_reason);
+ return rval;
+ } else {
vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
vcpu->run->hw.hardware_exit_reason = exit_reason;
}
@@ -11201,6 +11208,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
#endif /*XXX*/
kvm_lapic_sync_from_vapic(vcpu);
r = kvm_x86_ops->handle_exit(vcpu);
+ cmn_err(CE_NOTE, "vcpu_enter_guest: returning %d\n", r);
out:
return r;
}
@@ -11364,11 +11372,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
#ifdef XXX
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
#endif /*XXX*/
- /*
- * XXX - the following should use a bitset_t
- * and do bitset_atomic_test_and_del().
- * but I am lazy, and will get to it later
- */
if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
{
switch(vcpu->arch.mp_state) {
@@ -11385,8 +11388,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
}
- if (r <= 0)
+ if (r <= 0) {
+ cmn_err(CE_NOTE, "__vcpu_run: r = %d\n", r);
break;
+ }
#ifdef XXX
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
@@ -11415,6 +11420,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
#endif /*XXX*/
post_kvm_run_save(vcpu);
vapic_exit(vcpu);
+ cmn_err(CE_NOTE, "__vcpu_run: returning %d\n", r);
return r;
}
diff --git a/kvm.h b/kvm.h
index 431dff3..bcde313 100644
--- a/kvm.h
+++ b/kvm.h
@@ -1147,19 +1147,19 @@ struct kvm_irq_routing_table {
struct kvm_shadow_walk_iterator {
uint64_t addr;
hpa_t shadow_addr;
- int level;
uint64_t *sptep;
+ int level;
unsigned index;
};
extern void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, uint64_t addr);
-extern int shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator);
+extern int shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu);
extern void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator);
#define for_each_shadow_entry(_vcpu, _addr, _walker) \
for (shadow_walk_init(&(_walker), _vcpu, _addr); \
- shadow_walk_okay(&(_walker)); \
+ shadow_walk_okay(&(_walker), _vcpu); \
shadow_walk_next(&(_walker)))
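
A caller-side sketch of the updated iterator API (illustrative only; the loop body is hypothetical): shadow_walk_okay() now also takes the vcpu so it can locate the kvm_mmu_page backing shadow_addr, and for_each_shadow_entry() passes it through.

    static void
    dump_shadow_walk(struct kvm_vcpu *vcpu, uint64_t addr)
    {
            struct kvm_shadow_walk_iterator it;

            for_each_shadow_entry(vcpu, addr, it) {
                    cmn_err(CE_CONT, "level %d index %u sptep %p\n",
                        it.level, it.index, (void *)it.sptep);
            }
    }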
struct kvm {
diff --git a/kvm_host.h b/kvm_host.h
index 85c27d4..caddd02 100644
--- a/kvm_host.h
+++ b/kvm_host.h
@@ -147,7 +147,7 @@ void kvm_release_pfn_dirty(pfn_t);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
void kvm_set_pfn_accessed(pfn_t pfn);
-void kvm_get_pfn(pfn_t pfn);
+void kvm_get_pfn(struct kvm_vcpu *vcpu, pfn_t pfn);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
diff --git a/kvm_x86.c b/kvm_x86.c
index ceffb03..8cc8aaf 100644
--- a/kvm_x86.c
+++ b/kvm_x86.c
@@ -115,11 +115,13 @@ inline gpa_t gfn_to_gpa(gfn_t gfn)
return (gpa_t)gfn << PAGESHIFT;
}
+caddr_t pfn_to_page(struct kvm *kvm, pfn_t pfn);
+
void kvm_release_pfn_clean(pfn_t pfn)
{
-#ifdef XXX /*XXX probably just free the page */
+#ifdef XXX
if (!kvm_is_mmio_pfn(pfn))
- put_page(pfn_to_page(pfn));
+ put_page(pfn_to_page(kvm, pfn));
#endif /*XXX*/
}
@@ -208,13 +210,13 @@ kvm_arch_destroy_vm(struct kvm *kvm)
#ifdef XXX
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
-#endif
#ifdef APIC
if (kvm->arch.apic_access_page)
put_page(kvm->arch.apic_access_page);
if (kvm->arch.ept_identity_pagetable)
put_page(kvm->arch.ept_identity_pagetable);
#endif /*APIC*/
+#endif /*XXX*/
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
cleanup_srcu_struct(&kvm->srcu);
#endif /*CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER*/
@@ -1454,8 +1456,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
if (r)
goto out;
- kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
- kvm->arch.ept_identity_map_addr >> PAGESHIFT);
+ kvm->arch.ept_identity_pagetable = (caddr_t)kvm_userspace_mem.userspace_addr;
out:
mutex_exit(&kvm->slots_lock);
return r;
@@ -1466,6 +1467,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
+ memset(&kvm_userspace_mem, 0, sizeof(struct kvm_userspace_memory_region));
mutex_enter(&kvm->slots_lock);
if (kvm->arch.apic_access_page)
goto out;
@@ -1477,7 +1479,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
if (r)
goto out;
- kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+ kvm->arch.apic_access_page = (caddr_t)kvm_userspace_mem.userspace_addr;
out:
mutex_exit(&kvm->slots_lock);
return r;
@@ -1504,7 +1506,7 @@ vmx_create_vcpu(struct kvm *kvm, struct kvm_vcpu_ioc *arg, unsigned int id)
}
#endif /*NOTNOW*/
- vmx->guest_msrs = kmem_alloc(PAGESIZE, KM_SLEEP);
+ vmx->guest_msrs = kmem_zalloc(PAGESIZE, KM_SLEEP);
if (!vmx->guest_msrs) {
return NULL; /* XXX - need cleanup here */
}
@@ -1576,7 +1578,7 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << NM_VECTOR) | (1u << DB_VECTOR);
-#ifdef XXX
+#ifndef XXX
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -1607,10 +1609,8 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, uint64_t value)
return;
}
-#ifdef XXX
if (!kvm_vcpu_is_bsp(apic->vcpu))
value &= ~MSR_IA32_APICBASE_BSP;
-#endif /*XXX*/
vcpu->arch.apic_base = value;
if (apic_x2apic_mode(apic)) {
@@ -1868,7 +1868,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
/* Set up identity-mapping pagetable for EPT in real mode */
for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
tmp = (i << 22) + (PT_VALID | PT_WRITABLE | PT_USER |
- PT_REF | PT_MOD | PT_PAT_4K);
+ PT_REF | PT_MOD | PT_PAGESIZE);
r = kvm_write_guest_page(kvm, identity_map_pfn,
&tmp, i * sizeof(tmp), sizeof(tmp));
if (r < 0)
@@ -2118,7 +2118,7 @@ int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
*/
if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-#ifdef XXX
+#ifndef XXX
vmcs_writel(GUEST_CS_BASE, 0x000f0000);
#else
vmcs_writel(GUEST_CS_BASE, 0xffff0000);
@@ -2220,10 +2220,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs = 0;
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
-#ifdef XXX
vcpu->arch.dr6 = DR6_FIXED_1;
vcpu->arch.dr7 = DR7_FIXED_1;
-#endif /*XXX*/
+
/* return kvm_x86_ops->vcpu_reset(vcpu);*/
return vmx_vcpu_reset(vcpu);
}
@@ -2262,8 +2261,6 @@ extern struct kmem_cache *pte_chain_cache;
extern struct kmem_cache *rmap_desc_cache;
extern struct kmem_cache *mmu_page_header_cache;
-/*XXX the following is called for tdp (two dimensional hardware paging */
-/* we dont support this right now */
static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
int min)
{
@@ -2515,11 +2512,47 @@ unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
return &slot->lpage_info[level - 2][idx].rmap_pde;
}
-void kvm_set_pfn_accessed(pfn_t pfn)
+extern inline unsigned long bad_hva(void);
+
+/*
+ * XXX The following routine is misnamed. Given a gfn
+ * or pfn, the routine returns a virtual address that points
+ * to the same page as the pfn. On linux, you just use
+ * the kernel area mapped 1-to-1 with physical addresses,
+ * or use the user address stored in the memslot array.
+ * Right now on Solaris, all memory is allocated by the
+ * user level (in which case, we can use the memslot array),
+ * or it's allocated by the kernel, in which case we'll walk
+ * the kvm_mmu_page structs looking for a match.
+ * Either way, this routine is expensive (but how often is
+ * it called???).
+ */
+caddr_t pfn_to_page(struct kvm *kvm, pfn_t pfn)
+{
+ unsigned long raddr;
+ struct kvm_mmu_page *sp;
+ /*
+ * XXX This routine takes a page frame number and
+ * returns a virtual address referring to the page.
+ */
+ raddr = gfn_to_hva(kvm, pfn); /* search memslot array */
+ if (raddr == bad_hva()) { /* not in memslots...*/
+ for (sp = list_head(&kvm->arch.active_mmu_pages); sp;
+ sp = list_next(&kvm->arch.active_mmu_pages, sp)) {
+ if ((sp->hpa>>PAGESHIFT) == pfn) {
+ raddr = *sp->spt;
+ break;
+ }
+ }
+ }
+ return((caddr_t)raddr);
+}
+
+void kvm_set_pfn_accessed(struct kvm *kvm, pfn_t pfn)
{
#ifdef XXX
if (!kvm_is_mmio_pfn(pfn))
- mark_page_accessed(pfn_to_page(pfn));
+ mark_page_accessed(pfn_to_page(kvm, pfn));
#endif /*XXX*/
}
@@ -2552,11 +2585,11 @@ static void rmap_desc_remove_entry(unsigned long *rmapp,
mmu_free_rmap_desc(desc);
}
-void kvm_set_pfn_dirty(pfn_t pfn)
+void kvm_set_pfn_dirty(struct kvm *kvm, pfn_t pfn)
{
#ifdef XXX
if (!kvm_is_mmio_pfn(pfn)) {
- struct page *page = pfn_to_page(pfn);
+ struct page *page = pfn_to_page(kvm, pfn);
if (!PageReserved(page))
SetPageDirty(page);
}
@@ -2591,9 +2624,9 @@ void rmap_remove(struct kvm *kvm, uint64_t *spte)
sp = page_header(kvm_va2pa((caddr_t)spte), kvm);
pfn = spte_to_pfn(*spte);
if (*spte & shadow_accessed_mask)
- kvm_set_pfn_accessed(pfn);
+ kvm_set_pfn_accessed(kvm, pfn);
if (is_writable_pte(*spte))
- kvm_set_pfn_dirty(pfn);
+ kvm_set_pfn_dirty(kvm, pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
if (!*rmapp) {
cmn_err(CE_WARN, "rmap_remove: %p %lx 0->BUG\n", spte, *spte);
@@ -2719,9 +2752,9 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
}
-void kvm_release_pfn_dirty(pfn_t pfn)
+void kvm_release_pfn_dirty(struct kvm_vcpu *vcpu, pfn_t pfn)
{
- kvm_set_pfn_dirty(pfn);
+ kvm_set_pfn_dirty(vcpu->kvm, pfn);
kvm_release_pfn_clean(pfn);
}
@@ -2868,6 +2901,8 @@ int set_spte(struct kvm_vcpu *vcpu, uint64_t *sptep,
spte |= (uint64_t)pfn << PAGESHIFT;
+ cmn_err(CE_NOTE, "set_spte: spte = %lx\n", spte);
+
if ((pte_access & ACC_WRITE_MASK)
|| (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -2901,7 +2936,9 @@ int set_spte(struct kvm_vcpu *vcpu, uint64_t *sptep,
mark_page_dirty(vcpu->kvm, gfn);
set_pte:
+ cmn_err(CE_CONT, "set_spte: calling __set_spte with sptep = %p, spte = %lx\n", sptep, spte);
__set_spte(sptep, spte);
+ cmn_err(CE_CONT, "set_spte: returning %x\n", ret);
return ret;
}
@@ -2917,8 +2954,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, uint64_t *sptep,
int was_rmapped = 0;
int was_writable = is_writable_pte(*sptep);
int rmap_count;
-
+ cmn_err(CE_NOTE, "mmu_set_spte: vcpu = %p sptep = %p, level = %x, gfn = %lx\n",
+ vcpu, sptep, level, gfn);
+ cmn_err(CE_CONT, "mmu_set_spte: pfn = %lx, *sptep = %lx\n", pfn, *sptep);
if (is_rmap_spte(*sptep)) {
+ cmn_err(CE_CONT, "mmu_set_spte: is_rmap_spte is true\n");
/*
* If we overwrite a PTE page pointer with a 2MB PMD, unlink
* the parent of the now unreachable PTE.
@@ -2929,16 +2969,21 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, uint64_t *sptep,
uint64_t pte = *sptep;
child = page_header(pte & PT64_BASE_ADDR_MASK, vcpu->kvm);
+ cmn_err(CE_CONT, "mmu_set_spte: child = %p, pte %lx, removing parent\n", child, pte);
mmu_page_remove_parent_pte(child, sptep);
} else if (pfn != spte_to_pfn(*sptep)) {
+ cmn_err(CE_CONT, "mmu_set_spte: removing rmap for pfn = %lx, spte_to_pfn = %lx\n",
+ pfn, spte_to_pfn(*sptep));
rmap_remove(vcpu->kvm, sptep);
} else
was_rmapped = 1;
}
+ cmn_err(CE_CONT, "mmu_set_spte: calling set_spte...\n");
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
dirty, level, gfn, pfn, speculative, 1,
reset_host_protection)) {
+ cmn_err(CE_CONT, "mmu_set_spte: set_spte returned non-null\n");
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -2949,17 +2994,22 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, uint64_t *sptep,
++vcpu->kvm->stat.lpages;
#endif /*XXX*/
+ cmn_err(CE_CONT, "mmu_set_spte: calling page_header_update_slot, kvm = %p, sptep = %p, gfn = %lx\n",
+ vcpu->kvm, sptep, gfn);
page_header_update_slot(vcpu->kvm, sptep, gfn);
if (!was_rmapped) {
rmap_count = rmap_add(vcpu, sptep, gfn);
+ cmn_err(CE_CONT, "mmu_set_spte: added rmap for vcpu = %p, sptep = %p, gfn = %lx, rmap_count = %d\n",
+ vcpu, sptep, gfn, rmap_count);
kvm_release_pfn_clean(pfn);
#ifdef XXX
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
#endif /*XXX*/
} else {
+ cmn_err(CE_CONT, "mmu_set_spte: releasing pfn = %lx, was_writable = %x\n", pfn, was_writable);
if (was_writable)
- kvm_release_pfn_dirty(pfn);
+ kvm_release_pfn_dirty(vcpu, pfn);
else
kvm_release_pfn_clean(pfn);
}
@@ -3045,6 +3095,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
iterator.level - 1,
1, ACC_ALL, iterator.sptep);
if (!sp) {
+ cmn_err(CE_WARN, "nonpaging_map: ENOMEM\n");
kvm_release_pfn_clean(pfn);
return -ENOMEM;
}
@@ -3088,7 +3139,6 @@ inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
uint32_t error_code)
{
-#ifdef XXX
pfn_t pfn;
int r;
int level;
@@ -3106,8 +3156,10 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
+#ifdef XXX
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
+#endif /*XXX*/
pfn = gfn_to_pfn(vcpu->kvm, gfn);
if (is_error_pfn(pfn)) {
@@ -3129,7 +3181,6 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
out_unlock:
mutex_exit(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
-#endif /*XXX*/
return 0;
}
@@ -3282,18 +3333,6 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
}
}
-caddr_t pfn_to_page(pfn_t pfn)
-{
- /*
- * XXX This routine takes a page frame number and
- * returns a virtual address referring to the page.
- */
- return (caddr_t)NULL; /* XXX fix me!!! */
-}
-
-
-
-
void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, uint64_t addr)
{
@@ -3310,8 +3349,10 @@ void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
}
}
-int shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+int shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu)
{
+ struct kvm_mmu_page *sp;
+
if (iterator->level < PT_PAGE_TABLE_LEVEL)
return 0;
@@ -3320,7 +3361,25 @@ int shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
return 0;
iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
- iterator->sptep = ((uint64_t *)(iterator->shadow_addr)) + iterator->index;
+ cmn_err(CE_NOTE, "iterator->level = %x, iterator->shadow_addr = %lx, iterator->addr = %lx\n",
+ iterator->level, iterator->shadow_addr, iterator->addr);
+ cmn_err(CE_CONT, "iterator->index = %x\n", iterator->index);
+#ifdef XXX
+ iterator->sptep = ((uint64_t *)__va(iterator->shadow_addr)) + iterator->index;
+#else
+ for (sp = list_head(&vcpu->kvm->arch.active_mmu_pages); sp;
+ sp = list_next(&vcpu->kvm->arch.active_mmu_pages, sp)) {
+ if (sp->hpa == iterator->shadow_addr) {
+ iterator->sptep = ((uint64_t *)sp->spt) + iterator->index ;
+ cmn_err(CE_CONT, "sp = %p, spt = %p, sptep = %p\n", sp, sp->spt, iterator->sptep);
+ break;
+ }
+ }
+ if (!sp) {
+ cmn_err(CE_NOTE, "shadow_addr %lx not in mmu_page_list\n", iterator->shadow_addr);
+ return 0;
+ }
+#endif /*XXX*/
return 1;
}
@@ -3425,12 +3484,10 @@ gfn_t pse36_gfn_delta(uint32_t gpte)
return (gpte & PT32_DIR_PSE36_MASK) << shift;
}
-void kvm_get_pfn(pfn_t pfn)
+void kvm_get_pfn(struct kvm_vcpu *vcpu, pfn_t pfn)
{
-#ifdef XXX
if (!kvm_is_mmio_pfn(pfn))
- get_page(pfn_to_page(pfn));
-#endif /*XXX*/
+ get_page(pfn_to_page(vcpu->kvm, pfn));
}
#define PTTYPE 64
@@ -3654,18 +3711,9 @@ int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
vcpu->arch.update_pte.pfn = -1; /* bad_pfn */
-#ifdef XXX
- /*
- * XXX currently, we won't support 2 dimensional paging.
- * So the hardware will not do guest-virtual to guest-physical
- * and guest-physical to host physical. So we'll need to
- * implement "shadow" paging...
- */
-
if (tdp_enabled)
return init_kvm_tdp_mmu(vcpu);
else
-#endif
return init_kvm_softmmu(vcpu);
return 0;
}
@@ -3837,6 +3885,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (!userspace_addr)
return -ENOMEM;
memslot->userspace_addr = (unsigned long) userspace_addr;
+ mem->userspace_addr = (unsigned long) userspace_addr;
+
}
#endif /*DO_MMAP_SOLARIS*/
#endif /*XXX*/
diff --git a/kvm_x86host.h b/kvm_x86host.h
index bbbff31..dab0002 100644
--- a/kvm_x86host.h
+++ b/kvm_x86host.h
@@ -507,7 +507,7 @@ struct kvm_arch {
/*
* Hash table of struct kvm_mmu_page.
*/
- list_t active_mmu_pages;
+ list_t active_mmu_pages; /* list of all kvm_mmu_page */
list_t assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
diff --git a/paging_tmpl.h b/paging_tmpl.h
index b8917ce..7c50fcd 100644
--- a/paging_tmpl.h
+++ b/paging_tmpl.h
@@ -303,7 +303,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
return;
#endif
- kvm_get_pfn(pfn);
+ kvm_get_pfn(vcpu, pfn);
/*
 * we call mmu_set_spte() with reset_host_protection = 1 because that
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).