Diffstat (limited to 'linux/x86/kvm_main.c')
 -rw-r--r--  linux/x86/kvm_main.c | 607
 1 file changed, 150 insertions(+), 457 deletions(-)
diff --git a/linux/x86/kvm_main.c b/linux/x86/kvm_main.c
index 355948e..19f1924 100644
--- a/linux/x86/kvm_main.c
+++ b/linux/x86/kvm_main.c
@@ -45,7 +45,6 @@
* machines without emulation or binary translation.
*
* Copyright (C) 2006 Qumranet, Inc.
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
*
* Authors:
* Avi Kivity <avi@qumranet.com>
@@ -95,12 +94,11 @@
#include <asm-generic/bitops/le.h>
#include "coalesced_mmio.h"
-#include "async_pf.h"
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
-MODULE_INFO(version, "kvm-kmod-2.6.38-rc7");
+MODULE_INFO(version, "kvm-kmod-2.6.34");
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -131,40 +129,15 @@ static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-bool kvm_rebooting;
-EXPORT_SYMBOL_GPL(kvm_rebooting);
+static bool kvm_rebooting;
static bool largepages_enabled = true;
-static struct page *hwpoison_page;
-static pfn_t hwpoison_pfn;
-
-static struct page *fault_page;
-static pfn_t fault_pfn;
-
inline int kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn)) {
- int reserved;
- struct page *tail = pfn_to_page(pfn);
- struct page *head = compound_trans_head(tail);
- reserved = PageReserved(head);
- if (head != tail) {
- /*
- * "head" is not a dangling pointer
- * (compound_trans_head takes care of that)
- * but the hugepage may have been splitted
- * from under us (and we may not hold a
- * reference count on the head page so it can
- * be reused before we run PageReferenced), so
- * we've to check PageTail before returning
- * what we just read.
- */
- smp_rmb();
- if (PageTail(tail))
- return reserved;
- }
- return PageReserved(tail);
+ struct page *page = compound_head(pfn_to_page(pfn));
+ return PageReserved(page);
}
return true;
@@ -210,7 +183,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
raw_spin_lock(&kvm->requests_lock);
me = smp_processor_id();
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (kvm_make_check_request(req, vcpu))
+ if (test_and_set_bit(req, &vcpu->requests))
continue;
cpu = vcpu->cpu;
if (cpus != NULL && cpu != -1 && cpu != me)
@@ -229,12 +202,8 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- int dirty_count = kvm->tlbs_dirty;
-
- smp_mb();
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
- cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
void kvm_reload_remote_mmus(struct kvm *kvm)
@@ -252,7 +221,6 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
init_waitqueue_head(&vcpu->wq);
- kvm_async_pf_vcpu_init(vcpu);
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
@@ -311,12 +279,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
* pte after kvm_unmap_hva returned, without noticing the page
* is going to be freed.
*/
- idx = srcu_read_lock(&kvm->srcu);
+ idx = kvm_srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
- need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty;
+ need_tlb_flush = kvm_unmap_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvm_srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -335,12 +303,12 @@ void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int idx;
- idx = srcu_read_lock(&kvm->srcu);
+ idx = kvm_srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
kvm_set_spte_hva(kvm, address, pte);
spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvm_srcu_read_unlock(&kvm->srcu, idx);
}
static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -351,7 +319,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
- idx = srcu_read_lock(&kvm->srcu);
+ idx = kvm_srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
/*
* The count increase must become visible at unlock time as no
@@ -361,9 +329,8 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm->mmu_notifier_count++;
for (; start < end; start += PAGE_SIZE)
need_tlb_flush |= kvm_unmap_hva(kvm, start);
- need_tlb_flush |= kvm->tlbs_dirty;
spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvm_srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@@ -403,11 +370,11 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young, idx;
- idx = srcu_read_lock(&kvm->srcu);
+ idx = kvm_srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
young = kvm_age_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvm_srcu_read_unlock(&kvm->srcu, idx);
if (young)
kvm_flush_remote_tlbs(kvm);
@@ -415,33 +382,15 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
return young;
}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38)
-static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address)
-{
- struct kvm *kvm = mmu_notifier_to_kvm(mn);
- int young, idx;
-
- idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
- young = kvm_test_age_hva(kvm, address);
- spin_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
-
- return young;
-}
-#endif
-
static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int idx;
- idx = srcu_read_lock(&kvm->srcu);
+ idx = kvm_srcu_read_lock(&kvm->srcu);
kvm_arch_flush_shadow(kvm);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvm_srcu_read_unlock(&kvm->srcu, idx);
}
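
The kvm_srcu_read_lock()/kvm_srcu_read_unlock() calls substituted throughout these notifier hooks are kvm-kmod compatibility wrappers around SRCU (the module carries its own SRCU support for older kernels, set up via the kvm_init_srcu() call visible in kvm_init() below). A minimal sketch of the wrapper shape, assuming the running kernel's stock SRCU is adequate:

/* Sketch only (assumption): on kernels whose SRCU suffices, the
 * compat wrappers can simply forward to the stock API. */
static inline int kvm_srcu_read_lock(struct srcu_struct *sp)
{
	return srcu_read_lock(sp);
}

static inline void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx)
{
	srcu_read_unlock(sp, idx);
}
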
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
@@ -449,9 +398,6 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38)
- .test_young = kvm_mmu_notifier_test_young,
-#endif
#ifdef MMU_NOTIFIER_HAS_CHANGE_PTE
.change_pte = kvm_mmu_notifier_change_pte,
#endif
@@ -475,15 +421,11 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
static struct kvm *kvm_create_vm(void)
{
- int r, i;
- struct kvm *kvm = kvm_arch_alloc_vm();
-
- if (!kvm)
- return ERR_PTR(-ENOMEM);
+ int r = 0, i;
+ struct kvm *kvm = kvm_arch_create_vm();
- r = kvm_arch_init_vm(kvm);
- if (r)
- goto out_err_nodisable;
+ if (IS_ERR(kvm))
+ goto out;
r = hardware_enable_all();
if (r)
@@ -497,19 +439,23 @@ static struct kvm *kvm_create_vm(void)
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
- goto out_err_nosrcu;
- if (init_srcu_struct(&kvm->srcu))
- goto out_err_nosrcu;
+ goto out_err;
+ if (kvm_init_srcu_struct(&kvm->srcu))
+ goto out_err;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
- if (!kvm->buses[i])
+ if (!kvm->buses[i]) {
+ kvm_cleanup_srcu_struct(&kvm->srcu);
goto out_err;
+ }
}
r = kvm_init_mmu_notifier(kvm);
- if (r)
+ if (r) {
+ kvm_cleanup_srcu_struct(&kvm->srcu);
goto out_err;
+ }
kvm->mm = current->mm;
mmget(&kvm->mm->mm_count);
@@ -523,35 +469,22 @@ static struct kvm *kvm_create_vm(void)
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
-
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+ kvm_coalesced_mmio_init(kvm);
+#endif
+out:
return kvm;
out_err:
- cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
hardware_disable_all();
out_err_nodisable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
- kvm_arch_free_vm(kvm);
+ kfree(kvm);
return ERR_PTR(r);
}
-static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
-{
- if (!memslot->dirty_bitmap)
- return;
-
- if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
- vfree(memslot->dirty_bitmap_head);
- else
- kfree(memslot->dirty_bitmap_head);
-
- memslot->dirty_bitmap = NULL;
- memslot->dirty_bitmap_head = NULL;
-}
-
/*
* Free any memory in @free but not in @dont.
*/
@@ -564,7 +497,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
vfree(free->rmap);
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
- kvm_destroy_dirty_bitmap(free);
+ vfree(free->dirty_bitmap);
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
@@ -575,6 +508,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
}
free->npages = 0;
+ free->dirty_bitmap = NULL;
free->rmap = NULL;
}
@@ -608,9 +542,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_flush_shadow(kvm);
#endif
kvm_arch_destroy_vm(kvm);
- kvm_free_physmem(kvm);
- cleanup_srcu_struct(&kvm->srcu);
- kvm_arch_free_vm(kvm);
hardware_disable_all();
mmdrop(mm);
}
@@ -640,27 +571,6 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
}
/*
- * Allocation size is twice as large as the actual dirty bitmap size.
- * This makes it possible to do double buffering: see x86's
- * kvm_vm_ioctl_get_dirty_log().
- */
-static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
-{
- unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
-
- if (dirty_bytes > PAGE_SIZE)
- memslot->dirty_bitmap = vzalloc(dirty_bytes);
- else
- memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL);
-
- if (!memslot->dirty_bitmap)
- return -ENOMEM;
-
- memslot->dirty_bitmap_head = memslot->dirty_bitmap;
- return 0;
-}
-
-/*
* Allocate some memory and give it an address in the guest physical address
* space.
*
@@ -697,16 +607,11 @@ int __kvm_set_memory_region(struct kvm *kvm,
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
- r = -EINVAL;
- if (npages > KVM_MEM_MAX_NR_PAGES)
- goto out;
-
if (!npages)
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
new = old = *memslot;
- new.id = mem->slot;
new.base_gfn = base_gfn;
new.npages = npages;
new.flags = mem->flags;
@@ -737,11 +642,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Allocate if a slot is being created */
#ifndef CONFIG_S390
if (npages && !new.rmap) {
- new.rmap = vzalloc(npages * sizeof(*new.rmap));
+ new.rmap = vmalloc(npages * sizeof(struct page *));
if (!new.rmap)
goto out_free;
+ memset(new.rmap, 0, npages * sizeof(*new.rmap));
+
new.user_alloc = user_alloc;
new.userspace_addr = mem->userspace_addr;
}
@@ -760,18 +667,21 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (new.lpage_info[i])
continue;
- lpages = 1 + ((base_gfn + npages - 1)
- >> KVM_HPAGE_GFN_SHIFT(level));
- lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level);
+ lpages = 1 + (base_gfn + npages - 1) /
+ KVM_PAGES_PER_HPAGE(level);
+ lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level);
- new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i]));
+ new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i]));
if (!new.lpage_info[i])
goto out_free;
- if (base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
+ memset(new.lpage_info[i], 0,
+ lpages * sizeof(*new.lpage_info[i]));
+
+ if (base_gfn % KVM_PAGES_PER_HPAGE(level))
new.lpage_info[i][0].write_count = 1;
- if ((base_gfn+npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
+ if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level))
new.lpage_info[i][lpages - 1].write_count = 1;
ugfn = new.userspace_addr >> PAGE_SHIFT;
/*
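
The KVM_HPAGE_GFN_SHIFT() helpers do not exist in the 2.6.34-era headers, so the hunk above falls back to division and modulo by KVM_PAGES_PER_HPAGE(level). Since that count is a power of two (the shift is its log2), both forms yield the same slot-spanning hugepage count and the same alignment tests. A stand-alone check with illustrative values:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative values: 2 MiB hugepages over 4 KiB base pages. */
	unsigned long pages_per_hpage = 512;  /* KVM_PAGES_PER_HPAGE(level) */
	unsigned int hpage_gfn_shift = 9;     /* KVM_HPAGE_GFN_SHIFT(level) */
	unsigned long base_gfn = 0x12345, npages = 100000;

	unsigned long by_shift = 1 + ((base_gfn + npages - 1) >> hpage_gfn_shift)
				   - (base_gfn >> hpage_gfn_shift);
	unsigned long by_div = 1 + (base_gfn + npages - 1) / pages_per_hpage
				 - base_gfn / pages_per_hpage;

	assert(by_shift == by_div);           /* holds for any power-of-two size */
	assert((base_gfn & (pages_per_hpage - 1)) == base_gfn % pages_per_hpage);
	printf("lpage_info entries: %lu\n", by_div);
	return 0;
}
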
@@ -789,8 +699,12 @@ skip_lpage:
/* Allocate page dirty bitmap if needed */
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
- if (kvm_create_dirty_bitmap(&new) < 0)
+ unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
+
+ new.dirty_bitmap = vmalloc(dirty_bytes);
+ if (!new.dirty_bitmap)
goto out_free;
+ memset(new.dirty_bitmap, 0, dirty_bytes);
/* destroy any largepage mappings for dirty tracking */
if (old.npages)
flush_shadow = 1;
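
vzalloc() is not available on the older kernels this backport targets, hence the open-coded vmalloc() plus memset() here and in the rmap/lpage_info hunks above. The size comes from kvm_dirty_bitmap_bytes(): one dirty bit per guest page, rounded up to whole longs. A stand-alone illustration (the helper body is an assumption matching that definition):

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))

/* Assumed to match kvm_dirty_bitmap_bytes(): one bit per page,
 * rounded up to whole longs. */
static unsigned long dirty_bitmap_bytes(unsigned long npages)
{
	return ALIGN(npages, BITS_PER_LONG) / 8;
}

int main(void)
{
	/* 1 GiB of 4 KiB pages -> 262144 bits -> 32 KiB of bitmap. */
	printf("%lu pages -> %lu bytes\n", 262144UL,
	       dirty_bitmap_bytes(262144UL));
	return 0;
}
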
@@ -809,7 +723,6 @@ skip_lpage:
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
if (mem->slot >= slots->nmemslots)
slots->nmemslots = mem->slot + 1;
- slots->generation++;
slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
old_memslots = kvm->memslots;
@@ -830,12 +743,14 @@ skip_lpage:
if (r)
goto out_free;
+#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
if (npages) {
r = kvm_iommu_map_pages(kvm, &new);
if (r)
goto out_free;
}
+#endif
r = -ENOMEM;
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
@@ -844,7 +759,6 @@ skip_lpage:
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
if (mem->slot >= slots->nmemslots)
slots->nmemslots = mem->slot + 1;
- slots->generation++;
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (!npages) {
@@ -942,28 +856,16 @@ EXPORT_SYMBOL_GPL(kvm_disable_largepages);
int is_error_page(struct page *page)
{
- return page == bad_page || page == hwpoison_page || page == fault_page;
+ return page == bad_page;
}
EXPORT_SYMBOL_GPL(is_error_page);
int is_error_pfn(pfn_t pfn)
{
- return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn;
+ return pfn == bad_pfn;
}
EXPORT_SYMBOL_GPL(is_error_pfn);
-int is_hwpoison_pfn(pfn_t pfn)
-{
- return pfn == hwpoison_pfn;
-}
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-
-int is_fault_pfn(pfn_t pfn)
-{
- return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
static inline unsigned long bad_hva(void)
{
return PAGE_OFFSET;
@@ -975,10 +877,10 @@ int kvm_is_error_hva(unsigned long addr)
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
- gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
int i;
+ struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
for (i = 0; i < slots->nmemslots; ++i) {
struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -989,18 +891,20 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
}
return NULL;
}
+EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
- return __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ gfn = unalias_gfn(kvm, gfn);
+ return gfn_to_memslot_unaliased(kvm, gfn);
}
-EXPORT_SYMBOL_GPL(gfn_to_memslot);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
int i;
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+ gfn = unalias_gfn_instantiation(kvm, gfn);
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
struct kvm_memory_slot *memslot = &slots->memslots[i];
@@ -1042,9 +946,10 @@ out:
int memslot_id(struct kvm *kvm, gfn_t gfn)
{
int i;
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
struct kvm_memory_slot *memslot = NULL;
+ gfn = unalias_gfn(kvm, gfn);
for (i = 0; i < slots->nmemslots; ++i) {
memslot = &slots->memslots[i];
@@ -1056,179 +961,76 @@ int memslot_id(struct kvm *kvm, gfn_t gfn)
return memslot - slots->memslots;
}
-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
- gfn_t *nr_pages)
+unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
+ struct kvm_memory_slot *slot;
+
+ gfn = unalias_gfn_instantiation(kvm, gfn);
+ slot = gfn_to_memslot_unaliased(kvm, gfn);
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return bad_hva();
-
- if (nr_pages)
- *nr_pages = slot->npages - (gfn - slot->base_gfn);
-
- return gfn_to_hva_memslot(slot, gfn);
-}
-
-unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
-{
- return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
+ return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
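
The reverted gfn_to_hva() reduces to linear arithmetic within one slot: take the frame's offset from the slot base, scale by the page size, and add the slot's userspace mapping. A worked example with made-up slot values:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	/* Hypothetical memslot: guest frame 0x100000 mapped at this hva. */
	unsigned long userspace_addr = 0x7f8000000000UL;
	unsigned long base_gfn = 0x100000;
	unsigned long gfn = 0x100042;

	unsigned long hva = userspace_addr + (gfn - base_gfn) * PAGE_SIZE;
	printf("hva = %#lx\n", hva);   /* 0x7f8000042000 */
	return 0;
}
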
-static pfn_t get_fault_pfn(void)
-{
- get_page(fault_page);
- return fault_pfn;
-}
-
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
- bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
{
struct page *page[1];
- int npages = 0;
+ int npages;
pfn_t pfn;
- /* we can do it either atomically or asynchronously, not both */
- BUG_ON(atomic && async);
-
- BUG_ON(!write_fault && !writable);
-
- if (writable)
- *writable = true;
+ might_sleep();
- if (atomic || async)
- npages = kvm___get_user_pages_fast(addr, 1, 1, page);
-
- if (unlikely(npages != 1) && !atomic) {
- might_sleep();
-
- if (writable)
- *writable = write_fault;
-
- npages = get_user_pages_fast(addr, 1, write_fault, page);
-
- /* map read fault as writable if possible */
- if (unlikely(!write_fault) && npages == 1) {
- struct page *wpage[1];
-
- npages = kvm___get_user_pages_fast(addr, 1, 1, wpage);
- if (npages == 1) {
- *writable = true;
- put_page(page[0]);
- page[0] = wpage[0];
- }
- npages = 1;
- }
- }
+ npages = get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1)) {
struct vm_area_struct *vma;
- if (atomic)
- return get_fault_pfn();
-
down_read(&current->mm->mmap_sem);
- if (is_hwpoison_address(addr)) {
+ vma = find_vma(current->mm, addr);
+
+ if (vma == NULL || addr < vma->vm_start ||
+ !(vma->vm_flags & VM_PFNMAP)) {
up_read(&current->mm->mmap_sem);
- get_page(hwpoison_page);
- return page_to_pfn(hwpoison_page);
+ get_page(bad_page);
+ return page_to_pfn(bad_page);
}
- vma = find_vma_intersection(current->mm, addr, addr+1);
-
- if (vma == NULL)
- pfn = get_fault_pfn();
- else if ((vma->vm_flags & VM_PFNMAP)) {
- pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
- vma->vm_pgoff;
- BUG_ON(!kvm_is_mmio_pfn(pfn));
- } else {
- if (async && (vma->vm_flags & VM_WRITE))
- *async = true;
- pfn = get_fault_pfn();
- }
+ pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
up_read(&current->mm->mmap_sem);
+ BUG_ON(!kvm_is_mmio_pfn(pfn));
} else
pfn = page_to_pfn(page[0]);
return pfn;
}
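
When get_user_pages_fast() fails, the fallback above handles VM_PFNMAP vmas (raw MMIO mappings with no struct page behind them): vm_pgoff holds the pfn backing vm_start, so the target pfn is the faulting address's page offset within the vma plus that base. Illustrated stand-alone with hypothetical numbers:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* Hypothetical VM_PFNMAP vma, e.g. a device BAR mapping. */
	unsigned long vm_start = 0x7f0000000000UL;
	unsigned long vm_pgoff = 0xfebc0;          /* pfn backing vm_start */
	unsigned long addr = vm_start + 3 * 4096;  /* address being resolved */

	unsigned long pfn = ((addr - vm_start) >> PAGE_SHIFT) + vm_pgoff;
	printf("pfn = %#lx\n", pfn);               /* 0xfebc3 */
	return 0;
}
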
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
-{
- return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
-}
-EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
-
-static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
- bool write_fault, bool *writable)
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
unsigned long addr;
- if (async)
- *async = false;
-
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr)) {
get_page(bad_page);
return page_to_pfn(bad_page);
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
- async = NULL;
-#endif
- return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
-}
-
-pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
-{
- return __gfn_to_pfn(kvm, gfn, true, NULL, true, NULL);
-}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
-
-pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
- bool write_fault, bool *writable)
-{
- return __gfn_to_pfn(kvm, gfn, false, async, write_fault, writable);
-}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_async);
-
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
-{
- return __gfn_to_pfn(kvm, gfn, false, NULL, true, NULL);
+ return hva_to_pfn(kvm, addr);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);
-pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
- bool *writable)
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn(kvm, gfn, false, NULL, write_fault, writable);
+ return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
-EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
- return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+ return hva_to_pfn(kvm, addr);
}
-int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
- int nr_pages)
-{
- unsigned long addr;
- gfn_t entry;
-
- addr = gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, &entry);
- if (kvm_is_error_hva(addr))
- return -1;
-
- if (entry < nr_pages)
- return 0;
-
- return kvm___get_user_pages_fast(addr, nr_pages, 1, pages);
-}
-EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
-
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
@@ -1402,51 +1204,9 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
return 0;
}
-int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- gpa_t gpa)
-{
- struct kvm_memslots *slots = kvm_memslots(kvm);
- int offset = offset_in_page(gpa);
- gfn_t gfn = gpa >> PAGE_SHIFT;
-
- ghc->gpa = gpa;
- ghc->generation = slots->generation;
- ghc->memslot = __gfn_to_memslot(slots, gfn);
- ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL);
- if (!kvm_is_error_hva(ghc->hva))
- ghc->hva += offset;
- else
- return -EFAULT;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
-
-int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
- void *data, unsigned long len)
-{
- struct kvm_memslots *slots = kvm_memslots(kvm);
- int r;
-
- if (slots->generation != ghc->generation)
- kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa);
-
- if (kvm_is_error_hva(ghc->hva))
- return -EFAULT;
-
- r = copy_to_user((void *)ghc->hva, data, len);
- if (r)
- return -EFAULT;
- mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
-
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
- return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
- offset, len);
+ return kvm_write_guest_page(kvm, gfn, empty_zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -1469,24 +1229,24 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
- gfn_t gfn)
+void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
{
+ struct kvm_memory_slot *memslot;
+
+ gfn = unalias_gfn(kvm, gfn);
+ memslot = gfn_to_memslot_unaliased(kvm, gfn);
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
+ unsigned long *p = memslot->dirty_bitmap +
+ rel_gfn / BITS_PER_LONG;
+ int offset = rel_gfn % BITS_PER_LONG;
- generic___set_le_bit(rel_gfn, memslot->dirty_bitmap);
+ /* avoid RMW */
+ if (!generic_test_le_bit(offset, p))
+ generic___set_le_bit(offset, p);
}
}
-void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
-{
- struct kvm_memory_slot *memslot;
-
- memslot = gfn_to_memslot(kvm, gfn);
- mark_page_dirty_in_slot(kvm, memslot, gfn);
-}
-
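
The open-coded word/offset split above lets the code test the bit before setting it (the "avoid RMW" path): rel_gfn / BITS_PER_LONG picks the bitmap word and rel_gfn % BITS_PER_LONG the bit inside it, addressing the same bit the one-step generic___set_le_bit(rel_gfn, dirty_bitmap) form did. A stand-alone demo of the split, assuming a little-endian host where the le bit helpers reduce to plain bit operations:

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

int main(void)
{
	unsigned long bitmap[2] = { 0, 0 };
	unsigned long rel_gfn = 70;     /* page 70 of the slot became dirty */

	unsigned long *p = bitmap + rel_gfn / BITS_PER_LONG;
	int offset = rel_gfn % BITS_PER_LONG;

	if (!(*p & (1UL << offset)))    /* avoid RMW on an already-set bit */
		*p |= 1UL << offset;

	printf("word %lu bit %d -> %#lx\n",
	       rel_gfn / BITS_PER_LONG, offset, bitmap[1]);
	return 0;
}
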
/*
* The vCPU has executed a HLT instruction with in-kernel mode enabled.
*/
@@ -1498,7 +1258,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
if (kvm_arch_vcpu_runnable(vcpu)) {
- kvm_make_request(KVM_REQ_UNHALT, vcpu);
+ set_bit(KVM_REQ_UNHALT, &vcpu->requests);
break;
}
if (kvm_cpu_has_pending_timer(vcpu))
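
The kvm_make_request()/kvm_make_check_request() helpers replaced here and in make_all_cpus_request() earlier are a 2.6.38 mini-API over the vcpu->requests bitmap; the backport simply open-codes the bit operations. Their assumed shape, inferred from the substitutions this diff makes:

static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
	set_bit(req, &vcpu->requests);
}

static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu)
{
	return test_and_set_bit(req, &vcpu->requests);
}
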
@@ -1558,7 +1318,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
static struct vm_operations_struct kvm_vcpu_vm_ops = {
- .fault = kvm_vcpu_fault,
+ .VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(kvm_vcpu_fault),
};
static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1580,9 +1340,6 @@ static struct file_operations kvm_vcpu_fops = {
.unlocked_ioctl = kvm_vcpu_ioctl,
.compat_ioctl = kvm_vcpu_ioctl,
.mmap = kvm_vcpu_mmap,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
- .llseek = noop_llseek,
-#endif
};
/*
@@ -1590,7 +1347,7 @@ static struct file_operations kvm_vcpu_fops = {
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
- return kvm_anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+ return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
}
/*
@@ -1672,25 +1429,12 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (vcpu->kvm->mm != current->mm)
return -EIO;
-
-#if defined(CONFIG_S390) || defined(CONFIG_PPC)
- /*
- * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
- * so vcpu_load() would break it.
- */
- if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
- return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
-#endif
-
-
- vcpu_load(vcpu);
switch (ioctl) {
case KVM_RUN:
r = -EINVAL;
if (arg)
goto out;
r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
- trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
break;
case KVM_GET_REGS: {
struct kvm_regs *kvm_regs;
@@ -1827,7 +1571,7 @@ out_free2:
goto out;
p = &sigset;
}
- r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
+ r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
break;
}
case KVM_GET_FPU: {
@@ -1862,7 +1606,6 @@ out_free2:
r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
}
out:
- vcpu_put(vcpu);
kfree(fpu);
kfree(kvm_sregs);
return r;
@@ -1913,6 +1656,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
+ r = -ENXIO;
r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
if (r)
goto out;
@@ -1924,6 +1668,7 @@ static long kvm_vm_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&zone, argp, sizeof zone))
goto out;
+ r = -ENXIO;
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
if (r)
goto out;
@@ -2041,7 +1786,7 @@ static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
static struct vm_operations_struct kvm_vm_vm_ops = {
- .fault = kvm_vm_fault,
+ .VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(kvm_vm_fault),
};
static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
@@ -2057,31 +1802,21 @@ static struct file_operations kvm_vm_fops = {
.compat_ioctl = kvm_vm_compat_ioctl,
#endif
.mmap = kvm_vm_mmap,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
- .llseek = noop_llseek,
-#endif
};
static int kvm_dev_ioctl_create_vm(void)
{
- int r;
+ int fd;
struct kvm *kvm;
kvm = kvm_create_vm();
if (IS_ERR(kvm))
return PTR_ERR(kvm);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
- r = kvm_coalesced_mmio_init(kvm);
- if (r < 0) {
- kvm_put_kvm(kvm);
- return r;
- }
-#endif
- r = kvm_anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
- if (r < 0)
+ fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+ if (fd < 0)
kvm_put_kvm(kvm);
- return r;
+ return fd;
}
static long kvm_dev_ioctl_check_extension_generic(long arg)
@@ -2153,9 +1888,6 @@ out:
static struct file_operations kvm_chardev_ops = {
.unlocked_ioctl = kvm_dev_ioctl,
.compat_ioctl = kvm_dev_ioctl,
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
- .llseek = noop_llseek,
-#endif
};
static struct miscdevice kvm_dev = {
@@ -2164,7 +1896,7 @@ static struct miscdevice kvm_dev = {
&kvm_chardev_ops,
};
-static void hardware_enable_nolock(void *junk)
+static void hardware_enable(void *junk)
{
int cpu = raw_smp_processor_id();
int r;
@@ -2184,14 +1916,7 @@ static void hardware_enable_nolock(void *junk)
}
}
-static void hardware_enable(void *junk)
-{
- spin_lock(&kvm_lock);
- hardware_enable_nolock(junk);
- spin_unlock(&kvm_lock);
-}
-
-static void hardware_disable_nolock(void *junk)
+static void hardware_disable(void *junk)
{
int cpu = raw_smp_processor_id();
@@ -2201,20 +1926,13 @@ static void hardware_disable_nolock(void *junk)
kvm_arch_hardware_disable(NULL);
}
-static void hardware_disable(void *junk)
-{
- spin_lock(&kvm_lock);
- hardware_disable_nolock(junk);
- spin_unlock(&kvm_lock);
-}
-
static void hardware_disable_all_nolock(void)
{
BUG_ON(!kvm_usage_count);
kvm_usage_count--;
if (!kvm_usage_count)
- kvm_on_each_cpu(hardware_disable_nolock, NULL, 1);
+ kvm_on_each_cpu(hardware_disable, NULL, 1);
}
static void hardware_disable_all(void)
@@ -2233,7 +1951,7 @@ static int hardware_enable_all(void)
kvm_usage_count++;
if (kvm_usage_count == 1) {
atomic_set(&hardware_enable_failed, 0);
- kvm_on_each_cpu(hardware_enable_nolock, NULL, 1);
+ kvm_on_each_cpu(hardware_enable, NULL, 1);
if (atomic_read(&hardware_enable_failed)) {
hardware_disable_all_nolock();
@@ -2261,30 +1979,31 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
cpu);
hardware_disable(NULL);
break;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
- case CPU_STARTING:
-#else
+ case CPU_UP_CANCELED:
+ printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+ cpu);
+ smp_call_function_single(cpu, hardware_disable, NULL, 1);
+ break;
case CPU_ONLINE:
-#endif
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28)
- hardware_enable(NULL);
-#else
smp_call_function_single(cpu, hardware_enable, NULL, 1);
-#endif
break;
}
return NOTIFY_OK;
}
-asmlinkage void kvm_spurious_fault(void)
+asmlinkage void kvm_handle_fault_on_reboot(void)
{
+ if (kvm_rebooting)
+ /* spin while reset goes on */
+ while (true)
+ ;
/* Fault while not rebooting. We want the trace. */
BUG();
}
-EXPORT_SYMBOL_GPL(kvm_spurious_fault);
+EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot);
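
kvm_handle_fault_on_reboot() is reached through an exception-fixup trampoline: VMX/SVM instructions that fault because hardware virtualization was just disabled for a reboot get redirected here (spinning until reset) instead of hitting the BUG(). The companion macro from that era's asm/kvm_host.h looked roughly like the following — quoted from memory, so treat it as a sketch:

#define __kvm_handle_fault_on_reboot(insn) \
	"666: " insn "\n\t" \
	".pushsection .fixup, \"ax\" \n" \
	"667: \n\t" \
	__ASM_SIZE(push) " $666b \n\t" \
	"jmp kvm_handle_fault_on_reboot \n\t" \
	".popsection \n\t" \
	".pushsection __ex_table, \"a\" \n\t" \
	_ASM_PTR " 666b, 667b \n\t" \
	".popsection"
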
static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
void *v)
@@ -2297,7 +2016,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
*/
printk(KERN_INFO "kvm: exiting hardware virtualization\n");
kvm_rebooting = true;
- kvm_on_each_cpu(hardware_disable_nolock, NULL, 1);
+ kvm_on_each_cpu(hardware_disable, NULL, 1);
return NOTIFY_OK;
}
@@ -2323,9 +2042,7 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
int i;
- struct kvm_io_bus *bus;
-
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
return 0;
@@ -2337,9 +2054,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
int i;
- struct kvm_io_bus *bus;
+ struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
return 0;
@@ -2403,6 +2119,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_hotplug,
+ .priority = 20, /* must be > scheduler priority */
};
static int __vm_stat_get(void *_offset, u64 *val)
@@ -2469,16 +2186,14 @@ static void kvm_exit_debug(void)
static int kvm_suspend(struct sys_device *dev, pm_message_t state)
{
if (kvm_usage_count)
- hardware_disable_nolock(NULL);
+ hardware_disable(NULL);
return 0;
}
static int kvm_resume(struct sys_device *dev)
{
- if (kvm_usage_count) {
- WARN_ON(spin_is_locked(&kvm_lock));
- hardware_enable_nolock(NULL);
- }
+ if (kvm_usage_count)
+ hardware_enable(NULL);
return 0;
}
@@ -2518,17 +2233,22 @@ static void kvm_sched_out(struct preempt_notifier *pn,
kvm_fire_urn();
}
-int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
+int kvm_init(void *opaque, unsigned int vcpu_size,
struct module *module)
{
int r;
int cpu;
- r = kvm_init_srcu();
+ r = kvm_init_anon_inodes();
if (r)
return r;
+ r = kvm_init_srcu();
+ if (r)
+ goto cleanup_anon_inodes;
+
preempt_notifier_sys_init();
+ hrtimer_kallsyms_resolve();
r = kvm_arch_init(opaque);
if (r)
@@ -2543,24 +2263,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
bad_pfn = page_to_pfn(bad_page);
- hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
- if (hwpoison_page == NULL) {
- r = -ENOMEM;
- goto out_free_0;
- }
-
- hwpoison_pfn = page_to_pfn(hwpoison_page);
-
- fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-
- if (fault_page == NULL) {
- r = -ENOMEM;
- goto out_free_0;
- }
-
- fault_pfn = page_to_pfn(fault_page);
-
if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
r = -ENOMEM;
goto out_free_0;
@@ -2592,19 +2294,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_4;
/* A kmem cache lets us meet the alignment requirements of fx_save. */
- if (!vcpu_align)
- vcpu_align = __alignof__(struct kvm_vcpu);
- kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
+ kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size,
+ __alignof__(struct kvm_vcpu),
0, NULL);
if (!kvm_vcpu_cache) {
r = -ENOMEM;
goto out_free_5;
}
- r = kvm_async_pf_init();
- if (r)
- goto out_free;
-
kvm_chardev_ops.owner = module;
IF_ANON_INODES_DOES_REFCOUNTS( kvm_vm_fops.owner = module;)
IF_ANON_INODES_DOES_REFCOUNTS( kvm_vcpu_fops.owner = module;)
@@ -2612,7 +2309,7 @@ IF_ANON_INODES_DOES_REFCOUNTS( kvm_vcpu_fops.owner = module;)
r = misc_register(&kvm_dev);
if (r) {
printk(KERN_ERR "kvm: misc device register failed\n");
- goto out_unreg;
+ goto out_free;
}
kvm_preempt_ops.sched_in = kvm_sched_in;
@@ -2620,14 +2317,12 @@ IF_ANON_INODES_DOES_REFCOUNTS( kvm_vcpu_fops.owner = module;)
kvm_init_debug();
- printk("loaded kvm module (kvm-kmod-2.6.38-rc7)\n");
+ printk("loaded kvm module (kvm-kmod-2.6.34)\n");
kvm_clock_warn_suspend_bug();
return 0;
-out_unreg:
- kvm_async_pf_deinit();
out_free:
kmem_cache_destroy(kvm_vcpu_cache);
out_free_5:
@@ -2643,37 +2338,35 @@ out_free_1:
out_free_0a:
free_cpumask_var(cpus_hardware_enabled);
out_free_0:
- if (fault_page)
- __free_page(fault_page);
- if (hwpoison_page)
- __free_page(hwpoison_page);
__free_page(bad_page);
out:
kvm_arch_exit();
out_fail:
preempt_notifier_sys_exit();
kvm_exit_srcu();
+cleanup_anon_inodes:
+ kvm_exit_anon_inodes();
return r;
}
EXPORT_SYMBOL_GPL(kvm_init);
void kvm_exit(void)
{
+ tracepoint_synchronize_unregister();
kvm_exit_debug();
misc_deregister(&kvm_dev);
kmem_cache_destroy(kvm_vcpu_cache);
- kvm_async_pf_deinit();
sysdev_unregister(&kvm_sysdev);
sysdev_class_unregister(&kvm_sysdev_class);
unregister_reboot_notifier(&kvm_reboot_notifier);
unregister_cpu_notifier(&kvm_cpu_notifier);
- kvm_on_each_cpu(hardware_disable_nolock, NULL, 1);
+ kvm_on_each_cpu(hardware_disable, NULL, 1);
kvm_arch_hardware_unsetup();
kvm_arch_exit();
free_cpumask_var(cpus_hardware_enabled);
- __free_page(hwpoison_page);
__free_page(bad_page);
preempt_notifier_sys_exit();
kvm_exit_srcu();
+ kvm_exit_anon_inodes();
}
EXPORT_SYMBOL_GPL(kvm_exit);