path: root/qemu-kvm.c
author    Robert Mustacchi <rm@joyent.com>  2011-06-24 13:49:54 -0700
committer Robert Mustacchi <rm@joyent.com>  2011-06-24 13:49:54 -0700
commit    68396ea9c0fe4f75ce30b1eba2c44c43c13344bb (patch)
tree      802587d411d9db461e6500c5b635043315f81c27 /qemu-kvm.c
download  illumos-kvm-cmd-68396ea9c0fe4f75ce30b1eba2c44c43c13344bb.tar.gz
Initial commit of d32e8d0b8d9e0ef7cf7ab2e74548982972789dfc from qemu-kvm
Diffstat (limited to 'qemu-kvm.c')
-rw-r--r--  qemu-kvm.c  1784
1 file changed, 1784 insertions(+), 0 deletions(-)
diff --git a/qemu-kvm.c b/qemu-kvm.c
new file mode 100644
index 0000000..49cd683
--- /dev/null
+++ b/qemu-kvm.c
@@ -0,0 +1,1784 @@
+/*
+ * qemu/kvm integration
+ *
+ * Copyright (C) 2006-2008 Qumranet Technologies
+ *
+ * Licensed under the terms of the GNU GPL version 2 or higher.
+ */
+#include "config.h"
+#include "config-host.h"
+
+#include <assert.h>
+#include <string.h>
+#include "hw/hw.h"
+#include "sysemu.h"
+#include "qemu-common.h"
+#include "console.h"
+#include "block.h"
+#include "compatfd.h"
+#include "gdbstub.h"
+#include "monitor.h"
+
+#include "qemu-kvm.h"
+#include "libkvm.h"
+
+#include <pthread.h>
+#include <sys/utsname.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "compatfd.h"
+#include <sys/prctl.h>
+
+#define false 0
+#define true 1
+
+#ifndef PR_MCE_KILL
+#define PR_MCE_KILL 33
+#endif
+
+#ifndef BUS_MCEERR_AR
+#define BUS_MCEERR_AR 4
+#endif
+#ifndef BUS_MCEERR_AO
+#define BUS_MCEERR_AO 5
+#endif
+
+#define EXPECTED_KVM_API_VERSION 12
+
+#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION
+#error libkvm: userspace and kernel version mismatch
+#endif
+
+int kvm_irqchip = 1;
+int kvm_pit = 1;
+int kvm_pit_reinject = 1;
+int kvm_nested = 0;
+
+
+KVMState *kvm_state;
+kvm_context_t kvm_context;
+
+pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t qemu_vcpu_cond = PTHREAD_COND_INITIALIZER;
+pthread_cond_t qemu_system_cond = PTHREAD_COND_INITIALIZER;
+pthread_cond_t qemu_pause_cond = PTHREAD_COND_INITIALIZER;
+pthread_cond_t qemu_work_cond = PTHREAD_COND_INITIALIZER;
+__thread CPUState *current_env;
+
+static int qemu_system_ready;
+
+#define SIG_IPI (SIGRTMIN+4)
+
+pthread_t io_thread;
+static int io_thread_sigfd = -1;
+
+static CPUState *kvm_debug_cpu_requested;
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+/* The list of ioperm_data */
+static QLIST_HEAD(, ioperm_data) ioperm_head;
+#endif
+
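+/* Round x up to the next multiple of y; y must be a power of two. */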
+#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
+
+int kvm_abi = EXPECTED_KVM_API_VERSION;
+int kvm_page_size;
+
+#ifdef KVM_CAP_SET_GUEST_DEBUG
+static int kvm_debug(CPUState *env,
+ struct kvm_debug_exit_arch *arch_info)
+{
+ int handle = kvm_arch_debug(arch_info);
+
+ if (handle) {
+ kvm_debug_cpu_requested = env;
+ env->stopped = 1;
+ }
+ return handle;
+}
+#endif
+
+static int handle_unhandled(uint64_t reason)
+{
+ fprintf(stderr, "kvm: unhandled exit %" PRIx64 "\n", reason);
+ return -EINVAL;
+}
+
+#define VMX_INVALID_GUEST_STATE 0x80000021
+
+static int handle_failed_vmentry(uint64_t reason)
+{
+ fprintf(stderr, "kvm: vm entry failed with error 0x%" PRIx64 "\n\n", reason);
+
+ /* We may need to check whether this machine is Intel, since exit
+ reason 0x21 has a different interpretation on SVM. */
+ if (reason == VMX_INVALID_GUEST_STATE) {
+ fprintf(stderr, "If you're runnning a guest on an Intel machine without\n");
+ fprintf(stderr, "unrestricted mode support, the failure can be most likely\n");
+ fprintf(stderr, "due to the guest entering an invalid state for Intel VT.\n");
+ fprintf(stderr, "For example, the guest maybe running in big real mode\n");
+ fprintf(stderr, "which is not supported on less recent Intel processors.\n\n");
+ }
+
+ return -EINVAL;
+}
+
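+/*
+ * used_gsi_bitmap tracks which global system interrupts are claimed by
+ * routing entries, one bit per GSI; these helpers set and clear a bit,
+ * bounds-checked against max_gsi.
+ */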
+static inline void set_gsi(kvm_context_t kvm, unsigned int gsi)
+{
+ uint32_t *bitmap = kvm->used_gsi_bitmap;
+
+ if (gsi < kvm->max_gsi)
+ bitmap[gsi / 32] |= 1U << (gsi % 32);
+ else
+ DPRINTF("Invalid GSI %u\n", gsi);
+}
+
+static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi)
+{
+ uint32_t *bitmap = kvm->used_gsi_bitmap;
+
+ if (gsi < kvm->max_gsi)
+ bitmap[gsi / 32] &= ~(1U << (gsi % 32));
+ else
+ DPRINTF("Invalid GSI %u\n", gsi);
+}
+
+static int kvm_create_context(void);
+
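+/*
+ * Open /dev/kvm, check that the kernel speaks the API version we were
+ * built against, and allocate the global KVMState. The GSI bitmap is
+ * rounded up to a multiple of 32 bits so free GSIs can be found with
+ * ffs(); the padding bits are marked in-use so they are never handed
+ * out.
+ */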
+int kvm_init(void)
+{
+ int fd;
+ int r, gsi_count;
+
+
+ fd = open("/dev/kvm", O_RDWR);
+ if (fd == -1) {
+ perror("open /dev/kvm");
+ return -1;
+ }
+ r = ioctl(fd, KVM_GET_API_VERSION, 0);
+ if (r == -1) {
+ fprintf(stderr,
+ "kvm kernel version too old: "
+ "KVM_GET_API_VERSION ioctl not supported\n");
+ goto out_close;
+ }
+ if (r < EXPECTED_KVM_API_VERSION) {
+ fprintf(stderr, "kvm kernel version too old: "
+ "We expect API version %d or newer, but got "
+ "version %d\n", EXPECTED_KVM_API_VERSION, r);
+ goto out_close;
+ }
+ if (r > EXPECTED_KVM_API_VERSION) {
+ fprintf(stderr, "kvm userspace version too old\n");
+ goto out_close;
+ }
+ kvm_abi = r;
+ kvm_page_size = getpagesize();
+ kvm_state = qemu_mallocz(sizeof(*kvm_state));
+ kvm_context = &kvm_state->kvm_context;
+
+ kvm_state->fd = fd;
+ kvm_state->vmfd = -1;
+ kvm_context->opaque = cpu_single_env;
+ kvm_context->dirty_pages_log_all = 0;
+ kvm_context->no_irqchip_creation = 0;
+ kvm_context->no_pit_creation = 0;
+
+#ifdef KVM_CAP_SET_GUEST_DEBUG
+ QTAILQ_INIT(&kvm_state->kvm_sw_breakpoints);
+#endif
+
+ gsi_count = kvm_get_gsi_count(kvm_context);
+ if (gsi_count > 0) {
+ int gsi_bits, i;
+
+ /* Round up so we can search ints using ffs */
+ gsi_bits = ALIGN(gsi_count, 32);
+ kvm_context->used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
+ kvm_context->max_gsi = gsi_bits;
+
+ /* Mark any over-allocated bits as already in use */
+ for (i = gsi_count; i < gsi_bits; i++) {
+ set_gsi(kvm_context, i);
+ }
+ }
+
+ kvm_cpu_register_phys_memory_client();
+
+ pthread_mutex_lock(&qemu_mutex);
+ return kvm_create_context();
+
+ out_close:
+ close(fd);
+ return -1;
+}
+
+static void kvm_finalize(KVMState *s)
+{
+ /* FIXME
+ if (kvm->vcpu_fd[0] != -1)
+ close(kvm->vcpu_fd[0]);
+ if (kvm->vm_fd != -1)
+ close(kvm->vm_fd);
+ */
+ close(s->fd);
+ free(s);
+}
+
+void kvm_disable_irqchip_creation(kvm_context_t kvm)
+{
+ kvm->no_irqchip_creation = 1;
+}
+
+void kvm_disable_pit_creation(kvm_context_t kvm)
+{
+ kvm->no_pit_creation = 1;
+}
+
+static void kvm_reset_vcpu(void *opaque)
+{
+ CPUState *env = opaque;
+
+ kvm_arch_cpu_reset(env);
+}
+
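+/*
+ * KVM_CREATE_VCPU returns a per-vcpu file descriptor; the kvm_run
+ * structure used to communicate exit state with the kernel is then
+ * mmap()ed from that fd. Any failure aborts, since a half-created
+ * vcpu cannot be recovered.
+ */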
+static void kvm_create_vcpu(CPUState *env, int id)
+{
+ long mmap_size;
+ int r;
+ KVMState *s = kvm_state;
+
+ r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
+ if (r < 0) {
+ fprintf(stderr, "kvm_create_vcpu: %m\n");
+ fprintf(stderr, "Failed to create vCPU. Check the -smp parameter.\n");
+ goto err;
+ }
+
+ env->kvm_fd = r;
+ env->kvm_state = kvm_state;
+
+ mmap_size = kvm_ioctl(kvm_state, KVM_GET_VCPU_MMAP_SIZE, 0);
+ if (mmap_size < 0) {
+ fprintf(stderr, "get vcpu mmap size: %m\n");
+ goto err_fd;
+ }
+ env->kvm_run =
+ mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, env->kvm_fd,
+ 0);
+ if (env->kvm_run == MAP_FAILED) {
+ fprintf(stderr, "mmap vcpu area: %m\n");
+ goto err_fd;
+ }
+
+#ifdef KVM_CAP_COALESCED_MMIO
+ if (s->coalesced_mmio && !s->coalesced_mmio_ring)
+ s->coalesced_mmio_ring = (void *) env->kvm_run +
+ s->coalesced_mmio * PAGE_SIZE;
+#endif
+
+ r = kvm_arch_init_vcpu(env);
+ if (r == 0) {
+ qemu_register_reset(kvm_reset_vcpu, env);
+ }
+
+ return;
+ err_fd:
+ close(env->kvm_fd);
+ err:
+ /* We're no good with semi-broken states. */
+ abort();
+}
+
+static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
+{
+#ifdef KVM_CAP_SET_BOOT_CPU_ID
+ int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID);
+ if (r > 0) {
+ return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id);
+ }
+ return -ENOSYS;
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_create_vm(kvm_context_t kvm)
+{
+ int fd;
+#ifdef KVM_CAP_IRQ_ROUTING
+ kvm->irq_routes = qemu_mallocz(sizeof(*kvm->irq_routes));
+ kvm->nr_allocated_irq_routes = 0;
+#endif
+
+ fd = kvm_ioctl(kvm_state, KVM_CREATE_VM, 0);
+ if (fd < 0) {
+ fprintf(stderr, "kvm_create_vm: %m\n");
+ return -1;
+ }
+ kvm_state->vmfd = fd;
+ return 0;
+}
+
+static int kvm_create_default_phys_mem(kvm_context_t kvm,
+ unsigned long phys_mem_bytes,
+ void **vm_mem)
+{
+#ifdef KVM_CAP_USER_MEMORY
+ int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
+ if (r > 0)
+ return 0;
+ fprintf(stderr,
+ "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n");
+#else
+#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported
+#endif
+ return -1;
+}
+
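+/*
+ * Create the in-kernel PIC/IOAPIC if the kernel supports it. When
+ * KVM_CAP_IRQ_INJECT_STATUS is available, prefer KVM_IRQ_LINE_STATUS
+ * so callers can tell whether an injected interrupt was coalesced.
+ */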
+void kvm_create_irqchip(kvm_context_t kvm)
+{
+ int r;
+
+ kvm->irqchip_in_kernel = 0;
+#ifdef KVM_CAP_IRQCHIP
+ if (!kvm->no_irqchip_creation) {
+ r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
+ if (r > 0) { /* kernel irqchip supported */
+ r = kvm_vm_ioctl(kvm_state, KVM_CREATE_IRQCHIP);
+ if (r >= 0) {
+ kvm->irqchip_inject_ioctl = KVM_IRQ_LINE;
+#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS)
+ r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
+ KVM_CAP_IRQ_INJECT_STATUS);
+ if (r > 0) {
+ kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
+ }
+#endif
+ kvm->irqchip_in_kernel = 1;
+ } else
+ fprintf(stderr, "Create kernel PIC irqchip failed\n");
+ }
+ }
+#endif
+ kvm_state->irqchip_in_kernel = kvm->irqchip_in_kernel;
+}
+
+int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+{
+ int r, i;
+
+ r = kvm_create_vm(kvm);
+ if (r < 0) {
+ return r;
+ }
+ r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem);
+ if (r < 0) {
+ return r;
+ }
+ for (i = 0; i < ARRAY_SIZE(kvm_state->slots); i++) {
+ kvm_state->slots[i].slot = i;
+ }
+
+ r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
+ if (r < 0) {
+ return r;
+ }
+
+ kvm_create_irqchip(kvm);
+
+ return 0;
+}
+
+#ifdef KVM_CAP_IRQCHIP
+
+int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status)
+{
+ struct kvm_irq_level event;
+ int r;
+
+ if (!kvm->irqchip_in_kernel) {
+ return 0;
+ }
+ event.level = level;
+ event.irq = irq;
+ r = kvm_vm_ioctl(kvm_state, kvm->irqchip_inject_ioctl, &event);
+ if (r < 0) {
+ perror("kvm_set_irq_level");
+ }
+
+ if (status) {
+#ifdef KVM_CAP_IRQ_INJECT_STATUS
+ *status =
+ (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
+#else
+ *status = 1;
+#endif
+ }
+
+ return 1;
+}
+
+int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
+{
+ int r;
+
+ if (!kvm->irqchip_in_kernel) {
+ return 0;
+ }
+ r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip);
+ if (r < 0) {
+ perror("kvm_get_irqchip\n");
+ }
+ return r;
+}
+
+int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip)
+{
+ int r;
+
+ if (!kvm->irqchip_in_kernel) {
+ return 0;
+ }
+ r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip);
+ if (r < 0) {
+ perror("kvm_set_irqchip\n");
+ }
+ return r;
+}
+
+#endif
+
+static int handle_debug(CPUState *env)
+{
+#ifdef KVM_CAP_SET_GUEST_DEBUG
+ struct kvm_run *run = env->kvm_run;
+
+ return kvm_debug(env, &run->debug.arch);
+#else
+ return 0;
+#endif
+}
+
+int kvm_get_regs(CPUState *env, struct kvm_regs *regs)
+{
+ return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs);
+}
+
+int kvm_set_regs(CPUState *env, struct kvm_regs *regs)
+{
+ return kvm_vcpu_ioctl(env, KVM_SET_REGS, regs);
+}
+
+#ifdef KVM_CAP_MP_STATE
+int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
+{
+ int r;
+
+ r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
+ if (r > 0) {
+ return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state);
+ }
+ return -ENOSYS;
+}
+
+int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state)
+{
+ int r;
+
+ r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE);
+ if (r > 0) {
+ return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state);
+ }
+ return -ENOSYS;
+}
+#endif
+
+static int handle_mmio(CPUState *env)
+{
+ unsigned long addr = env->kvm_run->mmio.phys_addr;
+ struct kvm_run *kvm_run = env->kvm_run;
+ void *data = kvm_run->mmio.data;
+
+ /* hack: Red Hat 7.1 generates these weird accesses. */
+ if ((addr > 0xa0000 - 4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) {
+ return 0;
+ }
+
+ cpu_physical_memory_rw(addr, data, kvm_run->mmio.len, kvm_run->mmio.is_write);
+ return 0;
+}
+
+int handle_io_window(kvm_context_t kvm)
+{
+ return 1;
+}
+
+int handle_shutdown(kvm_context_t kvm, CPUState *env)
+{
+ /* stop the current vcpu from going back to guest mode */
+ env->stopped = 1;
+
+ qemu_system_reset_request();
+ return 1;
+}
+
+static inline void push_nmi(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_USER_NMI
+ kvm_arch_push_nmi(kvm->opaque);
+#endif /* KVM_CAP_USER_NMI */
+}
+
+void post_kvm_run(kvm_context_t kvm, CPUState *env)
+{
+ pthread_mutex_lock(&qemu_mutex);
+ kvm_arch_post_run(env, env->kvm_run);
+ cpu_single_env = env;
+}
+
+int pre_kvm_run(kvm_context_t kvm, CPUState *env)
+{
+ kvm_arch_pre_run(env, env->kvm_run);
+
+ pthread_mutex_unlock(&qemu_mutex);
+ return 0;
+}
+
+int kvm_is_ready_for_interrupt_injection(CPUState *env)
+{
+ return env->kvm_run->ready_for_interrupt_injection;
+}
+
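+/*
+ * Run one vcpu: flush dirty register state to the kernel, drop the
+ * global mutex around the KVM_RUN ioctl, and dispatch on the exit
+ * reason when it returns. Loops until a handler returns non-zero and
+ * hands that value back to the caller.
+ */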
+int kvm_run(CPUState *env)
+{
+ int r;
+ kvm_context_t kvm = &env->kvm_state->kvm_context;
+ struct kvm_run *run = env->kvm_run;
+ int fd = env->kvm_fd;
+
+ again:
+ if (env->kvm_vcpu_dirty) {
+ kvm_arch_load_regs(env, KVM_PUT_RUNTIME_STATE);
+ env->kvm_vcpu_dirty = 0;
+ }
+ push_nmi(kvm);
+#if !defined(__s390__)
+ if (!kvm->irqchip_in_kernel) {
+ run->request_interrupt_window = kvm_arch_try_push_interrupts(env);
+ }
+#endif
+
+ r = pre_kvm_run(kvm, env);
+ if (r) {
+ return r;
+ }
+ if (env->exit_request) {
+ env->exit_request = 0;
+ pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+ }
+ r = ioctl(fd, KVM_RUN, 0);
+
+ if (r == -1 && errno != EINTR && errno != EAGAIN) {
+ r = -errno;
+ post_kvm_run(kvm, env);
+ fprintf(stderr, "kvm_run: %s\n", strerror(-r));
+ return r;
+ }
+
+ post_kvm_run(kvm, env);
+
+ kvm_flush_coalesced_mmio_buffer();
+
+#if !defined(__s390__)
+ if (r == -1) {
+ r = handle_io_window(kvm);
+ goto more;
+ }
+#endif
+ switch (run->exit_reason) {
+ case KVM_EXIT_UNKNOWN:
+ r = handle_unhandled(run->hw.hardware_exit_reason);
+ break;
+ case KVM_EXIT_FAIL_ENTRY:
+ r = handle_failed_vmentry(run->fail_entry.hardware_entry_failure_reason);
+ break;
+ case KVM_EXIT_EXCEPTION:
+ fprintf(stderr, "exception %d (%x)\n", run->ex.exception,
+ run->ex.error_code);
+ kvm_show_regs(env);
+ kvm_show_code(env);
+ abort();
+ break;
+ case KVM_EXIT_IO:
+ r = kvm_handle_io(run->io.port,
+ (uint8_t *)run + run->io.data_offset,
+ run->io.direction,
+ run->io.size,
+ run->io.count);
+ r = 0;
+ break;
+ case KVM_EXIT_DEBUG:
+ r = handle_debug(env);
+ break;
+ case KVM_EXIT_MMIO:
+ r = handle_mmio(env);
+ break;
+ case KVM_EXIT_HLT:
+ r = kvm_arch_halt(env);
+ break;
+ case KVM_EXIT_IRQ_WINDOW_OPEN:
+ break;
+ case KVM_EXIT_SHUTDOWN:
+ r = handle_shutdown(kvm, env);
+ break;
+#if defined(__s390__)
+ case KVM_EXIT_S390_SIEIC:
+ r = kvm_s390_handle_intercept(kvm, env, run);
+ break;
+ case KVM_EXIT_S390_RESET:
+ r = kvm_s390_handle_reset(kvm, env, run);
+ break;
+#endif
+ case KVM_EXIT_INTERNAL_ERROR:
+ kvm_handle_internal_error(env, run);
+ r = 1;
+ break;
+ default:
+ if (kvm_arch_run(env)) {
+ fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
+ kvm_show_regs(env);
+ abort();
+ }
+ break;
+ }
+ }
+more:
+ if (!r) {
+ goto again;
+ }
+ return r;
+}
+
+int kvm_inject_irq(CPUState *env, unsigned irq)
+{
+ struct kvm_interrupt intr;
+
+ intr.irq = irq;
+ return kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+}
+
+int kvm_inject_nmi(CPUState *env)
+{
+#ifdef KVM_CAP_USER_NMI
+ return kvm_vcpu_ioctl(env, KVM_NMI);
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_init_coalesced_mmio(kvm_context_t kvm)
+{
+ int r = 0;
+ kvm_state->coalesced_mmio = 0;
+#ifdef KVM_CAP_COALESCED_MMIO
+ r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
+ if (r > 0) {
+ kvm_state->coalesced_mmio = r;
+ return 0;
+ }
+#endif
+ return r;
+}
+
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+int kvm_assign_pci_device(kvm_context_t kvm,
+ struct kvm_assigned_pci_dev *assigned_dev)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_PCI_DEVICE, assigned_dev);
+}
+
+static int kvm_old_assign_irq(kvm_context_t kvm,
+ struct kvm_assigned_irq *assigned_irq)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_IRQ, assigned_irq);
+}
+
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
+{
+ int ret;
+
+ ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ);
+ if (ret > 0) {
+ return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_DEV_IRQ, assigned_irq);
+ }
+
+ return kvm_old_assign_irq(kvm, assigned_irq);
+}
+
+int kvm_deassign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_DEV_IRQ, assigned_irq);
+}
+#else
+int kvm_assign_irq(kvm_context_t kvm, struct kvm_assigned_irq *assigned_irq)
+{
+ return kvm_old_assign_irq(kvm, assigned_irq);
+}
+#endif
+#endif
+
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+int kvm_deassign_pci_device(kvm_context_t kvm,
+ struct kvm_assigned_pci_dev *assigned_dev)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
+}
+#endif
+
+int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
+{
+#ifdef KVM_CAP_REINJECT_CONTROL
+ int r;
+ struct kvm_reinject_control control;
+
+ control.pit_reinject = pit_reinject;
+
+ r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
+ if (r > 0) {
+ return kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control);
+ }
+#endif
+ return -ENOSYS;
+}
+
+int kvm_has_gsi_routing(void)
+{
+ int r = 0;
+
+#ifdef KVM_CAP_IRQ_ROUTING
+ r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
+#endif
+ return r;
+}
+
+int kvm_get_gsi_count(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
+#else
+ return -EINVAL;
+#endif
+}
+
+int kvm_clear_gsi_routes(void)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ kvm_context_t kvm = kvm_context;
+
+ kvm->irq_routes->nr = 0;
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
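+/*
+ * Append a routing entry to the table, doubling its allocation
+ * (minimum 64 entries) when full. The table is only pushed to the
+ * kernel by kvm_commit_irq_routes().
+ */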
+int kvm_add_routing_entry(struct kvm_irq_routing_entry *entry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ kvm_context_t kvm = kvm_context;
+ struct kvm_irq_routing *z;
+ struct kvm_irq_routing_entry *new;
+ int n, size;
+
+ if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
+ n = kvm->nr_allocated_irq_routes * 2;
+ if (n < 64) {
+ n = 64;
+ }
+ size = sizeof(struct kvm_irq_routing);
+ size += n * sizeof(*new);
+ z = realloc(kvm->irq_routes, size);
+ if (!z) {
+ return -ENOMEM;
+ }
+ kvm->nr_allocated_irq_routes = n;
+ kvm->irq_routes = z;
+ }
+ n = kvm->irq_routes->nr++;
+ new = &kvm->irq_routes->entries[n];
+ memset(new, 0, sizeof(*new));
+ new->gsi = entry->gsi;
+ new->type = entry->type;
+ new->flags = entry->flags;
+ new->u = entry->u;
+
+ set_gsi(kvm, entry->gsi);
+
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_add_irq_route(int gsi, int irqchip, int pin)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ struct kvm_irq_routing_entry e;
+
+ e.gsi = gsi;
+ e.type = KVM_IRQ_ROUTING_IRQCHIP;
+ e.flags = 0;
+ e.u.irqchip.irqchip = irqchip;
+ e.u.irqchip.pin = pin;
+ return kvm_add_routing_entry(&e);
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_del_routing_entry(struct kvm_irq_routing_entry *entry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ kvm_context_t kvm = kvm_context;
+ struct kvm_irq_routing_entry *e, *p;
+ int i, gsi, found = 0;
+
+ gsi = entry->gsi;
+
+ for (i = 0; i < kvm->irq_routes->nr; ++i) {
+ e = &kvm->irq_routes->entries[i];
+ if (e->type == entry->type && e->gsi == gsi) {
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:{
+ if (e->u.irqchip.irqchip ==
+ entry->u.irqchip.irqchip
+ && e->u.irqchip.pin == entry->u.irqchip.pin) {
+ p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
+ *e = *p;
+ found = 1;
+ }
+ break;
+ }
+ case KVM_IRQ_ROUTING_MSI:{
+ if (e->u.msi.address_lo ==
+ entry->u.msi.address_lo
+ && e->u.msi.address_hi ==
+ entry->u.msi.address_hi
+ && e->u.msi.data == entry->u.msi.data) {
+ p = &kvm->irq_routes->entries[--kvm->irq_routes->nr];
+ *e = *p;
+ found = 1;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ if (found) {
+ /* If there are no other users of this GSI
+ * mark it available in the bitmap */
+ for (i = 0; i < kvm->irq_routes->nr; i++) {
+ e = &kvm->irq_routes->entries[i];
+ if (e->gsi == gsi)
+ break;
+ }
+ if (i == kvm->irq_routes->nr) {
+ clear_gsi(kvm, gsi);
+ }
+
+ return 0;
+ }
+ }
+ }
+ return -ESRCH;
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_update_routing_entry(struct kvm_irq_routing_entry *entry,
+ struct kvm_irq_routing_entry *newentry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ kvm_context_t kvm = kvm_context;
+ struct kvm_irq_routing_entry *e;
+ int i;
+
+ if (entry->gsi != newentry->gsi || entry->type != newentry->type) {
+ return -EINVAL;
+ }
+
+ for (i = 0; i < kvm->irq_routes->nr; ++i) {
+ e = &kvm->irq_routes->entries[i];
+ if (e->type != entry->type || e->gsi != entry->gsi) {
+ continue;
+ }
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
+ e->u.irqchip.pin == entry->u.irqchip.pin) {
+ memcpy(&e->u.irqchip, &newentry->u.irqchip,
+ sizeof e->u.irqchip);
+ return 0;
+ }
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ if (e->u.msi.address_lo == entry->u.msi.address_lo &&
+ e->u.msi.address_hi == entry->u.msi.address_hi &&
+ e->u.msi.data == entry->u.msi.data) {
+ memcpy(&e->u.msi, &newentry->u.msi, sizeof e->u.msi);
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return -ESRCH;
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_del_irq_route(int gsi, int irqchip, int pin)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ struct kvm_irq_routing_entry e;
+
+ e.gsi = gsi;
+ e.type = KVM_IRQ_ROUTING_IRQCHIP;
+ e.flags = 0;
+ e.u.irqchip.irqchip = irqchip;
+ e.u.irqchip.pin = pin;
+ return kvm_del_routing_entry(&e);
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_commit_irq_routes(void)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ kvm_context_t kvm = kvm_context;
+
+ kvm->irq_routes->flags = 0;
+ return kvm_vm_ioctl(kvm_state, KVM_SET_GSI_ROUTING, kvm->irq_routes);
+#else
+ return -ENOSYS;
+#endif
+}
+
+int kvm_get_irq_route_gsi(void)
+{
+ kvm_context_t kvm = kvm_context;
+ int i, bit;
+ uint32_t *buf = kvm->used_gsi_bitmap;
+
+ /* Return the lowest unused GSI in the bitmap */
+ for (i = 0; i < kvm->max_gsi / 32; i++) {
+ bit = ffs(~buf[i]);
+ if (!bit) {
+ continue;
+ }
+
+ return bit - 1 + i * 32;
+ }
+
+ return -ENOSPC;
+}
+
+static void kvm_msix_routing_entry(struct kvm_irq_routing_entry *e,
+ uint32_t gsi, uint32_t addr_lo,
+ uint32_t addr_hi, uint32_t data)
+{
+ e->gsi = gsi;
+ e->type = KVM_IRQ_ROUTING_MSI;
+ e->flags = 0;
+ e->u.msi.address_lo = addr_lo;
+ e->u.msi.address_hi = addr_hi;
+ e->u.msi.data = data;
+}
+
+int kvm_add_msix(uint32_t gsi, uint32_t addr_lo,
+ uint32_t addr_hi, uint32_t data)
+{
+ struct kvm_irq_routing_entry e;
+
+ kvm_msix_routing_entry(&e, gsi, addr_lo, addr_hi, data);
+ return kvm_add_routing_entry(&e);
+}
+
+int kvm_del_msix(uint32_t gsi, uint32_t addr_lo,
+ uint32_t addr_hi, uint32_t data)
+{
+ struct kvm_irq_routing_entry e;
+
+ kvm_msix_routing_entry(&e, gsi, addr_lo, addr_hi, data);
+ return kvm_del_routing_entry(&e);
+}
+
+int kvm_update_msix(uint32_t old_gsi, uint32_t old_addr_lo,
+ uint32_t old_addr_hi, uint32_t old_data,
+ uint32_t new_gsi, uint32_t new_addr_lo,
+ uint32_t new_addr_hi, uint32_t new_data)
+{
+ struct kvm_irq_routing_entry e1, e2;
+
+ kvm_msix_routing_entry(&e1, old_gsi, old_addr_lo, old_addr_hi, old_data);
+ kvm_msix_routing_entry(&e2, new_gsi, new_addr_lo, new_addr_hi, new_data);
+ return kvm_update_routing_entry(&e1, &e2);
+}
+
+
+#ifdef KVM_CAP_DEVICE_MSIX
+int kvm_assign_set_msix_nr(kvm_context_t kvm,
+ struct kvm_assigned_msix_nr *msix_nr)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
+}
+
+int kvm_assign_set_msix_entry(kvm_context_t kvm,
+ struct kvm_assigned_msix_entry *entry)
+{
+ return kvm_vm_ioctl(kvm_state, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
+}
+#endif
+
+#if defined(KVM_CAP_IRQFD) && defined(CONFIG_EVENTFD)
+
+#include <sys/eventfd.h>
+
+static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
+{
+ struct kvm_irqfd data = {
+ .fd = fd,
+ .gsi = gsi,
+ .flags = flags,
+ };
+
+ return kvm_vm_ioctl(kvm_state, KVM_IRQFD, &data);
+}
+
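+/*
+ * Allocate an eventfd and bind it to a GSI with the KVM_IRQFD ioctl;
+ * signalling the returned fd then injects that interrupt without a
+ * round trip through userspace.
+ */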
+int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+ int r;
+ int fd;
+
+ if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
+ return -ENOENT;
+
+ fd = eventfd(0, 0);
+ if (fd < 0) {
+ return -errno;
+ }
+
+ r = _kvm_irqfd(kvm, fd, gsi, 0);
+ if (r < 0) {
+ close(fd);
+ return -errno;
+ }
+
+ return fd;
+}
+
+#else /* KVM_CAP_IRQFD */
+
+int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+ return -ENOSYS;
+}
+
+#endif /* KVM_CAP_IRQFD */
+unsigned long kvm_get_thread_id(void)
+{
+ return syscall(SYS_gettid);
+}
+
+static void qemu_cond_wait(pthread_cond_t *cond)
+{
+ CPUState *env = cpu_single_env;
+
+ pthread_cond_wait(cond, &qemu_mutex);
+ cpu_single_env = env;
+}
+
+static void sig_ipi_handler(int n)
+{
+}
+
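+/*
+ * Restore the default SIGBUS disposition and re-raise the signal so
+ * the process dies with the original cause intact; if that somehow
+ * fails, abort.
+ */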
+static void sigbus_reraise(void)
+{
+ sigset_t set;
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = SIG_DFL;
+ if (!sigaction(SIGBUS, &action, NULL)) {
+ raise(SIGBUS);
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ sigprocmask(SIG_UNBLOCK, &set, NULL);
+ }
+ perror("Failed to re-raise SIGBUS!\n");
+ abort();
+}
+
+static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
+ void *ctx)
+{
+ if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
+ sigbus_reraise();
+}
+
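+/*
+ * Run func on the thread that owns env. If called from that thread,
+ * call it directly; otherwise queue a work item, kick the vcpu thread
+ * with SIG_IPI, and wait on qemu_work_cond until the item is done.
+ */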
+void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
+{
+ struct qemu_work_item wi;
+
+ if (env == current_env) {
+ func(data);
+ return;
+ }
+
+ wi.func = func;
+ wi.data = data;
+ if (!env->kvm_cpu_state.queued_work_first) {
+ env->kvm_cpu_state.queued_work_first = &wi;
+ } else {
+ env->kvm_cpu_state.queued_work_last->next = &wi;
+ }
+ env->kvm_cpu_state.queued_work_last = &wi;
+ wi.next = NULL;
+ wi.done = false;
+
+ pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+ while (!wi.done) {
+ qemu_cond_wait(&qemu_work_cond);
+ }
+}
+
+static void do_kvm_cpu_synchronize_state(void *_env)
+{
+ CPUState *env = _env;
+
+ if (!env->kvm_vcpu_dirty) {
+ kvm_arch_save_regs(env);
+ env->kvm_vcpu_dirty = 1;
+ }
+}
+
+void kvm_cpu_synchronize_state(CPUState *env)
+{
+ if (!env->kvm_vcpu_dirty) {
+ on_vcpu(env, do_kvm_cpu_synchronize_state, env);
+ }
+}
+
+void kvm_cpu_synchronize_post_reset(CPUState *env)
+{
+ kvm_arch_load_regs(env, KVM_PUT_RESET_STATE);
+ env->kvm_vcpu_dirty = 0;
+}
+
+void kvm_cpu_synchronize_post_init(CPUState *env)
+{
+ kvm_arch_load_regs(env, KVM_PUT_FULL_STATE);
+ env->kvm_vcpu_dirty = 0;
+}
+
+static void inject_interrupt(void *data)
+{
+ cpu_interrupt(current_env, (long) data);
+}
+
+void kvm_inject_interrupt(CPUState *env, int mask)
+{
+ on_vcpu(env, inject_interrupt, (void *) (long) mask);
+}
+
+void kvm_update_interrupt_request(CPUState *env)
+{
+ int signal = 0;
+
+ if (env) {
+ if (!current_env || !current_env->created) {
+ signal = 1;
+ }
+ /*
+ * Testing for created here is really redundant
+ */
+ if (current_env && current_env->created &&
+ env != current_env && !env->kvm_cpu_state.signalled) {
+ signal = 1;
+ }
+
+ if (signal) {
+ env->kvm_cpu_state.signalled = 1;
+ if (env->kvm_cpu_state.thread) {
+ pthread_kill(env->kvm_cpu_state.thread, SIG_IPI);
+ }
+ }
+ }
+}
+
+int kvm_cpu_exec(CPUState *env)
+{
+ int r;
+
+ r = kvm_run(env);
+ if (r < 0) {
+ printf("kvm_run returned %d\n", r);
+ vm_stop(0);
+ }
+
+ return 0;
+}
+
+int kvm_cpu_is_stopped(CPUState *env)
+{
+ return !vm_running || env->stopped;
+}
+
+static void flush_queued_work(CPUState *env)
+{
+ struct qemu_work_item *wi;
+
+ if (!env->kvm_cpu_state.queued_work_first) {
+ return;
+ }
+
+ while ((wi = env->kvm_cpu_state.queued_work_first)) {
+ env->kvm_cpu_state.queued_work_first = wi->next;
+ wi->func(wi->data);
+ wi->done = true;
+ }
+ env->kvm_cpu_state.queued_work_last = NULL;
+ pthread_cond_broadcast(&qemu_work_cond);
+}
+
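+/*
+ * Wait up to timeout milliseconds in sigtimedwait() with the global
+ * mutex dropped, handling SIG_IPI kicks and SIGBUS (MCE) events, then
+ * run any queued work and honour a pending stop request.
+ */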
+static void kvm_main_loop_wait(CPUState *env, int timeout)
+{
+ struct timespec ts;
+ int r, e;
+ siginfo_t siginfo;
+ sigset_t waitset;
+ sigset_t chkset;
+
+ ts.tv_sec = timeout / 1000;
+ ts.tv_nsec = (timeout % 1000) * 1000000;
+ sigemptyset(&waitset);
+ sigaddset(&waitset, SIG_IPI);
+ sigaddset(&waitset, SIGBUS);
+
+ do {
+ pthread_mutex_unlock(&qemu_mutex);
+
+ r = sigtimedwait(&waitset, &siginfo, &ts);
+ e = errno;
+
+ pthread_mutex_lock(&qemu_mutex);
+
+ if (r == -1 && !(e == EAGAIN || e == EINTR)) {
+ printf("sigtimedwait: %s\n", strerror(e));
+ exit(1);
+ }
+
+ switch (r) {
+ case SIGBUS:
+ if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
+ sigbus_reraise();
+ break;
+ default:
+ break;
+ }
+
+ r = sigpending(&chkset);
+ if (r == -1) {
+ printf("sigpending: %s\n", strerror(e));
+ exit(1);
+ }
+ } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
+
+ cpu_single_env = env;
+ flush_queued_work(env);
+
+ if (env->stop) {
+ env->stop = 0;
+ env->stopped = 1;
+ pthread_cond_signal(&qemu_pause_cond);
+ }
+
+ env->kvm_cpu_state.signalled = 0;
+}
+
+static int all_threads_paused(void)
+{
+ CPUState *penv = first_cpu;
+
+ while (penv) {
+ if (penv->stop) {
+ return 0;
+ }
+ penv = (CPUState *) penv->next_cpu;
+ }
+
+ return 1;
+}
+
+static void pause_all_threads(void)
+{
+ CPUState *penv = first_cpu;
+
+ while (penv) {
+ if (penv != cpu_single_env) {
+ penv->stop = 1;
+ pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
+ } else {
+ penv->stop = 0;
+ penv->stopped = 1;
+ cpu_exit(penv);
+ }
+ penv = (CPUState *) penv->next_cpu;
+ }
+
+ while (!all_threads_paused()) {
+ qemu_cond_wait(&qemu_pause_cond);
+ }
+}
+
+static void resume_all_threads(void)
+{
+ CPUState *penv = first_cpu;
+
+ assert(!cpu_single_env);
+
+ while (penv) {
+ penv->stop = 0;
+ penv->stopped = 0;
+ pthread_kill(penv->kvm_cpu_state.thread, SIG_IPI);
+ penv = (CPUState *) penv->next_cpu;
+ }
+}
+
+static void kvm_vm_state_change_handler(void *context, int running, int reason)
+{
+ if (running) {
+ resume_all_threads();
+ } else {
+ pause_all_threads();
+ }
+}
+
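+/*
+ * Block SIGUSR2/SIGIO/SIGALRM in the vcpu thread, then hand the
+ * resulting mask, minus SIG_IPI and SIGBUS, to KVM_SET_SIGNAL_MASK so
+ * those two signals are delivered only while the thread sits in
+ * KVM_RUN, where they kick the vcpu out of guest mode.
+ */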
+static void setup_kernel_sigmask(CPUState *env)
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR2);
+ sigaddset(&set, SIGIO);
+ sigaddset(&set, SIGALRM);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+
+ sigprocmask(SIG_BLOCK, NULL, &set);
+ sigdelset(&set, SIG_IPI);
+ sigdelset(&set, SIGBUS);
+
+ kvm_set_signal_mask(env, &set);
+}
+
+static void qemu_kvm_system_reset(void)
+{
+ pause_all_threads();
+
+ qemu_system_reset();
+
+ resume_all_threads();
+}
+
+static void process_irqchip_events(CPUState *env)
+{
+ kvm_arch_process_irqchip_events(env);
+ if (kvm_arch_has_work(env))
+ env->halted = 0;
+}
+
+static int kvm_main_loop_cpu(CPUState *env)
+{
+ while (1) {
+ int run_cpu = !kvm_cpu_is_stopped(env);
+ if (run_cpu && !kvm_irqchip_in_kernel()) {
+ process_irqchip_events(env);
+ run_cpu = !env->halted;
+ }
+ if (run_cpu) {
+ kvm_cpu_exec(env);
+ kvm_main_loop_wait(env, 0);
+ } else {
+ kvm_main_loop_wait(env, 1000);
+ }
+ }
+ pthread_mutex_unlock(&qemu_mutex);
+ return 0;
+}
+
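+/*
+ * Per-vcpu thread body: create the vcpu, signal its creation via
+ * qemu_vcpu_cond, wait for machine initialization to complete, and
+ * enter the vcpu execution loop.
+ */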
+static void *ap_main_loop(void *_env)
+{
+ CPUState *env = _env;
+ sigset_t signals;
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+ struct ioperm_data *data = NULL;
+#endif
+
+ current_env = env;
+ env->thread_id = kvm_get_thread_id();
+ sigfillset(&signals);
+ sigprocmask(SIG_BLOCK, &signals, NULL);
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+ /* do ioperm for io ports of assigned devices */
+ QLIST_FOREACH(data, &ioperm_head, entries)
+ on_vcpu(env, kvm_arch_do_ioperm, data);
+#endif
+
+ pthread_mutex_lock(&qemu_mutex);
+ cpu_single_env = env;
+
+ kvm_create_vcpu(env, env->cpu_index);
+ setup_kernel_sigmask(env);
+
+ /* signal VCPU creation */
+ current_env->created = 1;
+ pthread_cond_signal(&qemu_vcpu_cond);
+
+ /* and wait for machine initialization */
+ while (!qemu_system_ready) {
+ qemu_cond_wait(&qemu_system_cond);
+ }
+
+ /* re-initialize cpu_single_env after re-acquiring qemu_mutex */
+ cpu_single_env = env;
+
+ kvm_main_loop_cpu(env);
+ return NULL;
+}
+
+int kvm_init_vcpu(CPUState *env)
+{
+ pthread_create(&env->kvm_cpu_state.thread, NULL, ap_main_loop, env);
+
+ while (env->created == 0) {
+ qemu_cond_wait(&qemu_vcpu_cond);
+ }
+
+ return 0;
+}
+
+int kvm_vcpu_inited(CPUState *env)
+{
+ return env->created;
+}
+
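+/*
+ * In legacy replacement mode the HPET takes over the PIT's interrupt,
+ * so the in-kernel PIT must be told to stop injecting; the
+ * KVM_PIT_FLAGS_HPET_LEGACY flag toggles that behaviour.
+ */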
+#ifdef TARGET_I386
+void kvm_hpet_disable_kpit(void)
+{
+ struct kvm_pit_state2 ps2;
+
+ kvm_get_pit2(kvm_context, &ps2);
+ ps2.flags |= KVM_PIT_FLAGS_HPET_LEGACY;
+ kvm_set_pit2(kvm_context, &ps2);
+}
+
+void kvm_hpet_enable_kpit(void)
+{
+ struct kvm_pit_state2 ps2;
+
+ kvm_get_pit2(kvm_context, &ps2);
+ ps2.flags &= ~KVM_PIT_FLAGS_HPET_LEGACY;
+ kvm_set_pit2(kvm_context, &ps2);
+}
+#endif
+
+int kvm_init_ap(void)
+{
+ struct sigaction action;
+
+ qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
+
+ signal(SIG_IPI, sig_ipi_handler);
+
+ memset(&action, 0, sizeof(action));
+ action.sa_flags = SA_SIGINFO;
+ action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ sigaction(SIGBUS, &action, NULL);
+ prctl(PR_MCE_KILL, 1, 1, 0, 0);
+ return 0;
+}
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them. We rely on whatever the current signal
+ * handler is to dispatch the signals when we receive them.
+ */
+
+static void sigfd_handler(void *opaque)
+{
+ int fd = (unsigned long) opaque;
+ struct qemu_signalfd_siginfo info;
+ struct sigaction action;
+ ssize_t len;
+
+ while (1) {
+ do {
+ len = read(fd, &info, sizeof(info));
+ } while (len == -1 && errno == EINTR);
+
+ if (len == -1 && errno == EAGAIN) {
+ break;
+ }
+
+ if (len != sizeof(info)) {
+ printf("read from sigfd returned %zd: %m\n", len);
+ return;
+ }
+
+ sigaction(info.ssi_signo, NULL, &action);
+ if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
+ action.sa_sigaction(info.ssi_signo,
+ (siginfo_t *)&info, NULL);
+ } else if (action.sa_handler) {
+ action.sa_handler(info.ssi_signo);
+ }
+ }
+}
+
+int kvm_main_loop(void)
+{
+ sigset_t mask;
+ int sigfd;
+
+ io_thread = pthread_self();
+ qemu_system_ready = 1;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGIO);
+ sigaddset(&mask, SIGALRM);
+ sigaddset(&mask, SIGBUS);
+ sigprocmask(SIG_BLOCK, &mask, NULL);
+
+ sigfd = qemu_signalfd(&mask);
+ if (sigfd == -1) {
+ fprintf(stderr, "failed to create signalfd\n");
+ return -errno;
+ }
+
+ fcntl(sigfd, F_SETFL, O_NONBLOCK);
+
+ qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
+ (void *)(unsigned long) sigfd);
+
+ pthread_cond_broadcast(&qemu_system_cond);
+
+ io_thread_sigfd = sigfd;
+ cpu_single_env = NULL;
+
+ while (1) {
+ main_loop_wait(0);
+ if (qemu_shutdown_requested()) {
+ monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
+ if (qemu_no_shutdown()) {
+ vm_stop(0);
+ } else {
+ break;
+ }
+ } else if (qemu_powerdown_requested()) {
+ monitor_protocol_event(QEVENT_POWERDOWN, NULL);
+ qemu_irq_raise(qemu_system_powerdown);
+ } else if (qemu_reset_requested()) {
+ qemu_kvm_system_reset();
+ } else if (kvm_debug_cpu_requested) {
+ gdb_set_stop_cpu(kvm_debug_cpu_requested);
+ vm_stop(EXCP_DEBUG);
+ kvm_debug_cpu_requested = NULL;
+ }
+ }
+
+ bdrv_close_all();
+ pause_all_threads();
+ pthread_mutex_unlock(&qemu_mutex);
+
+ return 0;
+}
+
+#if !defined(TARGET_I386)
+int kvm_arch_init_irq_routing(void)
+{
+ return 0;
+}
+#endif
+
+extern int no_hpet;
+
+static int kvm_create_context(void)
+{
+ static const char upgrade_note[] =
+ "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
+ "(see http://sourceforge.net/projects/kvm).\n";
+
+ int r;
+
+ if (!kvm_irqchip) {
+ kvm_disable_irqchip_creation(kvm_context);
+ }
+ if (!kvm_pit) {
+ kvm_disable_pit_creation(kvm_context);
+ }
+ if (kvm_create(kvm_context, 0, NULL) < 0) {
+ kvm_finalize(kvm_state);
+ return -1;
+ }
+ r = kvm_arch_qemu_create_context();
+ if (r < 0) {
+ kvm_finalize(kvm_state);
+ return -1;
+ }
+ if (kvm_pit && !kvm_pit_reinject) {
+ if (kvm_reinject_control(kvm_context, 0)) {
+ fprintf(stderr, "failure to disable in-kernel PIT reinjection\n");
+ return -1;
+ }
+ }
+
+ /* There was a nasty bug in < kvm-80 that prevents memory slots from being
+ * destroyed properly. Since we rely on this capability, refuse to work
+ * with any kernel without this capability. */
+ if (!kvm_check_extension(kvm_state, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
+ fprintf(stderr,
+ "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
+ upgrade_note);
+ return -EINVAL;
+ }
+
+ r = kvm_arch_init_irq_routing();
+ if (r < 0) {
+ return r;
+ }
+
+ kvm_state->vcpu_events = 0;
+#ifdef KVM_CAP_VCPU_EVENTS
+ kvm_state->vcpu_events = kvm_check_extension(kvm_state, KVM_CAP_VCPU_EVENTS);
+#endif
+
+ kvm_state->debugregs = 0;
+#ifdef KVM_CAP_DEBUGREGS
+ kvm_state->debugregs = kvm_check_extension(kvm_state, KVM_CAP_DEBUGREGS);
+#endif
+
+ kvm_state->xsave = 0;
+#ifdef KVM_CAP_XSAVE
+ kvm_state->xsave = kvm_check_extension(kvm_state, KVM_CAP_XSAVE);
+#endif
+
+ kvm_state->xcrs = 0;
+#ifdef KVM_CAP_XCRS
+ kvm_state->xcrs = kvm_check_extension(kvm_state, KVM_CAP_XCRS);
+#endif
+
+ kvm_state->many_ioeventfds = kvm_check_many_ioeventfds();
+
+ kvm_init_ap();
+ if (kvm_irqchip) {
+ if (!qemu_kvm_has_gsi_routing()) {
+ irq0override = 0;
+#ifdef TARGET_I386
+ /* if kernel can't do irq routing, interrupt source
+ * override 0->2 can not be set up as required by hpet,
+ * so disable hpet.
+ */
+ no_hpet = 1;
+ } else if (!qemu_kvm_has_pit_state2()) {
+ no_hpet = 1;
+ }
+#else
+ }
+#endif
+ }
+
+ return 0;
+}
+
+#ifdef KVM_CAP_IRQCHIP
+
+int kvm_set_irq(int irq, int level, int *status)
+{
+ return kvm_set_irq_level(kvm_context, irq, level, status);
+}
+
+#endif
+
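+/*
+ * qemu_mutex is the big QEMU lock. These helpers serve the io-thread,
+ * which runs no vcpu, so cpu_single_env is cleared on every
+ * acquisition and must still be NULL when the lock is dropped.
+ */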
+static void kvm_mutex_unlock(void)
+{
+ assert(!cpu_single_env);
+ pthread_mutex_unlock(&qemu_mutex);
+}
+
+static void kvm_mutex_lock(void)
+{
+ pthread_mutex_lock(&qemu_mutex);
+ cpu_single_env = NULL;
+}
+
+void qemu_mutex_unlock_iothread(void)
+{
+ if (kvm_enabled()) {
+ kvm_mutex_unlock();
+ }
+}
+
+void qemu_mutex_lock_iothread(void)
+{
+ if (kvm_enabled()) {
+ kvm_mutex_lock();
+ }
+}
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+void kvm_add_ioperm_data(struct ioperm_data *data)
+{
+ QLIST_INSERT_HEAD(&ioperm_head, data, entries);
+}
+
+void kvm_remove_ioperm_data(unsigned long start_port, unsigned long num)
+{
+ struct ioperm_data *data;
+
+ data = QLIST_FIRST(&ioperm_head);
+ while (data) {
+ struct ioperm_data *next = QLIST_NEXT(data, entries);
+
+ if (data->start_port == start_port && data->num == num) {
+ QLIST_REMOVE(data, entries);
+ qemu_free(data);
+ }
+
+ data = next;
+ }
+}
+
+void kvm_ioperm(CPUState *env, void *data)
+{
+ if (kvm_enabled() && qemu_system_ready) {
+ on_vcpu(env, kvm_arch_do_ioperm, data);
+ }
+}
+
+#endif
+
+int kvm_set_boot_cpu_id(uint32_t id)
+{
+ return kvm_set_boot_vcpu_id(kvm_context, id);
+}
+