diff options
author | Robert Mustacchi <rm@joyent.com> | 2012-08-06 16:43:45 +0000 |
---|---|---|
committer | Robert Mustacchi <rm@joyent.com> | 2012-08-06 16:43:45 +0000 |
commit | d145a69b03243d4ec0db78fd70ce2fd2ebcb8bea (patch) | |
tree | 3ec5c11b088d06bcbb360ece102eff9f758bb4ac | |
parent | d61d4b6312d7d5cb1c2194d4b2d90f99857472a5 (diff) | |
download | illumos-kvm-cmd-d145a69b03243d4ec0db78fd70ce2fd2ebcb8bea.tar.gz |
HVM-740 want a basic dmod for qemu
-rw-r--r-- | Makefile.joyent | 91 | ||||
-rw-r--r-- | qemu_mdb.c | 534 |
2 files changed, 617 insertions, 8 deletions
diff --git a/Makefile.joyent b/Makefile.joyent index 1796b5b..46c4b1c 100644 --- a/Makefile.joyent +++ b/Makefile.joyent @@ -1,5 +1,5 @@ # -# Copyright (c) 2011 Joyent, Inc. +# Copyright (c) 2012 Joyent, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -20,16 +20,88 @@ # THE SOFTWARE. # -KERNEL_SOURCE=$(PWD)/../../illumos -CTFBINDIR=$(KERNEL_SOURCE)/usr/src/tools/proto/*/opt/onbld/bin/i386 +PROTO_AREA = $(PWD)/../../../proto +KERNEL_SOURCE = $(PWD)/../../illumos +MDB_SOURCE = $(KERNEL_SOURCE)/usr/src/cmd/mdb +CTFBINDIR= $(KERNEL_SOURCE)/usr/src/tools/proto/*/opt/onbld/bin/i386 +CSTYLE = $(KERNEL_SOURCE)/usr/src/tools/scripts/cstyle +CC = $(PROTO_AREA)/usr/bin/gcc -world: config-host.mak - echo "Building world" - PATH=$(CTFBINDIR):$(PATH) V=1 gmake all +QEMU_CPPFLAGS = \ + -DTARGET_PHYS_ADDR_BITS=64 \ + -D__EXTENSIONS__ + +DMOD_CPPFLAGS = \ + $(QEMU_CPPFLAGS) \ + -D_KERNEL \ + -DTEXT_DOMAIN="SUNW_OST_OSCMD" \ + -D_TS_ERRNO \ + -D_ELF64 \ + -Ui386 \ + -U__i386 \ + -isystem $(PROTO_AREA)/usr/include \ + -I. \ + -Ihw + +ALWAYS_CFLAGS = \ + -fident \ + -fno-builtin \ + -nodefaultlibs \ + -Wall \ + -Werror \ + -fno-inline-functions + +USER_CFLAGS = \ + -finline \ + -gdwarf-2 \ + -std=gnu89 + +DMOD_CFLAGS = \ + $(ALWAYS_CFLAGS) \ + $(USER_CFLAGS) \ + -m64 \ + -fno-strict-aliasing \ + -fno-unit-at-a-time \ + -fno-optimize-sibling-calls \ + -O2 \ + -fno-inline-small-functions \ + -fno-inline-functions-called-once \ + -mtune=opteron \ + -ffreestanding \ + -fPIC + +DMOD_LDFLAGS = \ + -m64 \ + -shared \ + -nodefaultlibs \ + -Wl,-M$(KERNEL_SOURCE)/usr/src/common/mapfiles/common/map.pagealign \ + -Wl,-M$(KERNEL_SOURCE)/usr/src/common/mapfiles/common/map.noexdata \ + -Wl,-ztext \ + -Wl,-zdefs \ + -Wl,-M$(MDB_SOURCE)/common/modules/conf/mapfile-extern \ + -L$(PROTO_AREA)/lib \ + -L$(PROTO_AREA)/usr/lib + +DMOD_LIBS = \ + -lc + +DMOD_SRCS = \ + qemu_mdb.c + +world: qemu qemu.so config-host.mak: PATH=$(CTFBINDIR):$(PATH) CONFIGURE_ONLY=1 ./build.sh +qemu: config-host.mak + echo "Building world" + PATH=$(CTFBINDIR):$(PATH) V=1 gmake all + +qemu.so: $(DMOD_SRCS) + $(CC) $(DMOD_CPPFLAGS) $(DMOD_CFLAGS) $(DMOD_LDFLAGS) -o $@ \ + $(DMOD_SRCS) $(DMOD_LIBS) + + update: git pull --rebase @@ -37,14 +109,17 @@ manifest: cp manifest $(DESTDIR)/$(DESTNAME) install: world - echo "Building install" DESTDIR=$(DESTDIR) PATH=$(CTFBINDIR):$(PATH) V=1 gmake install clean: gmake clean + rm -f qemu.so xref: find . -type f -name '*.[ch]' > cscope.files cscope-fast -bq -.PHONY: manifest xref +check: + $(CSTYLE) qemu_mdb.c + +.PHONY: manifest xref check clean diff --git a/qemu_mdb.c b/qemu_mdb.c new file mode 100644 index 0000000..69b33db --- /dev/null +++ b/qemu_mdb.c @@ -0,0 +1,534 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2012 Joyent, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/mdb_modapi.h> + +#include "hw.h" +#include "pci.h" +#include "hw/virtio.h" +#include "hw/virtio-net.h" +#include "pci_internals.h" +#include "qemu-queue.h" + +/* + * Sigh, this isn't of course defined in any header file, so we just have to + * #include this ourselves. + */ +struct PCIHostBus { + int domain; + struct PCIBus *bus; + QLIST_ENTRY(PCIHostBus) next; +}; + +typedef struct { + PCIDevice pci_dev; + VirtIODevice *vdev; + uint32_t flags; + uint32_t addr; + uint32_t class_code; + uint32_t nvectors; + BlockConf block; + NICConf nic; + uint32_t host_features; +#ifdef CONFIG_LINUX + V9fsConf fsconf; +#endif + /* Max. number of ports we can have for a the virtio-serial device */ + uint32_t max_virtserial_ports; + virtio_net_conf net; + bool ioeventfd_disabled; + bool ioeventfd_started; +} VirtIOPCIProxy; + +typedef struct RAMBlock { + uint8_t *host; + ram_addr_t offset; + ram_addr_t length; + char idstr[256]; + QLIST_ENTRY(RAMBlock) next; +#if defined(__linux__) && !defined(TARGET_S390X) + int fd; +#endif +} RAMBlock; + +typedef struct RAMList { + uint8_t *phys_dirty; + QLIST_HEAD(ram, RAMBlock) blocks; +} RAMList; + +/* + * NDEVICES comes from the PCIDevice structure and should be changed if this + * does ever change. + */ +#define NDEVICES 256 +typedef struct pci_dev_wdata { + struct PCIDevice *pdw_devs[NDEVICES]; + int pdw_idx; +} pci_dev_wdata_t; + +static int +qemu_mdb_host_bus_init(mdb_walk_state_t *wsp) +{ + struct PCIHostBus *head; + GElf_Sym sym; + + if (wsp->walk_addr != NULL) { + mdb_printf("qemu_host_bus does not support local walks"); + return (WALK_ERR); + } + + /* + * The root of the host busses is defined in QEMU as: + * static QLIST_HEAD(, PCIHostBus) host_buses; + * + * However we don't really get a type, it's basically an anoynmous + * struct to mdb. So instead, since the head of the queue list points to + * the first one, we tread it as a struct PCIHOSTBus**. + */ + if (mdb_lookup_by_name("host_buses", &sym) != 0) { + mdb_warn("unable to locate host_buse"); + return (WALK_ERR); + } + + if (mdb_vread(&head, sizeof (head), sym.st_value) != sizeof (head)) { + mdb_warn("failed to read host_buses"); + return (WALK_ERR); + } + + wsp->walk_addr = (uintptr_t)head; + if (head == NULL) + return (WALK_DONE); + + return (WALK_NEXT); +} + + +static int +qemu_mdb_host_bus_step(mdb_walk_state_t *wsp) +{ + struct PCIHostBus bus; + uintptr_t addr = wsp->walk_addr; + + if (addr == NULL) + return (WALK_DONE); + + if (mdb_vread(&bus, sizeof (bus), addr) != sizeof (bus)) { + mdb_warn("failed to read struct PCIHostBus"); + return (WALK_ERR); + } + + wsp->walk_addr = (uintptr_t)bus.next.le_next; + return (wsp->walk_callback(addr, &bus, wsp->walk_cbdata)); +} + +static int +qemu_mdb_pci_device_init(mdb_walk_state_t *wsp) +{ + int ii; + struct PCIBus bus; + struct PCIHostBus host; + pci_dev_wdata_t *pdw; + struct PCIHostBus *headp; + GElf_Sym sym; + uintptr_t baddr; + + /* + * We're going to make some great assumptions here, which in practice + * have been proven true in so far as we care about. Basically that + * there is only one HostBus and that that HostBus in reality only has + * one PCIBus which is the one we care about. So that's what we do here. + */ + if (wsp->walk_addr == NULL) { + if (mdb_lookup_by_name("host_buses", &sym) != 0) { + mdb_warn("unable to locate host_buse"); + return (WALK_ERR); + } + + if (mdb_vread(&headp, sizeof (headp), sym.st_value) != + sizeof (headp)) { + mdb_warn("failed to read host_buses"); + return (WALK_ERR); + } + + if (mdb_vread(&host, sizeof (host), (uintptr_t)headp) != + sizeof (host)) { + mdb_warn("failed to read host bus"); + return (WALK_ERR); + } + + baddr = (uintptr_t)host.bus; + } else { + baddr = wsp->walk_addr; + } + + if (mdb_vread(&bus, sizeof (bus), baddr) != sizeof (bus)) { + mdb_warn("failed to read PCIBus\n"); + return (WALK_ERR); + } + + pdw = mdb_zalloc(sizeof (pci_dev_wdata_t), UM_SLEEP | UM_GC); + (void) bcopy(bus.devices, pdw->pdw_devs, sizeof (bus.devices)); + + /* + * Find the first device. + */ + for (ii = 0; ii < NDEVICES; ii++) + if (pdw->pdw_devs[ii] != NULL) + break; + + if (ii == NDEVICES) + return (WALK_DONE); + + pdw->pdw_idx = ii; + wsp->walk_addr = (uintptr_t)pdw->pdw_devs[ii]; + wsp->walk_data = pdw; + + return (WALK_NEXT); +} + +static int +qemu_mdb_pci_device_step(mdb_walk_state_t *wsp) +{ + PCIDevice dev; + pci_dev_wdata_t *pdw = wsp->walk_data; + uintptr_t addr = wsp->walk_addr; + int ii; + + if (pdw->pdw_idx == NDEVICES) + return (WALK_DONE); + + if (mdb_vread(&dev, sizeof (dev), addr) != sizeof (dev)) { + mdb_warn("couldn't read PCIDevice at %p", addr); + return (WALK_ERR); + } + + for (ii = pdw->pdw_idx + 1; ii < NDEVICES; ii++) + if (pdw->pdw_devs[ii] != NULL) + break; + + pdw->pdw_idx = ii; + if (ii == NDEVICES) + wsp->walk_addr = NULL; + else + wsp->walk_addr = (uintptr_t)pdw->pdw_devs[ii]; + + return (wsp->walk_callback(addr, &dev, wsp->walk_cbdata)); +} + + +static int +qemu_mdb_pci_dev_type_init(mdb_walk_state_t *wsp) +{ + if (wsp->walk_addr != NULL) { + mdb_warn("local walks not supported"); + return (WALK_ERR); + } + + if (wsp->walk_arg == NULL) { + mdb_warn("called into qemu_mdb_pci_dev_type_init with no arg"); + return (WALK_ERR); + } + + if (mdb_layered_walk("qemu_pci_device", wsp) == -1) { + mdb_warn("failed to init layered walk"); + return (WALK_ERR); + } + + return (WALK_NEXT); +} + +static int +qemu_mdb_pci_dev_type_step(mdb_walk_state_t *wsp) +{ + PCIDevice dev; + + if (wsp->walk_addr == NULL) { + mdb_warn("found unexpected null device pointer"); + return (WALK_ERR); + } + + if (mdb_vread(&dev, sizeof (dev), wsp->walk_addr) != sizeof (dev)) { + mdb_warn("failed to read device: %p", wsp->walk_addr); + return (WALK_ERR); + } + + if (strcmp(wsp->walk_arg, dev.name) != 0) + return (WALK_NEXT); + + return (wsp->walk_callback(wsp->walk_addr, &dev, wsp->walk_cbdata)); +} + + +/* + * XXX There is a subtle mdb memory leak here. We're duping the string name for + * the walkers as initial arguments so we can use it as a filter when doing the + * larger walk. This is fine, but right now we're being rather lazy and not + * cleaning up that these exist which means that we need some way to keep track + * of them at some point and free it when we unload. + */ +/*ARGSUSED*/ +static int +qemu_mdb_init_walkers(uintptr_t addr, const PCIDevice *d, void *ignored) +{ + mdb_walker_t w; + size_t len; + char *ndup; + char wname[64]; + char descr[64]; + + (void) mdb_snprintf(descr, sizeof (descr), + "walk the qemu %s devices", d->name); + (void) mdb_snprintf(wname, sizeof (wname), + "qemu_%s", d->name); + + /* Don't forget your null terminator */ + len = strlen(d->name) + 1; + ndup = mdb_alloc(sizeof (char) * len, UM_SLEEP); + (void) strcpy(ndup, d->name); + w.walk_name = wname; + w.walk_descr = descr; + w.walk_init = qemu_mdb_pci_dev_type_init; + w.walk_step = qemu_mdb_pci_dev_type_step; + w.walk_fini = NULL; + w.walk_init_arg = (void *)ndup; + + /* + * XXX Normally this failure would be bad, but we're purposefully being + * lazy and recreating walkers with the same name as ones which already + * exist, e.g. when we have multiple devices of the same general type. + * + * Unfortunately, errno's aren't part of the module API so we have no + * way of distinguishing them. So we just swallow all of them for now. + */ + if (mdb_add_walker(&w) == -1) + mdb_free(ndup, len * sizeof (char)); + + return (0); +} + +static int +qemu_mdb_init(void) +{ + + mdb_walker_t w = { "qemu_pci_device", + "walk a PCI Bus's attached devices", qemu_mdb_pci_device_init, + qemu_mdb_pci_device_step, NULL }; + + if (mdb_add_walker(&w) == -1) { + mdb_warn("failed to add qemu_pci_device walker"); + return (-1); + } + + (void) mdb_walk("qemu_pci_device", (mdb_walk_cb_t)qemu_mdb_init_walkers, + NULL); + return (0); +} + +static int +qemu_mdb_pcidev2virtio(uintptr_t addr, uint_t flags, int argc, + const mdb_arg_t *argv) +{ + VirtIOPCIProxy v; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (argc > 1) + return (DCMD_USAGE); + + if (mdb_vread(&v, sizeof (v), addr) != sizeof (v)) { + mdb_warn("failed to read Virtio Proxy structure"); + return (DCMD_ERR); + } + + mdb_printf("%lr\n", v.vdev); + + return (DCMD_OK); +} + +/* + * These are a series of definitions that we need for qemu_mdb_tpa2qva. Note + * that while most of them have the same name, unofrutnately qemu has a #pragma + * poinson on some of them that prevents us from using them without changing the + * name. + */ +typedef struct PhysPageDesc { + /* offset in host memory of the page + io_index in the low bits */ + ram_addr_t phys_offset; + ram_addr_t region_offset; +} PhysPageDesc; + +#define MDB_TARGET_PAGE_BITS 12 +#define MDB_TARGET_PAGE_SIZE (1 << MDB_TARGET_PAGE_BITS) +#define MDB_TARGET_PAGE_MASK ~(MDB_TARGET_PAGE_SIZE - 1) +#define TARGET_VIRT_ADDR_SPACE_BITS 47 +#define TARGET_PHYS_ADDR_SPACE_BITS 52 +#define L2_BITS 10 +#define L2_SIZE (1 << L2_BITS) +#define P_L1_BITS_REM \ + ((TARGET_PHYS_ADDR_SPACE_BITS - MDB_TARGET_PAGE_BITS) % L2_BITS) +#if P_L1_BITS_REM < 4 +#define P_L1_BITS (P_L1_BITS_REM + L2_BITS) +#else +#define P_L1_BITS P_L1_BITS_REM +#endif +#define P_L1_SIZE ((uintptr_t)1 << P_L1_BITS) +#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - MDB_TARGET_PAGE_BITS - \ + P_L1_BITS) + +static uintptr_t +qemu_mdb_get_ram_ptr(uintptr_t addr) +{ + GElf_Sym sym; + RAMList rl; + uintptr_t rbp; + RAMBlock rb; + + if (mdb_lookup_by_name("ram_list", &sym) != 0) { + mdb_warn("failed to look up ram_list"); + return (0); + } + + if (mdb_vread(&rl, sizeof (rl), sym.st_value) != sizeof (rl)) { + mdb_warn("failed to read ram_list"); + return (0); + } + + rbp = (uintptr_t)rl.blocks.lh_first; + for (;;) { + if (rbp == NULL) { + mdb_warn("failed to find RAMBlock for address"); + return (0); + } + + if (mdb_vread(&rb, sizeof (rb), rbp) != sizeof (rb)) { + mdb_warn("failed to read RAMBlock %p", rbp); + return (0); + } + + if (addr - rb.offset < rb.length) + break; + + rbp = (uintptr_t)rb.next.le_next; + } + + return ((uintptr_t)(rb.host + (addr - rb.offset))); +} + +static int +qemu_mdb_tpa2qva(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + GElf_Sym sym; + void **lp, **p; + int ii; + PhysPageDesc *pdp, pd; + uintptr_t paddr, pfaddr, vptr; + + if (!(flags & DCMD_ADDRSPEC)) + return (DCMD_USAGE); + + if (argc > 1) + return (DCMD_USAGE); + + if (mdb_lookup_by_name("l1_phys_map", &sym) != 0) { + mdb_warn("unable to locate host_buse"); + return (DCMD_ERR); + } + + lp = (void **)sym.st_value; + pfaddr = addr >> MDB_TARGET_PAGE_BITS; + lp += ((pfaddr >> P_L1_SHIFT) & (P_L1_SIZE - 1)); + + for (ii = P_L1_SHIFT / L2_BITS - 1; ii > 0; ii--) { + if (mdb_vread(&p, sizeof (p), (uintptr_t)lp) != sizeof (p)) { + mdb_warn("failed to read into l1 page table"); + return (DCMD_ERR); + } + + if (p == NULL) { + mdb_warn("found a null entry, bailing"); + return (DCMD_ERR); + } + + lp = p + ((pfaddr >> (ii * L2_BITS)) & (L2_SIZE - 1)); + } + + if (mdb_vread(&pdp, sizeof (pdp), (uintptr_t)lp) != sizeof (pdp)) { + mdb_warn("failed to read into the PhysPageDesc"); + return (DCMD_ERR); + } + + if (pdp == NULL) { + mdb_warn("found null PhysPageDesc, bailing"); + return (DCMD_ERR); + } + + pdp += (pfaddr & (L2_SIZE - 1)); + if (mdb_vread(&pd, sizeof (pd), (uintptr_t)pdp) != sizeof (pd)) { + mdb_warn("failed to read pdp"); + return (DCMD_ERR); + } + + paddr = pd.phys_offset; + if ((paddr & ~MDB_TARGET_PAGE_MASK) > IO_MEM_ROM && + !(paddr & IO_MEM_ROMD)) { + mdb_printf("Address is in I/O space. Not touching it."); + return (DCMD_OK); + } + + vptr = qemu_mdb_get_ram_ptr(paddr & MDB_TARGET_PAGE_MASK); + if (vptr == 0) + return (DCMD_ERR); + vptr += addr & ~MDB_TARGET_PAGE_MASK; + mdb_printf("%lr\n", vptr); + + return (DCMD_OK); +} + +static const mdb_dcmd_t qemu_dcmds[] = { + { "pcidev2virtio", NULL, "translate a virtio PCI device to its " + "virtio equivalent", qemu_mdb_pcidev2virtio }, + { "qemu_tpa2qva", NULL, "translate a target physical address to a " + "QEMU virtual address", qemu_mdb_tpa2qva }, + { NULL } +}; + +static const mdb_walker_t qemu_walkers[] = { + { "qemu_host_bus", "walk qemu PCIHostBus structures", + qemu_mdb_host_bus_init, qemu_mdb_host_bus_step, NULL }, + { NULL } +}; + +static const mdb_modinfo_t qemu_mdb_modinfo = { MDB_API_VERSION, qemu_dcmds, + qemu_walkers }; + +const mdb_modinfo_t * +_mdb_init(void) +{ + if (qemu_mdb_init() != 0) + return (NULL); + + return (&qemu_mdb_modinfo); +} |