diff options
author | Patrick Mooney <pmooney@pfmooney.com> | 2020-05-27 16:07:43 +0000 |
---|---|---|
committer | Patrick Mooney <pmooney@pfmooney.com> | 2020-06-12 15:11:53 +0000 |
commit | 154972aff898a787b38af3bab5b8d754b5a42447 (patch) | |
tree | 9b2711fa2ad290471317b3edc1824a4f68f1a197 | |
parent | 440a8a36792bdf9ef51639066aab0b7771ffcab8 (diff) | |
download | illumos-joyent-154972aff898a787b38af3bab5b8d754b5a42447.tar.gz |
12792 bhyve upstream sync 2020 May
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
89 files changed, 3378 insertions, 1233 deletions
diff --git a/exception_lists/cstyle b/exception_lists/cstyle index b57430207d..2572af785d 100644 --- a/exception_lists/cstyle +++ b/exception_lists/cstyle @@ -1340,6 +1340,7 @@ usr/src/cmd/bhyve/bootrom.[ch] usr/src/cmd/bhyve/console.[ch] usr/src/cmd/bhyve/consport.c usr/src/cmd/bhyve/dbgport.[ch] +usr/src/cmd/bhyve/debug.h usr/src/cmd/bhyve/fwctl.[ch] usr/src/cmd/bhyve/gdb.[ch] usr/src/cmd/bhyve/hda_codec.c @@ -1348,6 +1349,7 @@ usr/src/cmd/bhyve/hdac_reg.h usr/src/cmd/bhyve/inout.[ch] usr/src/cmd/bhyve/ioapic.[ch] usr/src/cmd/bhyve/iov.[ch] +usr/src/cmd/bhyve/kernemu_dev.[ch] usr/src/cmd/bhyve/mem.[ch] usr/src/cmd/bhyve/mevent.[ch] usr/src/cmd/bhyve/mevent_test.c @@ -1386,6 +1388,7 @@ usr/src/cmd/bhyve/uart_emul.[ch] usr/src/cmd/bhyve/usb_emul.[ch] usr/src/cmd/bhyve/usb_mouse.c usr/src/cmd/bhyve/vga.[ch] +usr/src/cmd/bhyve/vmgenc.[ch] usr/src/cmd/bhyve/virtio.[ch] usr/src/cmd/bhyve/xmsr.[ch] usr/src/cmd/bhyveconsole/bhyveconsole.c diff --git a/exception_lists/hdrchk b/exception_lists/hdrchk index 0889b92d8a..79d1e7af52 100644 --- a/exception_lists/hdrchk +++ b/exception_lists/hdrchk @@ -387,8 +387,10 @@ usr/src/cmd/bhyve/block_if.h usr/src/cmd/bhyve/bootrom.h usr/src/cmd/bhyve/console.h usr/src/cmd/bhyve/dbgport.h +usr/src/cmd/bhyve/debug.h usr/src/cmd/bhyve/inout.h usr/src/cmd/bhyve/ioapic.h +usr/src/cmd/bhyve/kernemu_dev.h usr/src/cmd/bhyve/mem.h usr/src/cmd/bhyve/mptbl.h usr/src/cmd/bhyve/pci_emul.h @@ -404,6 +406,7 @@ usr/src/cmd/bhyve/sockstream.h usr/src/cmd/bhyve/spinup_ap.h usr/src/cmd/bhyve/uart_emul.h usr/src/cmd/bhyve/vga.h +usr/src/cmd/bhyve/vmgenc.h usr/src/cmd/bhyve/virtio.h usr/src/cmd/bhyve/xmsr.h usr/src/compat/bhyve/* diff --git a/usr/src/cmd/bhyve/Makefile b/usr/src/cmd/bhyve/Makefile index 4cff9d9cf9..348a8988fe 100644 --- a/usr/src/cmd/bhyve/Makefile +++ b/usr/src/cmd/bhyve/Makefile @@ -75,11 +75,16 @@ SRCS = acpi.c \ vga.c \ virtio.c \ vmm_instruction_emul.c \ + vmgenc.c \ xmsr.c \ spinup_ap.c \ iov.c \ bhyve_sol_glue.c +# We are not yet 
performing instruction emulation in userspace, so going to the +# trouble of fixing the header tangle for this is not worth the complexity. + #kernemu_dev.c \ + # The virtio-scsi driver appears to include a slew of materials from FreeBSD's # native SCSI implementation. We will omit that complexity for now. #ctl_util.c \ @@ -122,6 +127,7 @@ CPPFLAGS = -I$(COMPAT)/bhyve -I$(CONTRIB)/bhyve \ CPPFLAGS += -DNO_OPENSSL pci_nvme.o := CERRWARN += -_gcc=-Wno-pointer-sign +pci_nvme.o := SMOFF += kmalloc_wrong_size SMOFF += all_func_returns,leaks,no_if_block diff --git a/usr/src/cmd/bhyve/acpi.c b/usr/src/cmd/bhyve/acpi.c index 862f4512f8..76ddf5f5f6 100644 --- a/usr/src/cmd/bhyve/acpi.c +++ b/usr/src/cmd/bhyve/acpi.c @@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$"); #include "bhyverun.h" #include "acpi.h" #include "pci_emul.h" +#include "vmgenc.h" /* * Define the base address of the ACPI tables, the sizes of some tables, @@ -317,11 +318,11 @@ basl_fwrite_madt(FILE *fp) /* Local APIC NMI is connected to LINT 1 on all CPUs */ EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n"); EFPRINTF(fp, "[0001]\t\tLength : 06\n"); - EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n"); + EFPRINTF(fp, "[0001]\t\tProcessor ID : FF\n"); EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); EFPRINTF(fp, "\t\t\tPolarity : 1\n"); EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); - EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n"); + EFPRINTF(fp, "[0001]\t\tInterrupt Input LINT : 01\n"); EFPRINTF(fp, "\n"); EFFLUSH(fp); @@ -375,13 +376,13 @@ basl_fwrite_fadt(FILE *fp) EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n", IO_PMTMR); - EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n"); + EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : %08X\n", IO_GPE0_BLK); EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n"); EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n"); EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n"); EFPRINTF(fp, "[0001]\t\tPM2 
Control Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n"); - EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : 00\n"); + EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : %02x\n", IO_GPE0_LEN); EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n"); EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n"); @@ -509,10 +510,10 @@ basl_fwrite_fadt(FILE *fp) EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); + EFPRINTF(fp, "[0001]\t\tBit Width : %02x\n", IO_GPE0_LEN * 8); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); + EFPRINTF(fp, "[0008]\t\tAddress : %016X\n", IO_GPE0_BLK); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n"); @@ -568,7 +569,7 @@ basl_fwrite_hpet(FILE *fp) EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); - EFPRINTF(fp, "[0004]\t\tTimer Block ID : %08X\n", hpet_capabilities); + EFPRINTF(fp, "[0004]\t\tHardware Block ID : %08X\n", hpet_capabilities); EFPRINTF(fp, "[0012]\t\tTimer Block Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 00 [SystemMemory]\n"); @@ -579,7 +580,7 @@ basl_fwrite_hpet(FILE *fp) EFPRINTF(fp, "[0008]\t\tAddress : 00000000FED00000\n"); EFPRINTF(fp, "\n"); - EFPRINTF(fp, "[0001]\t\tHPET Number : 00\n"); + EFPRINTF(fp, "[0001]\t\tSequence Number : 00\n"); EFPRINTF(fp, "[0002]\t\tMinimum Clock Ticks : 0000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\t4K Page Protect : 1\n"); @@ -615,9 +616,9 @@ basl_fwrite_mcfg(FILE *fp) EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0008]\t\tBase Address : %016lX\n", pci_ecfg_base()); - EFPRINTF(fp, "[0002]\t\tSegment Group: 0000\n"); - EFPRINTF(fp, "[0001]\t\tStart Bus: 00\n"); 
- EFPRINTF(fp, "[0001]\t\tEnd Bus: FF\n"); + EFPRINTF(fp, "[0002]\t\tSegment Group Number : 0000\n"); + EFPRINTF(fp, "[0001]\t\tStart Bus Number : 00\n"); + EFPRINTF(fp, "[0001]\t\tEnd Bus Number : FF\n"); EFPRINTF(fp, "[0004]\t\tReserved : 0\n"); EFFLUSH(fp); return (0); @@ -764,6 +765,9 @@ basl_fwrite_dsdt(FILE *fp) dsdt_line(" })"); dsdt_line(" }"); dsdt_line(" }"); + + vmgenc_write_dsdt(); + dsdt_line("}"); if (dsdt_error != 0) diff --git a/usr/src/cmd/bhyve/acpi.h b/usr/src/cmd/bhyve/acpi.h index 4c6d86d091..50e5337f33 100644 --- a/usr/src/cmd/bhyve/acpi.h +++ b/usr/src/cmd/bhyve/acpi.h @@ -42,9 +42,19 @@ #define IO_PMTMR 0x408 /* 4-byte i/o port for the timer */ +#define IO_GPE0_BLK 0x40c /* 2x 1-byte IO port for GPE0_STS/EN */ +#define IO_GPE0_LEN 0x2 + +#define IO_GPE0_STS IO_GPE0_BLK +#define IO_GPE0_EN (IO_GPE0_BLK + (IO_GPE0_LEN / 2)) + +/* Allocated GPE bits. */ +#define GPE_VMGENC 0 + struct vmctx; int acpi_build(struct vmctx *ctx, int ncpu); +void acpi_raise_gpe(struct vmctx *ctx, unsigned bit); void dsdt_line(const char *fmt, ...); void dsdt_fixed_ioport(uint16_t iobase, uint16_t length); void dsdt_fixed_irq(uint8_t irq); diff --git a/usr/src/cmd/bhyve/audio.c b/usr/src/cmd/bhyve/audio.c index 15e370284e..ee6bdabc54 100644 --- a/usr/src/cmd/bhyve/audio.c +++ b/usr/src/cmd/bhyve/audio.c @@ -92,7 +92,7 @@ audio_init(const char *dev_name, uint8_t dir) if (strlen(dev_name) < sizeof(aud->dev_name)) memcpy(aud->dev_name, dev_name, strlen(dev_name) + 1); else { - DPRINTF("dev_name too big\n"); + DPRINTF("dev_name too big"); free(aud); return NULL; } @@ -101,7 +101,7 @@ audio_init(const char *dev_name, uint8_t dir) aud->fd = open(aud->dev_name, aud->dir ? 
O_WRONLY : O_RDONLY, 0); if (aud->fd == -1) { - DPRINTF("Failed to open dev: %s, errno: %d\n", + DPRINTF("Failed to open dev: %s, errno: %d", aud->dev_name, errno); free(aud); return (NULL); @@ -137,7 +137,7 @@ audio_set_params(struct audio *aud, struct audio_params *params) assert(params); if ((audio_fd = aud->fd) < 0) { - DPRINTF("Incorrect audio device descriptor for %s\n", + DPRINTF("Incorrect audio device descriptor for %s", aud->dev_name); return (-1); } @@ -146,7 +146,7 @@ audio_set_params(struct audio *aud, struct audio_params *params) if (aud->inited) { err = ioctl(audio_fd, SNDCTL_DSP_RESET, NULL); if (err == -1) { - DPRINTF("Failed to reset fd: %d, errno: %d\n", + DPRINTF("Failed to reset fd: %d, errno: %d", aud->fd, errno); return (-1); } @@ -157,14 +157,14 @@ audio_set_params(struct audio *aud, struct audio_params *params) format = params->format; err = ioctl(audio_fd, SNDCTL_DSP_SETFMT, &format); if (err == -1) { - DPRINTF("Fail to set fmt: 0x%x errno: %d\n", + DPRINTF("Fail to set fmt: 0x%x errno: %d", params->format, errno); return -1; } /* The device does not support the requested audio format */ if (format != params->format) { - DPRINTF("Mismatch format: 0x%x params->format: 0x%x\n", + DPRINTF("Mismatch format: 0x%x params->format: 0x%x", format, params->format); return -1; } @@ -173,14 +173,14 @@ audio_set_params(struct audio *aud, struct audio_params *params) channels = params->channels; err = ioctl(audio_fd, SNDCTL_DSP_CHANNELS, &channels); if (err == -1) { - DPRINTF("Fail to set channels: %d errno: %d\n", + DPRINTF("Fail to set channels: %d errno: %d", params->channels, errno); return -1; } /* The device does not support the requested no. 
of channels */ if (channels != params->channels) { - DPRINTF("Mismatch channels: %d params->channels: %d\n", + DPRINTF("Mismatch channels: %d params->channels: %d", channels, params->channels); return -1; } @@ -189,14 +189,14 @@ audio_set_params(struct audio *aud, struct audio_params *params) rate = params->rate; err = ioctl(audio_fd, SNDCTL_DSP_SPEED, &rate); if (err == -1) { - DPRINTF("Fail to set speed: %d errno: %d\n", + DPRINTF("Fail to set speed: %d errno: %d", params->rate, errno); return -1; } /* The device does not support the requested rate / speed */ if (rate != params->rate) { - DPRINTF("Mismatch rate: %d params->rate: %d\n", + DPRINTF("Mismatch rate: %d params->rate: %d", rate, params->rate); return -1; } @@ -205,10 +205,10 @@ audio_set_params(struct audio *aud, struct audio_params *params) err = ioctl(audio_fd, aud->dir ? SNDCTL_DSP_GETOSPACE : SNDCTL_DSP_GETISPACE, &info); if (err == -1) { - DPRINTF("Fail to get audio buf info errno: %d\n", errno); + DPRINTF("Fail to get audio buf info errno: %d", errno); return -1; } - DPRINTF("fragstotal: 0x%x fragsize: 0x%x\n", + DPRINTF("fragstotal: 0x%x fragsize: 0x%x", info.fragstotal, info.fragsize); #endif return 0; @@ -237,7 +237,7 @@ audio_playback(struct audio *aud, const void *buf, size_t count) while (total < count) { len = write(audio_fd, buf + total, count - total); if (len == -1) { - DPRINTF("Fail to write to fd: %d, errno: %d\n", + DPRINTF("Fail to write to fd: %d, errno: %d", audio_fd, errno); return -1; } @@ -273,7 +273,7 @@ audio_record(struct audio *aud, void *buf, size_t count) while (total < count) { len = read(audio_fd, buf + total, count - total); if (len == -1) { - DPRINTF("Fail to write to fd: %d, errno: %d\n", + DPRINTF("Fail to write to fd: %d, errno: %d", audio_fd, errno); return -1; } diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 9a7fc859e4..378cde054b 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -88,11 +88,14 @@ 
__FBSDID("$FreeBSD$"); #include "acpi.h" #include "atkbdc.h" #include "console.h" +#include "bootrom.h" #include "inout.h" #include "dbgport.h" +#include "debug.h" #include "fwctl.h" #include "gdb.h" #include "ioapic.h" +#include "kernemu_dev.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" @@ -105,6 +108,7 @@ __FBSDID("$FreeBSD$"); #include "rfb.h" #include "rtc.h" #include "vga.h" +#include "vmgenc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ @@ -188,6 +192,9 @@ uint16_t cores, maxcpus, sockets, threads; char *guest_uuid_str; +int raw_stdio = 0; + +static int gdb_port = 0; static int guest_vmexit_on_hlt, guest_vmexit_on_pause; static int virtio_msix = 1; static int x2apic_mode = 0; /* default is xAPIC */ @@ -487,7 +494,8 @@ fbsdrun_start_thread(void *param) snprintf(tname, sizeof(tname), "vcpu %d", vcpu); pthread_set_name_np(mtp->mt_thr, tname); - gdb_cpu_add(vcpu); + if (gdb_port != 0) + gdb_cpu_add(vcpu); vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); @@ -772,8 +780,11 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) stats.vmexit_mtrap++; + if (gdb_port == 0) { + fprintf(stderr, "vm_loop: unexpected VMEXIT_MTRAP\n"); + exit(4); + } gdb_cpu_mtrap(*pvcpu); - return (VMEXIT_CONTINUE); } @@ -791,16 +802,14 @@ vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) if (err) { if (err == ESRCH) { - fprintf(stderr, "Unhandled memory access to 0x%lx\n", + EPRINTLN("Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } - fprintf(stderr, "Failed to emulate instruction ["); - for (i = 0; i < vie->num_valid; i++) { - fprintf(stderr, "0x%02x%s", vie->inst[i], - i != (vie->num_valid - 1) ? 
" " : ""); - } - fprintf(stderr, "] at 0x%lx\n", vmexit->rip); + fprintf(stderr, "Failed to emulate instruction sequence [ "); + for (i = 0; i < vie->num_valid; i++) + fprintf(stderr, "%02x", vie->inst[i]); + FPRINTLN(stderr, " ] at 0x%lx", vmexit->rip); return (VMEXIT_ABORT); } @@ -852,10 +861,26 @@ static int vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { + if (gdb_port == 0) { + fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n"); + exit(4); + } gdb_cpu_suspend(*pvcpu); return (VMEXIT_CONTINUE); } +static int +vmexit_breakpoint(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) +{ + + if (gdb_port == 0) { + fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n"); + exit(4); + } + gdb_cpu_breakpoint(*pvcpu, vmexit); + return (VMEXIT_CONTINUE); +} + static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_INOUT_STR] = vmexit_inout, @@ -871,6 +896,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_SUSPENDED] = vmexit_suspend, [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, [VM_EXITCODE_DEBUG] = vmexit_debug, + [VM_EXITCODE_BPT] = vmexit_breakpoint, }; static void @@ -1060,7 +1086,7 @@ do_open(const char *vmname) int main(int argc, char *argv[]) { - int c, error, dbg_port, gdb_port, err, bvmcons; + int c, error, dbg_port, err, bvmcons; int max_vcpus, mptgen, memflags; int rtc_localtime; bool gdb_stop; @@ -1075,7 +1101,6 @@ main(int argc, char *argv[]) bvmcons = 0; progname = basename(argv[0]); dbg_port = 0; - gdb_port = 0; gdb_stop = false; guest_ncpus = 1; sockets = cores = threads = 1; @@ -1251,6 +1276,10 @@ main(int argc, char *argv[]) init_mem(); init_inout(); +#ifdef __FreeBSD__ + kernemu_dev_init(); +#endif + init_bootrom(ctx); atkbdc_init(ctx); pci_irq_init(ctx); ioapic_init(ctx); @@ -1266,6 +1295,13 @@ main(int argc, char *argv[]) exit(4); } + /* + * Initialize after PCI, to allow a bootrom file to reserve the high + * region. 
+ */ + if (acpi) + vmgenc_init(ctx); + if (dbg_port != 0) init_dbgport(dbg_port); diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c index 8278bf3f92..23a04c0f5b 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -3,6 +3,7 @@ * * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> * All rights reserved. + * Copyright 2020 Joyent, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -68,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <machine/atomic.h> #include "bhyverun.h" +#include "debug.h" #ifdef __FreeBSD__ #include "mevent.h" #endif @@ -544,7 +546,7 @@ blockif_open(const char *optstr, const char *ident) else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) pssopt = ssopt; else { - fprintf(stderr, "Invalid device option \"%s\"\n", cp); + EPRINTLN("Invalid device option \"%s\"", cp); goto err; } } @@ -692,7 +694,7 @@ blockif_open(const char *optstr, const char *ident) if (ssopt != 0) { if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || ssopt > pssopt) { - fprintf(stderr, "Invalid sector size %d/%d\n", + EPRINTLN("Invalid sector size %d/%d", ssopt, pssopt); goto err; } @@ -706,8 +708,8 @@ blockif_open(const char *optstr, const char *ident) */ if (S_ISCHR(sbuf.st_mode)) { if (ssopt < sectsz || (ssopt % sectsz) != 0) { - fprintf(stderr, "Sector size %d incompatible " - "with underlying device sector size %d\n", + EPRINTLN("Sector size %d incompatible " + "with underlying device sector size %d", ssopt, sectsz); goto err; } diff --git a/usr/src/cmd/bhyve/bootrom.c b/usr/src/cmd/bhyve/bootrom.c index b8c63828c8..38a50490eb 100644 --- a/usr/src/cmd/bhyve/bootrom.c +++ b/usr/src/cmd/bhyve/bootrom.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> +#include <err.h> #include <errno.h> #include <fcntl.h> #include <stdio.h> @@ -45,63 +46,130 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #include "bhyverun.h" 
#include "bootrom.h" +#include "debug.h" -#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ +#define BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ + +/* + * ROM region is 16 MB at the top of 4GB ("low") memory. + * + * The size is limited so it doesn't encroach into reserved MMIO space (e.g., + * APIC, HPET, MSI). + * + * It is allocated in page-multiple blocks on a first-come first-serve basis, + * from high to low, during initialization, and does not change at runtime. + */ +static char *romptr; /* Pointer to userspace-mapped bootrom region. */ +static vm_paddr_t gpa_base; /* GPA of low end of region. */ +static vm_paddr_t gpa_allocbot; /* Low GPA of free region. */ +static vm_paddr_t gpa_alloctop; /* High GPA, minus 1, of free region. */ + +void +init_bootrom(struct vmctx *ctx) +{ + romptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", BOOTROM_SIZE); + if (romptr == MAP_FAILED) + err(4, "%s: vm_create_devmem", __func__); + gpa_base = (1ULL << 32) - BOOTROM_SIZE; + gpa_allocbot = gpa_base; + gpa_alloctop = (1ULL << 32) - 1; +} int -bootrom_init(struct vmctx *ctx, const char *romfile) +bootrom_alloc(struct vmctx *ctx, size_t len, int prot, int flags, + char **region_out, uint64_t *gpa_out) { - struct stat sbuf; + static const int bootrom_valid_flags = BOOTROM_ALLOC_TOP; + vm_paddr_t gpa; + vm_ooffset_t segoff; + + if (flags & ~bootrom_valid_flags) { + warnx("%s: Invalid flags: %x", __func__, + flags & ~bootrom_valid_flags); + return (EINVAL); + } + if (prot & ~_PROT_ALL) { + warnx("%s: Invalid protection: %x", __func__, + prot & ~_PROT_ALL); + return (EINVAL); + } + + if (len == 0 || len > BOOTROM_SIZE) { + warnx("ROM size %zu is invalid", len); + return (EINVAL); + } + if (len & PAGE_MASK) { + warnx("ROM size %zu is not a multiple of the page size", + len); + return (EINVAL); + } + + if (flags & BOOTROM_ALLOC_TOP) { + gpa = (gpa_alloctop - len) + 1; + if (gpa < gpa_allocbot) { + warnx("No room for %zu ROM in bootrom region", len); + return (ENOMEM); + } + } 
else { + gpa = gpa_allocbot; + if (gpa > (gpa_alloctop - len) + 1) { + warnx("No room for %zu ROM in bootrom region", len); + return (ENOMEM); + } + } + + segoff = gpa - gpa_base; + if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, segoff, len, prot) != 0) { + int serrno = errno; + warn("%s: vm_mmap_mapseg", __func__); + return (serrno); + } + + if (flags & BOOTROM_ALLOC_TOP) + gpa_alloctop = gpa - 1; + else + gpa_allocbot = gpa + len; + + *region_out = romptr + segoff; + if (gpa_out != NULL) + *gpa_out = gpa; + return (0); +} + +int +bootrom_loadrom(struct vmctx *ctx, const char *romfile) +{ + struct stat sbuf; ssize_t rlen; char *ptr; - int fd, i, rv, prot; + int fd, i, rv; rv = -1; fd = open(romfile, O_RDONLY); if (fd < 0) { - fprintf(stderr, "Error opening bootrom \"%s\": %s\n", + EPRINTLN("Error opening bootrom \"%s\": %s", romfile, strerror(errno)); goto done; } if (fstat(fd, &sbuf) < 0) { - fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n", + EPRINTLN("Could not fstat bootrom file \"%s\": %s", romfile, strerror(errno)); goto done; } - /* - * Limit bootrom size to 16MB so it doesn't encroach into reserved - * MMIO space (e.g. APIC, HPET, MSI). 
- */ - if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) { - fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size); - goto done; - } - - if (sbuf.st_size & PAGE_MASK) { - fprintf(stderr, "Bootrom size %ld is not a multiple of the " - "page size\n", sbuf.st_size); - goto done; - } - - ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size); - if (ptr == MAP_FAILED) - goto done; - /* Map the bootrom into the guest address space */ - prot = PROT_READ | PROT_EXEC; - gpa = (1ULL << 32) - sbuf.st_size; - if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0) + if (bootrom_alloc(ctx, sbuf.st_size, PROT_READ | PROT_EXEC, + BOOTROM_ALLOC_TOP, &ptr, NULL) != 0) goto done; /* Read 'romfile' into the guest address space */ for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) { rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE); if (rlen != PAGE_SIZE) { - fprintf(stderr, "Incomplete read of page %d of bootrom " - "file %s: %ld bytes\n", i, romfile, rlen); + EPRINTLN("Incomplete read of page %d of bootrom " + "file %s: %ld bytes", i, romfile, rlen); goto done; } } diff --git a/usr/src/cmd/bhyve/bootrom.h b/usr/src/cmd/bhyve/bootrom.h index 7fb12181dd..da802343ee 100644 --- a/usr/src/cmd/bhyve/bootrom.h +++ b/usr/src/cmd/bhyve/bootrom.h @@ -32,9 +32,19 @@ #define _BOOTROM_H_ #include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <limits.h> struct vmctx; -int bootrom_init(struct vmctx *ctx, const char *romfile); +void init_bootrom(struct vmctx *ctx); +enum { + BOOTROM_ALLOC_TOP = 0x80, + _FORCE_INT = INT_MIN, +}; +int bootrom_alloc(struct vmctx *ctx, size_t len, int prot, int flags, + char **region_out, uint64_t *gpa_out); +int bootrom_loadrom(struct vmctx *ctx, const char *romfile); #endif diff --git a/usr/src/cmd/bhyve/consport.c b/usr/src/cmd/bhyve/consport.c index cda2df2414..42ba910f76 100644 --- a/usr/src/cmd/bhyve/consport.c +++ b/usr/src/cmd/bhyve/consport.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include "inout.h" 
#include "pci_lpc.h" +#include "debug.h" #define BVM_CONSOLE_PORT 0x220 #define BVM_CONS_SIG ('b' << 8 | 'v') @@ -73,6 +74,7 @@ ttyopen(void) cfmakeraw(&tio_new); tcsetattr(STDIN_FILENO, TCSANOW, &tio_new); + raw_stdio = 1; atexit(ttyclose); #endif diff --git a/usr/src/cmd/bhyve/debug.h b/usr/src/cmd/bhyve/debug.h new file mode 100644 index 0000000000..f63e0a9ef1 --- /dev/null +++ b/usr/src/cmd/bhyve/debug.h @@ -0,0 +1,47 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Vincenzo Maffione <vmaffione@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _DEBUG_H_ +#define _DEBUG_H_ + + +extern int raw_stdio; + +#define FPRINTLN(filep, fmt, arg...) 
\ + do { \ + if (raw_stdio) \ + fprintf(filep, fmt "\r\n", ##arg); \ + else \ + fprintf(filep, fmt "\n", ##arg); \ + } while (0) + +#define PRINTLN(fmt, arg...) FPRINTLN(stdout, fmt, ##arg) +#define EPRINTLN(fmt, arg...) FPRINTLN(stderr, fmt, ##arg) + +#endif diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c index 06809860c6..6f6884eee8 100644 --- a/usr/src/cmd/bhyve/gdb.c +++ b/usr/src/cmd/bhyve/gdb.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #endif #include <sys/ioctl.h> #include <sys/mman.h> +#include <sys/queue.h> #include <sys/socket.h> #include <machine/atomic.h> #include <machine/specialreg.h> @@ -62,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #include "bhyverun.h" +#include "gdb.h" #include "mem.h" #include "mevent.h" @@ -79,12 +81,7 @@ static struct mevent *read_event, *write_event; static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting; static pthread_mutex_t gdb_lock; static pthread_cond_t idle_vcpus; -static bool stop_pending, first_stop; -#ifdef __FreeBSD__ -static int stepping_vcpu, stopped_vcpu; -#else -static int stepping_vcpu = -1, stopped_vcpu = -1; -#endif +static bool first_stop, report_next_stop, swbreak_enabled; /* * An I/O buffer contains 'capacity' bytes of room at 'data'. For a @@ -100,11 +97,44 @@ struct io_buffer { size_t len; }; +struct breakpoint { + uint64_t gpa; + uint8_t shadow_inst; + TAILQ_ENTRY(breakpoint) link; +}; + +/* + * When a vCPU stops to due to an event that should be reported to the + * debugger, information about the event is stored in this structure. + * The vCPU thread then sets 'stopped_vcpu' if it is not already set + * and stops other vCPUs so the event can be reported. The + * report_stop() function reports the event for the 'stopped_vcpu' + * vCPU. When the debugger resumes execution via continue or step, + * the event for 'stopped_vcpu' is cleared. vCPUs will loop in their + * event handlers until the associated event is reported or disabled. 
+ * + * An idle vCPU will have all of the boolean fields set to false. + * + * When a vCPU is stepped, 'stepping' is set to true when the vCPU is + * released to execute the stepped instruction. When the vCPU reports + * the stepping trap, 'stepped' is set. + * + * When a vCPU hits a breakpoint set by the debug server, + * 'hit_swbreak' is set to true. + */ +struct vcpu_state { + bool stepping; + bool stepped; + bool hit_swbreak; +}; + static struct io_buffer cur_comm, cur_resp; static uint8_t cur_csum; -static int cur_vcpu; static struct vmctx *ctx; static int cur_fd = -1; +static TAILQ_HEAD(, breakpoint) breakpoints; +static struct vcpu_state *vcpu_state; +static int cur_vcpu, stopped_vcpu; const int gdb_regset[] = { VM_REG_GUEST_RAX, @@ -188,8 +218,18 @@ debug(const char *fmt, ...) va_end(ap); } #else +#ifndef __FreeBSD__ +/* + * A totally empty debug() makes the compiler grumpy due to how its used with + * some control flow here. + */ +#define debug(...) do { } while (0) +#else #define debug(...) #endif +#endif + +static void remove_all_sw_breakpoints(void); static int guest_paging_info(int vcpu, struct vm_guest_paging *paging) @@ -359,6 +399,11 @@ close_connection(void) io_buffer_reset(&cur_resp); cur_fd = -1; + remove_all_sw_breakpoints(); + + /* Clear any pending events. */ + memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state)); + /* Resume any stopped vCPUs. */ gdb_resume_vcpus(); pthread_mutex_unlock(&gdb_lock); @@ -576,7 +621,7 @@ append_integer(unsigned int value) if (value == 0) append_char('0'); else - append_unsigned_be(value, fls(value) + 7 / 8); + append_unsigned_be(value, (fls(value) + 7) / 8); } static void @@ -629,23 +674,60 @@ parse_threadid(const uint8_t *data, size_t len) return (parse_integer(data, len)); } +/* + * Report the current stop event to the debugger. If the stop is due + * to an event triggered on a specific vCPU such as a breakpoint or + * stepping trap, stopped_vcpu will be set to the vCPU triggering the + * stop. 
If 'set_cur_vcpu' is true, then cur_vcpu will be updated to + * the reporting vCPU for vCPU events. + */ static void -report_stop(void) +report_stop(bool set_cur_vcpu) { + struct vcpu_state *vs; start_packet(); - if (stopped_vcpu == -1) + if (stopped_vcpu == -1) { append_char('S'); - else + append_byte(GDB_SIGNAL_TRAP); + } else { + vs = &vcpu_state[stopped_vcpu]; + if (set_cur_vcpu) + cur_vcpu = stopped_vcpu; append_char('T'); - append_byte(GDB_SIGNAL_TRAP); - if (stopped_vcpu != -1) { + append_byte(GDB_SIGNAL_TRAP); append_string("thread:"); append_integer(stopped_vcpu + 1); append_char(';'); + if (vs->hit_swbreak) { + debug("$vCPU %d reporting swbreak\n", stopped_vcpu); + if (swbreak_enabled) + append_string("swbreak:;"); + } else if (vs->stepped) + debug("$vCPU %d reporting step\n", stopped_vcpu); + else + debug("$vCPU %d reporting ???\n", stopped_vcpu); } - stopped_vcpu = -1; finish_packet(); + report_next_stop = false; +} + +/* + * If this stop is due to a vCPU event, clear that event to mark it as + * acknowledged. + */ +static void +discard_stop(void) +{ + struct vcpu_state *vs; + + if (stopped_vcpu != -1) { + vs = &vcpu_state[stopped_vcpu]; + vs->hit_swbreak = false; + vs->stepped = false; + stopped_vcpu = -1; + } + report_next_stop = true; } static void @@ -655,14 +737,18 @@ gdb_finish_suspend_vcpus(void) if (first_stop) { first_stop = false; stopped_vcpu = -1; - } else if (response_pending()) - stop_pending = true; - else { - report_stop(); + } else if (report_next_stop) { + assert(!response_pending()); + report_stop(true); send_pending_data(cur_fd); } } +/* + * vCPU threads invoke this function whenever the vCPU enters the + * debug server to pause or report an event. vCPU threads wait here + * as long as the debug server keeps them suspended. 
+ */ static void _gdb_cpu_suspend(int vcpu, bool report_stop) { @@ -671,19 +757,28 @@ _gdb_cpu_suspend(int vcpu, bool report_stop) CPU_SET(vcpu, &vcpus_waiting); if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) gdb_finish_suspend_vcpus(); - while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu) + while (CPU_ISSET(vcpu, &vcpus_suspended)) pthread_cond_wait(&idle_vcpus, &gdb_lock); CPU_CLR(vcpu, &vcpus_waiting); debug("$vCPU %d resuming\n", vcpu); } +/* + * Invoked at the start of a vCPU thread's execution to inform the + * debug server about the new thread. + */ void gdb_cpu_add(int vcpu) { debug("$vCPU %d starting\n", vcpu); pthread_mutex_lock(&gdb_lock); + assert(vcpu < guest_ncpus); CPU_SET(vcpu, &vcpus_active); + if (!TAILQ_EMPTY(&breakpoints)) { + vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1); + debug("$vCPU %d enabled breakpoint exits\n", vcpu); + } /* * If a vcpu is added while vcpus are stopped, suspend the new @@ -697,42 +792,147 @@ gdb_cpu_add(int vcpu) pthread_mutex_unlock(&gdb_lock); } +/* + * Invoked by vCPU before resuming execution. This enables stepping + * if the vCPU is marked as stepping. + */ +static void +gdb_cpu_resume(int vcpu) +{ + struct vcpu_state *vs; + int error; + + vs = &vcpu_state[vcpu]; + + /* + * Any pending event should already be reported before + * resuming. + */ + assert(vs->hit_swbreak == false); + assert(vs->stepped == false); + if (vs->stepping) { + error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); + assert(error == 0); + } +} + +/* + * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest + * has been suspended due to an event on different vCPU or in response + * to a guest-wide suspend such as Ctrl-C or the stop on attach. 
+ */ void gdb_cpu_suspend(int vcpu) { pthread_mutex_lock(&gdb_lock); _gdb_cpu_suspend(vcpu, true); + gdb_cpu_resume(vcpu); pthread_mutex_unlock(&gdb_lock); } +static void +gdb_suspend_vcpus(void) +{ + + assert(pthread_mutex_isowned_np(&gdb_lock)); + debug("suspending all CPUs\n"); + vcpus_suspended = vcpus_active; + vm_suspend_cpu(ctx, -1); + if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) + gdb_finish_suspend_vcpus(); +} + +/* + * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via + * the VT-x-specific MTRAP exit. + */ void gdb_cpu_mtrap(int vcpu) { + struct vcpu_state *vs; debug("$vCPU %d MTRAP\n", vcpu); pthread_mutex_lock(&gdb_lock); - if (vcpu == stepping_vcpu) { - stepping_vcpu = -1; + vs = &vcpu_state[vcpu]; + if (vs->stepping) { + vs->stepping = false; + vs->stepped = true; vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0); - vm_suspend_cpu(ctx, vcpu); - assert(stopped_vcpu == -1); - stopped_vcpu = vcpu; - _gdb_cpu_suspend(vcpu, true); + while (vs->stepped) { + if (stopped_vcpu == -1) { + debug("$vCPU %d reporting step\n", vcpu); + stopped_vcpu = vcpu; + gdb_suspend_vcpus(); + } + _gdb_cpu_suspend(vcpu, true); + } + gdb_cpu_resume(vcpu); } pthread_mutex_unlock(&gdb_lock); } -static void -gdb_suspend_vcpus(void) +static struct breakpoint * +find_breakpoint(uint64_t gpa) { + struct breakpoint *bp; - assert(pthread_mutex_isowned_np(&gdb_lock)); - debug("suspending all CPUs\n"); - vcpus_suspended = vcpus_active; - vm_suspend_cpu(ctx, -1); - if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0) - gdb_finish_suspend_vcpus(); + TAILQ_FOREACH(bp, &breakpoints, link) { + if (bp->gpa == gpa) + return (bp); + } + return (NULL); +} + +void +gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit) +{ + struct breakpoint *bp; + struct vcpu_state *vs; + uint64_t gpa; + int error; + + pthread_mutex_lock(&gdb_lock); + error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa); + assert(error == 1); + bp = find_breakpoint(gpa); + if (bp != NULL) { + vs = 
&vcpu_state[vcpu]; + assert(vs->stepping == false); + assert(vs->stepped == false); + assert(vs->hit_swbreak == false); + vs->hit_swbreak = true; + vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip); + for (;;) { + if (stopped_vcpu == -1) { + debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu, + vmexit->rip); + stopped_vcpu = vcpu; + gdb_suspend_vcpus(); + } + _gdb_cpu_suspend(vcpu, true); + if (!vs->hit_swbreak) { + /* Breakpoint reported. */ + break; + } + bp = find_breakpoint(gpa); + if (bp == NULL) { + /* Breakpoint was removed. */ + vs->hit_swbreak = false; + break; + } + } + gdb_cpu_resume(vcpu); + } else { + debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu, + vmexit->rip); + error = vm_set_register(ctx, vcpu, + VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length); + assert(error == 0); + error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0); + assert(error == 0); + } + pthread_mutex_unlock(&gdb_lock); } static bool @@ -744,9 +944,11 @@ gdb_step_vcpu(int vcpu) error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val); if (error < 0) return (false); - error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1); + + discard_stop(); + vcpu_state[vcpu].stepping = true; vm_resume_cpu(ctx, vcpu); - stepping_vcpu = vcpu; + CPU_CLR(vcpu, &vcpus_suspended); pthread_cond_broadcast(&idle_vcpus); return (true); } @@ -1001,6 +1203,174 @@ gdb_write_mem(const uint8_t *data, size_t len) } static bool +set_breakpoint_caps(bool enable) +{ + cpuset_t mask; + int vcpu; + + mask = vcpus_active; + while (!CPU_EMPTY(&mask)) { + vcpu = CPU_FFS(&mask) - 1; + CPU_CLR(vcpu, &mask); + if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, + enable ? 1 : 0) < 0) + return (false); + debug("$vCPU %d %sabled breakpoint exits\n", vcpu, + enable ? 
"en" : "dis"); + } + return (true); +} + +static void +remove_all_sw_breakpoints(void) +{ + struct breakpoint *bp, *nbp; + uint8_t *cp; + + if (TAILQ_EMPTY(&breakpoints)) + return; + + TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) { + debug("remove breakpoint at %#lx\n", bp->gpa); + cp = paddr_guest2host(ctx, bp->gpa, 1); + *cp = bp->shadow_inst; + TAILQ_REMOVE(&breakpoints, bp, link); + free(bp); + } + TAILQ_INIT(&breakpoints); + set_breakpoint_caps(false); +} + +static void +update_sw_breakpoint(uint64_t gva, int kind, bool insert) +{ + struct breakpoint *bp; + uint64_t gpa; + uint8_t *cp; + int error; + + if (kind != 1) { + send_error(EINVAL); + return; + } + + error = guest_vaddr2paddr(cur_vcpu, gva, &gpa); + if (error == -1) { + send_error(errno); + return; + } + if (error == 0) { + send_error(EFAULT); + return; + } + + cp = paddr_guest2host(ctx, gpa, 1); + + /* Only permit breakpoints in guest RAM. */ + if (cp == NULL) { + send_error(EFAULT); + return; + } + + /* Find any existing breakpoint. */ + bp = find_breakpoint(gpa); + + /* + * Silently ignore duplicate commands since the protocol + * requires these packets to be idempotent. + */ + if (insert) { + if (bp == NULL) { + if (TAILQ_EMPTY(&breakpoints) && + !set_breakpoint_caps(true)) { + send_empty_response(); + return; + } + bp = malloc(sizeof(*bp)); + bp->gpa = gpa; + bp->shadow_inst = *cp; + *cp = 0xcc; /* INT 3 */ + TAILQ_INSERT_TAIL(&breakpoints, bp, link); + debug("new breakpoint at %#lx\n", gpa); + } + } else { + if (bp != NULL) { + debug("remove breakpoint at %#lx\n", gpa); + *cp = bp->shadow_inst; + TAILQ_REMOVE(&breakpoints, bp, link); + free(bp); + if (TAILQ_EMPTY(&breakpoints)) + set_breakpoint_caps(false); + } + } + send_ok(); +} + +static void +parse_breakpoint(const uint8_t *data, size_t len) +{ + uint64_t gva; + uint8_t *cp; + bool insert; + int kind, type; + + insert = data[0] == 'Z'; + + /* Skip 'Z/z' */ + data += 1; + len -= 1; + + /* Parse and consume type. 
*/ + cp = memchr(data, ',', len); + if (cp == NULL || cp == data) { + send_error(EINVAL); + return; + } + type = parse_integer(data, cp - data); + len -= (cp - data) + 1; + data += (cp - data) + 1; + + /* Parse and consume address. */ + cp = memchr(data, ',', len); + if (cp == NULL || cp == data) { + send_error(EINVAL); + return; + } + gva = parse_integer(data, cp - data); + len -= (cp - data) + 1; + data += (cp - data) + 1; + + /* Parse and consume kind. */ + cp = memchr(data, ';', len); + if (cp == data) { + send_error(EINVAL); + return; + } + if (cp != NULL) { + /* + * We do not advertise support for either the + * ConditionalBreakpoints or BreakpointCommands + * features, so we should not be getting conditions or + * commands from the remote end. + */ + send_empty_response(); + return; + } + kind = parse_integer(data, len); + data += len; + len = 0; + + switch (type) { + case 0: + update_sw_breakpoint(gva, kind, insert); + break; + default: + send_empty_response(); + break; + } +} + +static bool command_equals(const uint8_t *data, size_t len, const char *cmd) { @@ -1058,7 +1428,9 @@ check_features(const uint8_t *data, size_t len) value = NULL; } - /* No currently supported features. */ + if (strcmp(feature, "swbreak") == 0) + swbreak_enabled = supported; + #ifndef __FreeBSD__ /* * The compiler dislikes 'supported' being set but never used. @@ -1075,6 +1447,7 @@ check_features(const uint8_t *data, size_t len) /* This is an arbitrary limit. */ append_string("PacketSize=4096"); + append_string(";swbreak+"); finish_packet(); } @@ -1168,7 +1541,7 @@ handle_command(const uint8_t *data, size_t len) break; } - /* Don't send a reply until a stop occurs. */ + discard_stop(); gdb_resume_vcpus(); break; case 'D': @@ -1240,13 +1613,12 @@ handle_command(const uint8_t *data, size_t len) break; } break; + case 'z': + case 'Z': + parse_breakpoint(data, len); + break; case '?': - /* XXX: Only if stopped? */ - /* For now, just report that we are always stopped. 
*/ - start_packet(); - append_char('S'); - append_byte(GDB_SIGNAL_TRAP); - finish_packet(); + report_stop(false); break; case 'G': /* TODO */ case 'v': @@ -1257,8 +1629,6 @@ handle_command(const uint8_t *data, size_t len) case 'Q': /* TODO */ case 't': /* TODO */ case 'X': /* TODO */ - case 'z': /* TODO */ - case 'Z': /* TODO */ default: send_empty_response(); } @@ -1289,9 +1659,8 @@ check_command(int fd) if (response_pending()) io_buffer_reset(&cur_resp); io_buffer_consume(&cur_comm, 1); - if (stop_pending) { - stop_pending = false; - report_stop(); + if (stopped_vcpu != -1 && report_next_stop) { + report_stop(true); send_pending_data(fd); } break; @@ -1446,12 +1815,11 @@ new_connection(int fd, enum ev_type event, void *arg) cur_fd = s; cur_vcpu = 0; - stepping_vcpu = -1; stopped_vcpu = -1; - stop_pending = false; /* Break on attach. */ first_stop = true; + report_next_stop = false; gdb_suspend_vcpus(); pthread_mutex_unlock(&gdb_lock); } @@ -1505,6 +1873,9 @@ init_gdb(struct vmctx *_ctx, int sport, bool wait) if (listen(s, 1) < 0) err(1, "gdb socket listen"); + stopped_vcpu = -1; + TAILQ_INIT(&breakpoints); + vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state)); if (wait) { /* * Set vcpu 0 in vcpus_suspended. This will trigger the @@ -1512,9 +1883,8 @@ init_gdb(struct vmctx *_ctx, int sport, bool wait) * it starts execution. The vcpu will remain suspended * until a debugger connects. 
*/ - stepping_vcpu = -1; - stopped_vcpu = -1; CPU_SET(0, &vcpus_suspended); + stopped_vcpu = 0; } flags = fcntl(s, F_GETFL); diff --git a/usr/src/cmd/bhyve/gdb.h b/usr/src/cmd/bhyve/gdb.h index 09ebc34f24..93396c1c67 100644 --- a/usr/src/cmd/bhyve/gdb.h +++ b/usr/src/cmd/bhyve/gdb.h @@ -31,6 +31,7 @@ #define __GDB_H__ void gdb_cpu_add(int vcpu); +void gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit); void gdb_cpu_mtrap(int vcpu); void gdb_cpu_suspend(int vcpu); void init_gdb(struct vmctx *ctx, int sport, bool wait); diff --git a/usr/src/cmd/bhyve/hda_codec.c b/usr/src/cmd/bhyve/hda_codec.c index 82f5fb1eed..41e8121ae2 100644 --- a/usr/src/cmd/bhyve/hda_codec.c +++ b/usr/src/cmd/bhyve/hda_codec.c @@ -400,7 +400,7 @@ hda_codec_init(struct hda_codec_inst *hci, const char *play, if (!(play || rec)) return (-1); - DPRINTF("cad: 0x%x opts: %s\n", hci->cad, opts); + DPRINTF("cad: 0x%x opts: %s", hci->cad, opts); sc = calloc(1, sizeof(*sc)); if (!sc) @@ -420,7 +420,7 @@ hda_codec_init(struct hda_codec_inst *hci, const char *play, sc->conf_default = hda_codec_conf_default; sc->pin_ctrl_default = hda_codec_pin_ctrl_default; sc->verb_handlers = hda_codec_verb_handlers; - DPRINTF("HDA Codec nodes: %d\n", sc->no_nodes); + DPRINTF("HDA Codec nodes: %d", sc->no_nodes); /* * Initialize the Audio Output stream @@ -435,7 +435,7 @@ hda_codec_init(struct hda_codec_inst *hci, const char *play, st->aud = audio_init(play, 1); if (!st->aud) { - DPRINTF("Fail to init the output audio player\n"); + DPRINTF("Fail to init the output audio player"); return (-1); } } @@ -453,7 +453,7 @@ hda_codec_init(struct hda_codec_inst *hci, const char *play, st->aud = audio_init(rec, 0); if (!st->aud) { - DPRINTF("Fail to init the input audio player\n"); + DPRINTF("Fail to init the input audio player"); return (-1); } } @@ -488,11 +488,11 @@ hda_codec_reset(struct hda_codec_inst *hci) st->right_mute = HDA_CODEC_SET_AMP_GAIN_MUTE_MUTE; } - DPRINTF("cad: 0x%x\n", hci->cad); + DPRINTF("cad: 0x%x", 
hci->cad); if (!hops->signal) { DPRINTF("The controller ops does not implement \ - the signal function\n"); + the signal function"); return (-1); } @@ -538,7 +538,7 @@ hda_codec_command(struct hda_codec_inst *hci, uint32_t cmd_data) if (!hops->response) { DPRINTF("The controller ops does not implement \ - the response function\n"); + the response function"); return (-1); } @@ -566,11 +566,11 @@ hda_codec_command(struct hda_codec_inst *hci, uint32_t cmd_data) if (sc->verb_handlers[nid]) res = sc->verb_handlers[nid](sc, verb, payload); else - DPRINTF("Unknown VERB: 0x%x\n", verb); + DPRINTF("Unknown VERB: 0x%x", verb); break; } - DPRINTF("cad: 0x%x nid: 0x%x verb: 0x%x payload: 0x%x response: 0x%x\n", + DPRINTF("cad: 0x%x nid: 0x%x verb: 0x%x payload: 0x%x response: 0x%x", cad, nid, verb, payload, res); return (hops->response(hci, res, HDA_CODEC_RESPONSE_EX_SOL)); @@ -595,11 +595,11 @@ hda_codec_notify(struct hda_codec_inst *hci, uint8_t run, i = dir ? HDA_CODEC_STREAM_OUTPUT : HDA_CODEC_STREAM_INPUT; st = &sc->streams[i]; - DPRINTF("run: %d, stream: 0x%x, st->stream: 0x%x dir: %d\n", + DPRINTF("run: %d, stream: 0x%x, st->stream: 0x%x dir: %d", run, stream, st->stream, dir); if (stream != st->stream) { - DPRINTF("Stream not found\n"); + DPRINTF("Stream not found"); return (0); } @@ -653,7 +653,7 @@ hda_codec_parse_format(uint16_t fmt, struct audio_params *params) params->format = AFMT_S32_LE; break; default: - DPRINTF("Unknown format bits: 0x%x\n", + DPRINTF("Unknown format bits: 0x%x", fmt & HDA_CODEC_FMT_BITS_MASK); return (-1); } @@ -719,7 +719,7 @@ hda_codec_audio_output_do_setup(void *arg) if (err) return (-1); - DPRINTF("rate: %d, channels: %d, format: 0x%x\n", + DPRINTF("rate: %d, channels: %d, format: 0x%x", params.rate, params.channels, params.format); return (audio_set_params(aud, ¶ms)); @@ -778,7 +778,7 @@ hda_codec_audio_input_do_setup(void *arg) if (err) return (-1); - DPRINTF("rate: %d, channels: %d, format: 0x%x\n", + DPRINTF("rate: %d, channels: %d, 
format: 0x%x", params.rate, params.channels, params.format); return (audio_set_params(aud, ¶ms)); @@ -792,7 +792,7 @@ hda_codec_audio_inout_nid(struct hda_codec_stream *st, uint16_t verb, uint8_t mute = 0; uint8_t gain = 0; - DPRINTF("%s verb: 0x%x, payload, 0x%x\n", st->actx.name, verb, payload); + DPRINTF("%s verb: 0x%x, payload, 0x%x", st->actx.name, verb, payload); switch (verb) { case HDA_CMD_VERB_GET_CONV_FMT: @@ -804,10 +804,10 @@ hda_codec_audio_inout_nid(struct hda_codec_stream *st, uint16_t verb, case HDA_CMD_VERB_GET_AMP_GAIN_MUTE: if (payload & HDA_CMD_GET_AMP_GAIN_MUTE_LEFT) { res = st->left_gain | st->left_mute; - DPRINTF("GET_AMP_GAIN_MUTE_LEFT: 0x%x\n", res); + DPRINTF("GET_AMP_GAIN_MUTE_LEFT: 0x%x", res); } else { res = st->right_gain | st->right_mute; - DPRINTF("GET_AMP_GAIN_MUTE_RIGHT: 0x%x\n", res); + DPRINTF("GET_AMP_GAIN_MUTE_RIGHT: 0x%x", res); } break; case HDA_CMD_VERB_SET_AMP_GAIN_MUTE: @@ -818,14 +818,14 @@ hda_codec_audio_inout_nid(struct hda_codec_stream *st, uint16_t verb, st->left_mute = mute; st->left_gain = gain; DPRINTF("SET_AMP_GAIN_MUTE_LEFT: \ - mute: 0x%x gain: 0x%x\n", mute, gain); + mute: 0x%x gain: 0x%x", mute, gain); } if (payload & HDA_CMD_SET_AMP_GAIN_MUTE_RIGHT) { st->right_mute = mute; st->right_gain = gain; DPRINTF("SET_AMP_GAIN_MUTE_RIGHT: \ - mute: 0x%x gain: 0x%x\n", mute, gain); + mute: 0x%x gain: 0x%x", mute, gain); } break; case HDA_CMD_VERB_GET_CONV_STREAM_CHAN: @@ -834,13 +834,13 @@ hda_codec_audio_inout_nid(struct hda_codec_stream *st, uint16_t verb, case HDA_CMD_VERB_SET_CONV_STREAM_CHAN: st->channel = payload & 0x0f; st->stream = (payload >> 4) & 0x0f; - DPRINTF("st->channel: 0x%x st->stream: 0x%x\n", + DPRINTF("st->channel: 0x%x st->stream: 0x%x", st->channel, st->stream); if (!st->stream) hda_audio_ctxt_stop(&st->actx); break; default: - DPRINTF("Unknown VERB: 0x%x\n", verb); + DPRINTF("Unknown VERB: 0x%x", verb); break; } @@ -867,7 +867,7 @@ hda_audio_ctxt_thr(void *arg) { struct hda_audio_ctxt *actx = 
arg; - DPRINTF("Start Thread: %s\n", actx->name); + DPRINTF("Start Thread: %s", actx->name); pthread_mutex_lock(&actx->mtx); while (1) { diff --git a/usr/src/cmd/bhyve/iov.c b/usr/src/cmd/bhyve/iov.c index 54ea22aa94..2fa58ef9aa 100644 --- a/usr/src/cmd/bhyve/iov.c +++ b/usr/src/cmd/bhyve/iov.c @@ -119,24 +119,29 @@ iov_to_buf(const struct iovec *iov, int niov, void **buf) } ssize_t -buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, size_t seek) { struct iovec *diov; - int ndiov, i; size_t off = 0, len; + int i; + +#ifndef __FreeBSD__ + diov = NULL; +#endif if (seek > 0) { + int ndiov; + diov = malloc(sizeof(struct iovec) * niov); seek_iov(iov, niov, diov, &ndiov, seek); - } else { - diov = iov; - ndiov = niov; + iov = diov; + niov = ndiov; } - for (i = 0; i < ndiov && off < buflen; i++) { - len = MIN(diov[i].iov_len, buflen - off); - memcpy(diov[i].iov_base, buf + off, len); + for (i = 0; i < niov && off < buflen; i++) { + len = MIN(iov[i].iov_len, buflen - off); + memcpy(iov[i].iov_base, buf + off, len); off += len; } diff --git a/usr/src/cmd/bhyve/iov.h b/usr/src/cmd/bhyve/iov.h index e3b5916edb..f46b04b71e 100644 --- a/usr/src/cmd/bhyve/iov.h +++ b/usr/src/cmd/bhyve/iov.h @@ -38,7 +38,7 @@ void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, void truncate_iov(struct iovec *iov, int *niov, size_t length); size_t count_iov(const struct iovec *iov, int niov); ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, - size_t seek); +ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, + int niov, size_t seek); #endif /* _IOV_H_ */ diff --git a/usr/src/cmd/bhyve/kernemu_dev.c b/usr/src/cmd/bhyve/kernemu_dev.c new file mode 100644 index 0000000000..2fa0c3dc1f --- /dev/null +++ b/usr/src/cmd/bhyve/kernemu_dev.c @@ -0,0 +1,98 @@ +/*- + * 
SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/tree.h> + +#include <amd64/include/vmm.h> +#include <x86/include/apicreg.h> +struct vm; +struct vm_hpet_cap; +#include <vmm/io/vioapic.h> +#include <vmm/io/vhpet.h> + +#include <err.h> +#include <errno.h> +#include <vmmapi.h> + +#include "kernemu_dev.h" +#include "mem.h" + +static int +apic_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, int size, + uint64_t *val, void *arg1 __unused, long arg2 __unused) +{ + if (vm_readwrite_kernemu_device(ctx, vcpu, addr, (dir == MEM_F_WRITE), + size, val) != 0) + return (errno); + return (0); +} + +static struct mem_range lapic_mmio = { + .name = "kern-lapic-mmio", + .base = DEFAULT_APIC_BASE, + .size = PAGE_SIZE, + .flags = MEM_F_RW | MEM_F_IMMUTABLE, + .handler = apic_handler, + +}; +static struct mem_range ioapic_mmio = { + .name = "kern-ioapic-mmio", + .base = VIOAPIC_BASE, + .size = VIOAPIC_SIZE, + .flags = MEM_F_RW | MEM_F_IMMUTABLE, + .handler = apic_handler, +}; +static struct mem_range hpet_mmio = { + .name = "kern-hpet-mmio", + .base = VHPET_BASE, + .size = VHPET_SIZE, + .flags = MEM_F_RW | MEM_F_IMMUTABLE, + .handler = apic_handler, +}; + +void +kernemu_dev_init(void) +{ + int rc; + + rc = register_mem(&lapic_mmio); + if (rc != 0) + errc(4, rc, "register_mem: LAPIC (0x%08x)", + (unsigned)lapic_mmio.base); + rc = register_mem(&ioapic_mmio); + if (rc != 0) + errc(4, rc, "register_mem: IOAPIC (0x%08x)", + (unsigned)ioapic_mmio.base); + rc = register_mem(&hpet_mmio); + if (rc != 0) + errc(4, rc, "register_mem: HPET (0x%08x)", + (unsigned)hpet_mmio.base); +} diff --git a/usr/src/cmd/bhyve/kernemu_dev.h b/usr/src/cmd/bhyve/kernemu_dev.h new file mode 100644 index 0000000000..7927855da0 --- /dev/null +++ b/usr/src/cmd/bhyve/kernemu_dev.h @@ -0,0 +1,32 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#pragma once + +void kernemu_dev_init(void); diff --git a/usr/src/cmd/bhyve/mevent.c b/usr/src/cmd/bhyve/mevent.c index d604039e1b..408a648a96 100644 --- a/usr/src/cmd/bhyve/mevent.c +++ b/usr/src/cmd/bhyve/mevent.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #endif #include <err.h> #include <errno.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -63,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <sys/poll.h> #include <sys/siginfo.h> #include <sys/queue.h> +#include <sys/debug.h> #endif #include <sys/time.h> @@ -73,10 +75,12 @@ __FBSDID("$FreeBSD$"); #define MEVENT_MAX 64 -#define MEV_ADD 1 -#define MEV_ENABLE 2 -#define MEV_DISABLE 3 -#define MEV_DEL_PENDING 4 +#ifndef __FreeBSD__ +#define EV_ENABLE 0x01 +#define EV_ADD EV_ENABLE +#define EV_DISABLE 0x02 +#define EV_DELETE 0x04 +#endif extern char *vmname; @@ -97,7 +101,7 @@ struct mevent { enum ev_type me_type; void *me_param; int me_cq; - int me_state; + int me_state; /* Desired kevent flags. */ int me_closefd; #ifndef __FreeBSD__ port_notify_t me_notify; @@ -175,27 +179,7 @@ mevent_kq_filter(struct mevent *mevp) static int mevent_kq_flags(struct mevent *mevp) { - int ret; - - switch (mevp->me_state) { - case MEV_ADD: - ret = EV_ADD; /* implicitly enabled */ - break; - case MEV_ENABLE: - ret = EV_ENABLE; - break; - case MEV_DISABLE: - ret = EV_DISABLE; - break; - case MEV_DEL_PENDING: - ret = EV_DELETE; - break; - default: - assert(0); - break; - } - - return (ret); + return (mevp->me_state); } static int @@ -240,9 +224,15 @@ mevent_build(int mfd, struct kevent *kev) mevp->me_cq = 0; LIST_REMOVE(mevp, me_list); - if (mevp->me_state == MEV_DEL_PENDING) { + if (mevp->me_state & EV_DELETE) { free(mevp); } else { + /* + * We need to add the event only once, so we can + * reset the EV_ADD bit after it has been propagated + * to the kevent() arguments the first time. 
+ */ + mevp->me_state &= ~EV_ADD; LIST_INSERT_HEAD(&global_head, mevp, me_list); } @@ -271,6 +261,34 @@ mevent_handle(struct kevent *kev, int numev) #else /* __FreeBSD__ */ +static boolean_t +mevent_clarify_state(struct mevent *mevp) +{ + const int state = mevp->me_state; + + if ((state & EV_DELETE) != 0) { + /* All other intents are overriden by delete. */ + mevp->me_state = EV_DELETE; + return (B_TRUE); + } + + /* + * Without a distinction between EV_ADD and EV_ENABLE in our emulation, + * handling the add-disabled case means eliding the portfs operation + * when both flags are present. + * + * This is not a concern for subsequent enable/disable operations, as + * mevent_update() toggles the flags properly so they are not left in + * conflict. + */ + if (state == (EV_ENABLE|EV_DISABLE)) { + mevp->me_state = EV_DISABLE; + return (B_FALSE); + } + + return (B_TRUE); +} + static void mevent_update_one(struct mevent *mevp) { @@ -282,8 +300,7 @@ mevent_update_one(struct mevent *mevp) mevp->me_auto_requeue = B_FALSE; switch (mevp->me_state) { - case MEV_ADD: - case MEV_ENABLE: + case EV_ENABLE: { int events; @@ -297,8 +314,8 @@ mevent_update_one(struct mevent *mevp) } return; } - case MEV_DISABLE: - case MEV_DEL_PENDING: + case EV_DISABLE: + case EV_DELETE: /* * A disable that comes in while an event is being * handled will result in an ENOENT. 
@@ -318,8 +335,7 @@ mevent_update_one(struct mevent *mevp) mevp->me_auto_requeue = B_TRUE; switch (mevp->me_state) { - case MEV_ADD: - case MEV_ENABLE: + case EV_ENABLE: { struct itimerspec it = { 0 }; @@ -346,8 +362,8 @@ mevent_update_one(struct mevent *mevp) } return; } - case MEV_DISABLE: - case MEV_DEL_PENDING: + case EV_DISABLE: + case EV_DELETE: if (timer_delete(mevp->me_timid) != 0) { (void) fprintf(stderr, "timer_delete failed: " "%s", strerror(errno)); @@ -385,13 +401,15 @@ mevent_update_pending(int portfd) (void) close(mevp->me_fd); mevp->me_fd = -1; } else { - mevent_update_one(mevp); + if (mevent_clarify_state(mevp)) { + mevent_update_one(mevp); + } } mevp->me_cq = 0; LIST_REMOVE(mevp, me_list); - if (mevp->me_state == MEV_DEL_PENDING) { + if (mevp->me_state & EV_DELETE) { free(mevp); } else { LIST_INSERT_HEAD(&global_head, mevp, me_list); @@ -418,9 +436,10 @@ mevent_handle_pe(port_event_t *pe) } #endif -struct mevent * -mevent_add(int tfd, enum ev_type type, - void (*func)(int, enum ev_type, void *), void *param) +static struct mevent * +mevent_add_state(int tfd, enum ev_type type, + void (*func)(int, enum ev_type, void *), void *param, + int state) { struct mevent *lp, *mevp; @@ -468,7 +487,7 @@ mevent_add(int tfd, enum ev_type type, LIST_INSERT_HEAD(&change_head, mevp, me_list); mevp->me_cq = 1; - mevp->me_state = MEV_ADD; + mevp->me_state = state; mevent_notify(); exit: @@ -477,33 +496,59 @@ exit: return (mevp); } +struct mevent * +mevent_add(int tfd, enum ev_type type, + void (*func)(int, enum ev_type, void *), void *param) +{ + + return (mevent_add_state(tfd, type, func, param, EV_ADD)); +} + +struct mevent * +mevent_add_disabled(int tfd, enum ev_type type, + void (*func)(int, enum ev_type, void *), void *param) +{ + + return (mevent_add_state(tfd, type, func, param, EV_ADD | EV_DISABLE)); +} + static int -mevent_update(struct mevent *evp, int newstate) +mevent_update(struct mevent *evp, bool enable) { + int newstate; + + mevent_qlock(); + /* * 
It's not possible to enable/disable a deleted event */ - if (evp->me_state == MEV_DEL_PENDING) - return (EINVAL); + assert((evp->me_state & EV_DELETE) == 0); + + newstate = evp->me_state; + if (enable) { + newstate |= EV_ENABLE; + newstate &= ~EV_DISABLE; + } else { + newstate |= EV_DISABLE; + newstate &= ~EV_ENABLE; + } /* * No update needed if state isn't changing */ - if (evp->me_state == newstate) - return (0); - - mevent_qlock(); - - evp->me_state = newstate; + if (evp->me_state != newstate) { + evp->me_state = newstate; - /* - * Place the entry onto the changed list if not already there. - */ - if (evp->me_cq == 0) { - evp->me_cq = 1; - LIST_REMOVE(evp, me_list); - LIST_INSERT_HEAD(&change_head, evp, me_list); - mevent_notify(); + /* + * Place the entry onto the changed list if not + * already there. + */ + if (evp->me_cq == 0) { + evp->me_cq = 1; + LIST_REMOVE(evp, me_list); + LIST_INSERT_HEAD(&change_head, evp, me_list); + mevent_notify(); + } } mevent_qunlock(); @@ -515,14 +560,14 @@ int mevent_enable(struct mevent *evp) { - return (mevent_update(evp, MEV_ENABLE)); + return (mevent_update(evp, true)); } int mevent_disable(struct mevent *evp) { - return (mevent_update(evp, MEV_DISABLE)); + return (mevent_update(evp, false)); } static int @@ -540,7 +585,7 @@ mevent_delete_event(struct mevent *evp, int closefd) LIST_INSERT_HEAD(&change_head, evp, me_list); mevent_notify(); } - evp->me_state = MEV_DEL_PENDING; + evp->me_state = EV_DELETE; if (closefd) evp->me_closefd = 1; diff --git a/usr/src/cmd/bhyve/mevent.h b/usr/src/cmd/bhyve/mevent.h index e6b96f0a7c..503ec415a3 100644 --- a/usr/src/cmd/bhyve/mevent.h +++ b/usr/src/cmd/bhyve/mevent.h @@ -43,6 +43,9 @@ struct mevent; struct mevent *mevent_add(int fd, enum ev_type type, void (*func)(int, enum ev_type, void *), void *param); +struct mevent *mevent_add_disabled(int fd, enum ev_type type, + void (*func)(int, enum ev_type, void *), + void *param); int mevent_enable(struct mevent *evp); int 
mevent_disable(struct mevent *evp); int mevent_delete(struct mevent *evp); diff --git a/usr/src/cmd/bhyve/mptbl.c b/usr/src/cmd/bhyve/mptbl.c index e78f88f074..fc82faad89 100644 --- a/usr/src/cmd/bhyve/mptbl.c +++ b/usr/src/cmd/bhyve/mptbl.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <string.h> #include "acpi.h" +#include "debug.h" #include "bhyverun.h" #include "mptbl.h" #include "pci_emul.h" @@ -312,7 +313,7 @@ mptable_build(struct vmctx *ctx, int ncpu) startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH); if (startaddr == NULL) { - fprintf(stderr, "mptable requires mapped mem\n"); + EPRINTLN("mptable requires mapped mem"); return (ENOMEM); } @@ -323,10 +324,10 @@ mptable_build(struct vmctx *ctx, int ncpu) */ for (bus = 1; bus <= PCI_BUSMAX; bus++) { if (pci_bus_configured(bus)) { - fprintf(stderr, "MPtable is incompatible with " - "multiple PCI hierarchies.\r\n"); - fprintf(stderr, "MPtable generation can be disabled " - "by passing the -Y option to bhyve(8).\r\n"); + EPRINTLN("MPtable is incompatible with " + "multiple PCI hierarchies."); + EPRINTLN("MPtable generation can be disabled " + "by passing the -Y option to bhyve(8)."); return (EINVAL); } } diff --git a/usr/src/cmd/bhyve/net_backends.c b/usr/src/cmd/bhyve/net_backends.c index 88afaca4b1..884ffb8241 100644 --- a/usr/src/cmd/bhyve/net_backends.c +++ b/usr/src/cmd/bhyve/net_backends.c @@ -69,7 +69,13 @@ __FBSDID("$FreeBSD$"); #include <poll.h> #include <assert.h> +#ifdef NETGRAPH +#include <sys/param.h> +#include <sys/sysctl.h> +#include <netgraph.h> +#endif +#include "debug.h" #include "iov.h" #include "mevent.h" #include "net_backends.h" @@ -90,7 +96,7 @@ struct net_backend { * and should not be called by the frontend. 
*/ int (*init)(struct net_backend *be, const char *devname, - net_be_rxeof_t cb, void *param); + const char *opts, net_be_rxeof_t cb, void *param); void (*cleanup)(struct net_backend *be); /* @@ -98,7 +104,15 @@ struct net_backend { * vector provided by the caller has 'iovcnt' elements and contains * the packet to send. */ - ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt); + ssize_t (*send)(struct net_backend *be, const struct iovec *iov, + int iovcnt); + + /* + * Get the length of the next packet that can be received from + * the backend. If no packets are currently available, this + * function returns 0. + */ + ssize_t (*peek_recvlen)(struct net_backend *be); /* * Called to receive a packet from the backend. When the function @@ -107,7 +121,19 @@ struct net_backend { * The function returns 0 if the backend doesn't have a new packet to * receive. */ - ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt); + ssize_t (*recv)(struct net_backend *be, const struct iovec *iov, + int iovcnt); + + /* + * Ask the backend to enable or disable receive operation in the + * backend. On return from a disable operation, it is guaranteed + * that the receive callback won't be called until receive is + * enabled again. Note however that it is up to the caller to make + * sure that netbe_recv() is not currently being executed by another + * thread. + */ + void (*recv_enable)(struct net_backend *be); + void (*recv_disable)(struct net_backend *be); /* * Ask the backend for the virtio-net features it is able to @@ -145,7 +171,7 @@ SET_DECLARE(net_backend_set, struct net_backend); #define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr) -#define WPRINTF(params) printf params +#define WPRINTF(params) PRINTLN params /* * The tap backend @@ -153,6 +179,13 @@ SET_DECLARE(net_backend_set, struct net_backend); struct tap_priv { struct mevent *mevp; + /* + * A bounce buffer that allows us to implement the peek_recvlen + * callback. 
In the future we may get the same information from + * the kevent data. + */ + char bbuf[1 << 16]; + ssize_t bbuflen; }; static void @@ -171,7 +204,7 @@ tap_cleanup(struct net_backend *be) static int tap_init(struct net_backend *be, const char *devname, - net_be_rxeof_t cb, void *param) + const char *opts, net_be_rxeof_t cb, void *param) { struct tap_priv *priv = (struct tap_priv *)be->opaque; char tbuf[80]; @@ -181,7 +214,7 @@ tap_init(struct net_backend *be, const char *devname, #endif if (cb == NULL) { - WPRINTF(("TAP backend requires non-NULL callback\n")); + WPRINTF(("TAP backend requires non-NULL callback")); return (-1); } @@ -190,7 +223,7 @@ tap_init(struct net_backend *be, const char *devname, be->fd = open(tbuf, O_RDWR); if (be->fd == -1) { - WPRINTF(("open of tap device %s failed\n", tbuf)); + WPRINTF(("open of tap device %s failed", tbuf)); goto error; } @@ -199,7 +232,7 @@ tap_init(struct net_backend *be, const char *devname, * notifications with the event loop */ if (ioctl(be->fd, FIONBIO, &opt) < 0) { - WPRINTF(("tap device O_NONBLOCK failed\n")); + WPRINTF(("tap device O_NONBLOCK failed")); goto error; } @@ -209,9 +242,12 @@ tap_init(struct net_backend *be, const char *devname, errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif - priv->mevp = mevent_add(be->fd, EVF_READ, cb, param); + memset(priv->bbuf, 0, sizeof(priv->bbuf)); + priv->bbuflen = 0; + + priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { - WPRINTF(("Could not register event\n")); + WPRINTF(("Could not register event")); goto error; } @@ -226,21 +262,62 @@ error: * Called to send a buffer chain out to the tap device */ static ssize_t -tap_send(struct net_backend *be, struct iovec *iov, int iovcnt) +tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { return (writev(be->fd, iov, iovcnt)); } static ssize_t -tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) +tap_peek_recvlen(struct net_backend *be) { + struct 
tap_priv *priv = (struct tap_priv *)be->opaque; ssize_t ret; - /* Should never be called without a valid tap fd */ - assert(be->fd != -1); + if (priv->bbuflen > 0) { + /* + * We already have a packet in the bounce buffer. + * Just return its length. + */ + return priv->bbuflen; + } - ret = readv(be->fd, iov, iovcnt); + /* + * Read the next packet (if any) into the bounce buffer, so + * that we get to know its length and we can return that + * to the caller. + */ + ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf)); + if (ret < 0 && errno == EWOULDBLOCK) { + return (0); + } + + if (ret > 0) + priv->bbuflen = ret; + + return (ret); +} + +static ssize_t +tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + ssize_t ret; + + if (priv->bbuflen > 0) { + /* + * A packet is available in the bounce buffer, so + * we read it from there. + */ + ret = buf_to_iov(priv->bbuf, priv->bbuflen, + iov, iovcnt, 0); + + /* Mark the bounce buffer as empty. 
*/ + priv->bbuflen = 0; + + return (ret); + } + ret = readv(be->fd, iov, iovcnt); if (ret < 0 && errno == EWOULDBLOCK) { return (0); } @@ -248,6 +325,22 @@ tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) return (ret); } +static void +tap_recv_enable(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + + mevent_enable(priv->mevp); +} + +static void +tap_recv_disable(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + + mevent_disable(priv->mevp); +} + static uint64_t tap_get_cap(struct net_backend *be) { @@ -269,7 +362,10 @@ static struct net_backend tap_backend = { .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, + .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; @@ -281,7 +377,10 @@ static struct net_backend vmnet_backend = { .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, + .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, .get_cap = tap_get_cap, .set_cap = tap_set_cap, }; @@ -289,6 +388,192 @@ static struct net_backend vmnet_backend = { DATA_SET(net_backend_set, tap_backend); DATA_SET(net_backend_set, vmnet_backend); +#ifdef NETGRAPH + +/* + * Netgraph backend + */ + +#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024) + +static int +ng_init(struct net_backend *be, const char *devname, + const char *opts, net_be_rxeof_t cb, void *param) +{ + struct tap_priv *p = (struct tap_priv *)be->opaque; + struct ngm_connect ngc; + char *ngopts, *tofree; + char nodename[NG_NODESIZ]; + int sbsz; + int ctrl_sock; + int flags; + int path_provided; + int peerhook_provided; + int socket_provided; + unsigned long maxsbsz; + size_t msbsz; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif + + if (cb == NULL) { + WPRINTF(("Netgraph backend requires non-NULL callback")); + return (-1); + } 
+ + be->fd = -1; + + memset(&ngc, 0, sizeof(ngc)); + + strncpy(ngc.ourhook, "vmlink", NG_HOOKSIZ - 1); + + tofree = ngopts = strdup(opts); + + if (ngopts == NULL) { + WPRINTF(("strdup error")); + return (-1); + } + + socket_provided = 0; + path_provided = 0; + peerhook_provided = 0; + + while (ngopts != NULL) { + char *value = ngopts; + char *key; + + key = strsep(&value, "="); + if (value == NULL) + break; + ngopts = value; + (void) strsep(&ngopts, ","); + + if (strcmp(key, "socket") == 0) { + strncpy(nodename, value, NG_NODESIZ - 1); + socket_provided = 1; + } else if (strcmp(key, "path") == 0) { + strncpy(ngc.path, value, NG_PATHSIZ - 1); + path_provided = 1; + } else if (strcmp(key, "hook") == 0) { + strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1); + } else if (strcmp(key, "peerhook") == 0) { + strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1); + peerhook_provided = 1; + } + } + + free(tofree); + + if (!path_provided) { + WPRINTF(("path must be provided")); + return (-1); + } + + if (!peerhook_provided) { + WPRINTF(("peer hook must be provided")); + return (-1); + } + + if (NgMkSockNode(socket_provided ? nodename : NULL, + &ctrl_sock, &be->fd) < 0) { + WPRINTF(("can't get Netgraph sockets")); + return (-1); + } + + if (NgSendMsg(ctrl_sock, ".", + NGM_GENERIC_COOKIE, + NGM_CONNECT, &ngc, sizeof(ngc)) < 0) { + WPRINTF(("can't connect to node")); + close(ctrl_sock); + goto error; + } + + close(ctrl_sock); + + flags = fcntl(be->fd, F_GETFL); + + if (flags < 0) { + WPRINTF(("can't get socket flags")); + goto error; + } + + if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) { + WPRINTF(("can't set O_NONBLOCK flag")); + goto error; + } + + /* + * The default ng_socket(4) buffer's size is too low. + * Calculate the minimum value between NG_SBUF_MAX_SIZE + * and kern.ipc.maxsockbuf. 
+ */ + msbsz = sizeof(maxsbsz); + if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz, + NULL, 0) < 0) { + WPRINTF(("can't get 'kern.ipc.maxsockbuf' value")); + goto error; + } + + /* + * We can't set the socket buffer size to kern.ipc.maxsockbuf value, + * as it takes into account the mbuf(9) overhead. + */ + maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES); + + sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz); + + if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz, + sizeof(sbsz)) < 0) { + WPRINTF(("can't set TX buffer size")); + goto error; + } + + if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz, + sizeof(sbsz)) < 0) { + WPRINTF(("can't set RX buffer size")); + goto error; + } + +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); + if (caph_rights_limit(be->fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + + memset(p->bbuf, 0, sizeof(p->bbuf)); + p->bbuflen = 0; + + p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); + if (p->mevp == NULL) { + WPRINTF(("Could not register event")); + goto error; + } + + return (0); + +error: + tap_cleanup(be); + return (-1); +} + +static struct net_backend ng_backend = { + .prefix = "netgraph", + .priv_size = sizeof(struct tap_priv), + .init = ng_init, + .cleanup = tap_cleanup, + .send = tap_send, + .peek_recvlen = tap_peek_recvlen, + .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, + .get_cap = tap_get_cap, + .set_cap = tap_set_cap, +}; + +DATA_SET(net_backend_set, ng_backend); + +#endif /* NETGRAPH */ + /* * The netmap backend */ @@ -331,7 +616,7 @@ netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len) req.nr_arg1 = vnet_hdr_len; err = ioctl(be->fd, NIOCREGIF, &req); if (err) { - WPRINTF(("Unable to set vnet header length %d\n", + WPRINTF(("Unable to set vnet header length %d", vnet_hdr_len)); return (err); } @@ -379,7 +664,7 @@ netmap_set_cap(struct net_backend *be, uint64_t features, static int 
netmap_init(struct net_backend *be, const char *devname, - net_be_rxeof_t cb, void *param) + const char *opts, net_be_rxeof_t cb, void *param) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; @@ -388,7 +673,7 @@ netmap_init(struct net_backend *be, const char *devname, priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL); if (priv->nmd == NULL) { - WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n", + WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)", devname, strerror(errno))); free(priv); return (-1); @@ -401,9 +686,9 @@ netmap_init(struct net_backend *be, const char *devname, priv->cb_param = param; be->fd = priv->nmd->fd; - priv->mevp = mevent_add(be->fd, EVF_READ, cb, param); + priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { - WPRINTF(("Could not register event\n")); + WPRINTF(("Could not register event")); return (-1); } @@ -425,7 +710,7 @@ netmap_cleanup(struct net_backend *be) } static ssize_t -netmap_send(struct net_backend *be, struct iovec *iov, +netmap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; @@ -440,7 +725,7 @@ netmap_send(struct net_backend *be, struct iovec *iov, ring = priv->tx; head = ring->head; if (head == ring->tail) { - WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt))); + WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); goto txsync; } nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx); @@ -481,7 +766,7 @@ netmap_send(struct net_backend *be, struct iovec *iov, * We ran out of netmap slots while * splitting the iovec fragments. 
*/ - WPRINTF(("No space, drop %zu bytes\n", + WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt))); goto txsync; } @@ -505,7 +790,27 @@ txsync: } static ssize_t -netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) +netmap_peek_recvlen(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + struct netmap_ring *ring = priv->rx; + uint32_t head = ring->head; + ssize_t totlen = 0; + + while (head != ring->tail) { + struct netmap_slot *slot = ring->slot + head; + + totlen += slot->len; + if ((slot->flags & NS_MOREFRAG) == 0) + break; + head = nm_ring_next(ring, head); + } + + return (totlen); +} + +static ssize_t +netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; struct netmap_slot *slot = NULL; @@ -553,7 +858,7 @@ netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) iovcnt--; if (iovcnt == 0) { /* No space to receive. */ - WPRINTF(("Short iov, drop %zd bytes\n", + WPRINTF(("Short iov, drop %zd bytes", totlen)); return (-ENOSPC); } @@ -571,13 +876,32 @@ netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt) return (totlen); } +static void +netmap_recv_enable(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + mevent_enable(priv->mevp); +} + +static void +netmap_recv_disable(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + + mevent_disable(priv->mevp); +} + static struct net_backend netmap_backend = { .prefix = "netmap", .priv_size = sizeof(struct netmap_priv), .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, + .peek_recvlen = netmap_peek_recvlen, .recv = netmap_recv, + .recv_enable = netmap_recv_enable, + .recv_disable = netmap_recv_disable, .get_cap = netmap_get_cap, .set_cap = netmap_set_cap, }; @@ -589,7 +913,10 @@ static struct net_backend vale_backend = { .init = netmap_init, .cleanup = 
netmap_cleanup, .send = netmap_send, + .peek_recvlen = netmap_peek_recvlen, .recv = netmap_recv, + .recv_enable = netmap_recv_enable, + .recv_disable = netmap_recv_disable, .get_cap = netmap_get_cap, .set_cap = netmap_set_cap, }; @@ -610,12 +937,22 @@ DATA_SET(net_backend_set, vale_backend); * the argument for the callback. */ int -netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, +netbe_init(struct net_backend **ret, const char *opts, net_be_rxeof_t cb, void *param) { struct net_backend **pbe, *nbe, *tbe = NULL; + char *devname; + char *options; int err; + devname = options = strdup(opts); + + if (devname == NULL) { + return (-1); + } + + devname = strsep(&options, ","); + /* * Find the network backend that matches the user-provided * device name. net_backend_set is built using a linker set. @@ -635,8 +972,11 @@ netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, } *ret = NULL; - if (tbe == NULL) + if (tbe == NULL) { + free(devname); return (EINVAL); + } + nbe = calloc(1, sizeof(*nbe) + tbe->priv_size); *nbe = *tbe; /* copy the template */ nbe->fd = -1; @@ -645,13 +985,15 @@ netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb, nbe->fe_vnet_hdr_len = 0; /* Initialize the backend. 
*/ - err = nbe->init(nbe, devname, cb, param); + err = nbe->init(nbe, devname, options, cb, param); if (err) { + free(devname); free(nbe); return (err); } *ret = nbe; + free(devname); return (0); } @@ -696,43 +1038,18 @@ netbe_set_cap(struct net_backend *be, uint64_t features, return (ret); } -static __inline struct iovec * -iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen) +ssize_t +netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { - struct iovec *riov; - - /* XXX short-cut: assume first segment is >= tlen */ - assert(iov[0].iov_len >= tlen); - iov[0].iov_len -= tlen; - if (iov[0].iov_len == 0) { - assert(*iovcnt > 1); - *iovcnt -= 1; - riov = &iov[1]; - } else { - iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); - riov = &iov[0]; - } - - return (riov); + return (be->send(be, iov, iovcnt)); } ssize_t -netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) +netbe_peek_recvlen(struct net_backend *be) { - assert(be != NULL); - if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) { - /* - * The frontend uses a virtio-net header, but the backend - * does not. We ignore it (as it must be all zeroes) and - * strip it. - */ - assert(be->be_vnet_hdr_len == 0); - iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len); - } - - return (be->send(be, iov, iovcnt)); + return (be->peek_recvlen(be)); } /* @@ -741,46 +1058,10 @@ netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt) * the length of the packet just read. Return -1 in case of errors. */ ssize_t -netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt) +netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { - /* Length of prepended virtio-net header. */ - unsigned int hlen = be->fe_vnet_hdr_len; - int ret; - - assert(be != NULL); - if (hlen && hlen != be->be_vnet_hdr_len) { - /* - * The frontend uses a virtio-net header, but the backend - * does not. We need to prepend a zeroed header. 
- */ - struct virtio_net_rxhdr *vh; - - assert(be->be_vnet_hdr_len == 0); - - /* - * Get a pointer to the rx header, and use the - * data immediately following it for the packet buffer. - */ - vh = iov[0].iov_base; - iov = iov_trim(iov, &iovcnt, hlen); - - /* - * The only valid field in the rx packet header is the - * number of buffers if merged rx bufs were negotiated. - */ - memset(vh, 0, hlen); - if (hlen == VNET_HDR_LEN) { - vh->vrh_bufs = 1; - } - } - - ret = be->recv(be, iov, iovcnt); - if (ret > 0) { - ret += hlen; - } - - return (ret); + return (be->recv(be, iov, iovcnt)); } /* @@ -805,3 +1086,23 @@ netbe_rx_discard(struct net_backend *be) return netbe_recv(be, &iov, 1); } +void +netbe_rx_disable(struct net_backend *be) +{ + + return be->recv_disable(be); +} + +void +netbe_rx_enable(struct net_backend *be) +{ + + return be->recv_enable(be); +} + +size_t +netbe_get_vnet_hdr_len(struct net_backend *be) +{ + + return (be->be_vnet_hdr_len); +} diff --git a/usr/src/cmd/bhyve/net_backends.h b/usr/src/cmd/bhyve/net_backends.h index bba39db59b..b55437fc7b 100644 --- a/usr/src/cmd/bhyve/net_backends.h +++ b/usr/src/cmd/bhyve/net_backends.h @@ -37,15 +37,19 @@ typedef struct net_backend net_backend_t; /* Interface between network frontends and the network backends. 
*/ typedef void (*net_be_rxeof_t)(int, enum ev_type, void *param); -int netbe_init(net_backend_t **be, const char *devname, net_be_rxeof_t cb, +int netbe_init(net_backend_t **be, const char *opts, net_be_rxeof_t cb, void *param); void netbe_cleanup(net_backend_t *be); uint64_t netbe_get_cap(net_backend_t *be); int netbe_set_cap(net_backend_t *be, uint64_t cap, unsigned vnet_hdr_len); -ssize_t netbe_send(net_backend_t *be, struct iovec *iov, int iovcnt); -ssize_t netbe_recv(net_backend_t *be, struct iovec *iov, int iovcnt); +size_t netbe_get_vnet_hdr_len(net_backend_t *be); +ssize_t netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt); +ssize_t netbe_peek_recvlen(net_backend_t *be); +ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt); ssize_t netbe_rx_discard(net_backend_t *be); +void netbe_rx_disable(net_backend_t *be); +void netbe_rx_enable(net_backend_t *be); /* @@ -55,6 +59,7 @@ ssize_t netbe_rx_discard(net_backend_t *be); */ #define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ +#define VIRTIO_NET_F_MTU (1 << 3) /* initial MTU advice */ #define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ #define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ @@ -72,6 +77,7 @@ ssize_t netbe_rx_discard(net_backend_t *be); #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ #define VIRTIO_NET_F_GUEST_ANNOUNCE \ (1 << 21) /* guest can send gratuitous pkts */ +#define VIRTIO_NET_F_MQ (1 << 22) /* host supports multiple VQ pairs */ /* * Fixed network header size diff --git a/usr/src/cmd/bhyve/net_utils.c b/usr/src/cmd/bhyve/net_utils.c index a7ae4d2eef..d602cac3eb 100644 --- a/usr/src/cmd/bhyve/net_utils.c +++ b/usr/src/cmd/bhyve/net_utils.c @@ -31,37 +31,70 @@ __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <net/ethernet.h> +#include 
<assert.h> #include <errno.h> +#include <limits.h> #include <md5.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include "bhyverun.h" +#include "debug.h" #include "net_utils.h" int net_parsemac(char *mac_str, uint8_t *mac_addr) { struct ether_addr *ea; - char *tmpstr; char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; - tmpstr = strsep(&mac_str,"="); + if (mac_str == NULL) + return (EINVAL); - if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { - ea = ether_aton(mac_str); + ea = ether_aton(mac_str); - if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || - memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { - fprintf(stderr, "Invalid MAC %s\n", mac_str); - return (EINVAL); - } else - memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); - } + if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || + memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { + EPRINTLN("Invalid MAC %s", mac_str); + return (EINVAL); + } else + memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); return (0); } +int +net_parsemtu(const char *mtu_str, unsigned long *mtu) +{ + char *end; + unsigned long val; + + assert(mtu_str != NULL); + + if (*mtu_str == '-') + goto err; + + val = strtoul(mtu_str, &end, 0); + + if (*end != '\0') + goto err; + + if (val == ULONG_MAX) + return (ERANGE); + + if (val == 0 && errno == EINVAL) + return (EINVAL); + + *mtu = val; + + return (0); + +err: + errno = EINVAL; + return (EINVAL); +} + void net_genmac(struct pci_devinst *pi, uint8_t *macaddr) { diff --git a/usr/src/cmd/bhyve/net_utils.h b/usr/src/cmd/bhyve/net_utils.h index 3c83519931..1ca20ddb74 100644 --- a/usr/src/cmd/bhyve/net_utils.h +++ b/usr/src/cmd/bhyve/net_utils.h @@ -35,5 +35,6 @@ void net_genmac(struct pci_devinst *pi, uint8_t *macaddr); int net_parsemac(char *mac_str, uint8_t *mac_addr); +int net_parsemtu(const char *mtu_str, unsigned long *mtu); #endif /* _NET_UTILS_H_ */ diff --git a/usr/src/cmd/bhyve/pci_ahci.c b/usr/src/cmd/bhyve/pci_ahci.c index 1e3feffcc2..57934f9c84 100644 --- 
a/usr/src/cmd/bhyve/pci_ahci.c +++ b/usr/src/cmd/bhyve/pci_ahci.c @@ -240,7 +240,7 @@ ahci_generate_intr(struct pci_ahci_softc *sc, uint32_t mask) if (p->is & p->ie) sc->is |= (1 << i); } - DPRINTF("%s(%08x) %08x\n", __func__, mask, sc->is); + DPRINTF("%s(%08x) %08x", __func__, mask, sc->is); /* If there is nothing enabled -- clear legacy interrupt and exit. */ if (sc->is == 0 || (sc->ghc & AHCI_GHC_IE) == 0) { @@ -282,7 +282,7 @@ ahci_port_intr(struct ahci_port *p) struct pci_devinst *pi = sc->asc_pi; int nmsg; - DPRINTF("%s(%d) %08x/%08x %08x\n", __func__, + DPRINTF("%s(%d) %08x/%08x %08x", __func__, p->port, p->is, p->ie, sc->is); /* If there is nothing enabled -- we are done. */ @@ -341,7 +341,7 @@ ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis) irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_PS : 0; break; default: - WPRINTF("unsupported fis type %d\n", ft); + WPRINTF("unsupported fis type %d", ft); return; } if (fis[2] & ATA_S_ERROR) { @@ -1601,7 +1601,7 @@ handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis) DPRINTF("ACMD:"); for (i = 0; i < 16; i++) DPRINTF("%02x ", acmd[i]); - DPRINTF("\n"); + DPRINTF(""); } #endif @@ -1788,7 +1788,7 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis) handle_packet_cmd(p, slot, cfis); break; default: - WPRINTF("Unsupported cmd:%02x\n", cfis[2]); + WPRINTF("Unsupported cmd:%02x", cfis[2]); ahci_write_fis_d2h(p, slot, cfis, (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); break; @@ -1818,22 +1818,22 @@ ahci_handle_slot(struct ahci_port *p, int slot) #ifdef AHCI_DEBUG prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - DPRINTF("\ncfis:"); + DPRINTF("cfis:"); for (i = 0; i < cfl; i++) { if (i % 10 == 0) - DPRINTF("\n"); + DPRINTF(""); DPRINTF("%02x ", cfis[i]); } - DPRINTF("\n"); + DPRINTF(""); for (i = 0; i < hdr->prdtl; i++) { - DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba); + DPRINTF("%d@%08"PRIx64"", prdt->dbc & 0x3fffff, prdt->dba); prdt++; } #endif if (cfis[0] != 
FIS_TYPE_REGH2D) { - WPRINTF("Not a H2D FIS:%02x\n", cfis[0]); + WPRINTF("Not a H2D FIS:%02x", cfis[0]); return; } @@ -1889,7 +1889,7 @@ ata_ioreq_cb(struct blockif_req *br, int err) uint8_t *cfis; int slot, ncq, dsm; - DPRINTF("%s %d\n", __func__, err); + DPRINTF("%s %d", __func__, err); ncq = dsm = 0; aior = br->br_param; @@ -1949,7 +1949,7 @@ ata_ioreq_cb(struct blockif_req *br, int err) ahci_handle_port(p); out: pthread_mutex_unlock(&sc->mtx); - DPRINTF("%s exit\n", __func__); + DPRINTF("%s exit", __func__); } static void @@ -1963,7 +1963,7 @@ atapi_ioreq_cb(struct blockif_req *br, int err) uint32_t tfd; int slot; - DPRINTF("%s %d\n", __func__, err); + DPRINTF("%s %d", __func__, err); aior = br->br_param; p = aior->io_pr; @@ -2011,7 +2011,7 @@ atapi_ioreq_cb(struct blockif_req *br, int err) ahci_handle_port(p); out: pthread_mutex_unlock(&sc->mtx); - DPRINTF("%s exit\n", __func__); + DPRINTF("%s exit", __func__); } static void @@ -2048,7 +2048,7 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) offset = (offset - AHCI_OFFSET) % AHCI_STEP; struct ahci_port *p = &sc->port[port]; - DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n", + DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"", port, offset, value); switch (offset) { @@ -2120,7 +2120,7 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) case AHCI_P_TFD: case AHCI_P_SIG: case AHCI_P_SSTS: - WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset); + WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"", offset); break; case AHCI_P_SCTL: p->sctl = value; @@ -2149,7 +2149,7 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) static void pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) { - DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n", + DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"", 
offset, value); switch (offset) { @@ -2157,7 +2157,7 @@ pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) case AHCI_PI: case AHCI_VS: case AHCI_CAP2: - DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset); + DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"", offset); break; case AHCI_GHC: if (value & AHCI_GHC_HR) { @@ -2195,7 +2195,7 @@ pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) pci_ahci_port_write(sc, offset, value); else - WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset); + WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"", offset); pthread_mutex_unlock(&sc->mtx); } @@ -2226,7 +2226,7 @@ pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset) value = 0; break; } - DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n", + DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x", offset, value); return (value); @@ -2267,7 +2267,7 @@ pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset) break; } - DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n", + DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x", port, offset, value); return value; @@ -2294,7 +2294,7 @@ pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, value = pci_ahci_port_read(sc, offset); else { value = 0; - WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", + WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"", regoff); } value >>= 8 * (regoff & 0x3); diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c index 62a647e43e..8f2c4d810f 100644 --- a/usr/src/cmd/bhyve/pci_e82545.c +++ b/usr/src/cmd/bhyve/pci_e82545.c @@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$"); #include "mii.h" #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "mevent.h" #include "net_utils.h" @@ -231,8 +232,8 @@ struct ck_info { * Debug printf */ static int 
e82545_debug = 0; -#define DPRINTF(msg,params...) if (e82545_debug) fprintf(stderr, "e82545: " msg, params) -#define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params) +#define WPRINTF(msg,params...) PRINTLN("e82545: " msg, params) +#define DPRINTF(msg,params...) if (e82545_debug) WPRINTF(msg, params) #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) @@ -404,21 +405,21 @@ e82545_init_eeprom(struct e82545_softc *sc) } checksum = NVM_SUM - checksum; sc->eeprom_data[NVM_CHECKSUM_REG] = checksum; - DPRINTF("eeprom checksum: 0x%x\r\n", checksum); + DPRINTF("eeprom checksum: 0x%x", checksum); } static void e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr, uint8_t phy_addr, uint32_t data) { - DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data); + DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x", reg_addr, phy_addr, data); } static uint32_t e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, uint8_t phy_addr) { - //DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); + //DPRINTF("Read mdi reg:0x%x phy:0x%x", reg_addr, phy_addr); switch (reg_addr) { case PHY_STATUS: return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | @@ -435,7 +436,7 @@ e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, case PHY_ID2: return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF; default: - DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); + DPRINTF("Unknown mdi read reg:0x%x phy:0x%x", reg_addr, phy_addr); return 0; } /* not reached */ @@ -447,13 +448,13 @@ e82545_eecd_strobe(struct e82545_softc *sc) /* Microwire state machine */ /* DPRINTF("eeprom state machine srtobe " - "0x%x 0x%x 0x%x 0x%x\r\n", + "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data);*/ if (sc->nvm_bits == 0) { DPRINTF("eeprom state machine not expecting data! 
" - "0x%x 0x%x 0x%x 0x%x\r\n", + "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data); return; @@ -484,13 +485,13 @@ e82545_eecd_strobe(struct e82545_softc *sc) uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; if (op != E82545_NVM_OPCODE_WRITE) { - DPRINTF("Illegal eeprom write op 0x%x\r\n", + DPRINTF("Illegal eeprom write op 0x%x", sc->nvm_opaddr); } else if (addr >= E82545_NVM_EEPROM_SIZE) { - DPRINTF("Illegal eeprom write addr 0x%x\r\n", + DPRINTF("Illegal eeprom write addr 0x%x", sc->nvm_opaddr); } else { - DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n", + DPRINTF("eeprom write eeprom[0x%x] = 0x%x", addr, sc->nvm_data); sc->eeprom_data[addr] = sc->nvm_data; } @@ -508,7 +509,7 @@ e82545_eecd_strobe(struct e82545_softc *sc) uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; switch (op) { case E82545_NVM_OPCODE_EWEN: - DPRINTF("eeprom write enable: 0x%x\r\n", + DPRINTF("eeprom write enable: 0x%x", sc->nvm_opaddr); /* back to opcode mode */ sc->nvm_opaddr = 0; @@ -523,10 +524,10 @@ e82545_eecd_strobe(struct e82545_softc *sc) sc->nvm_bits = E82545_NVM_DATA_BITS; if (addr < E82545_NVM_EEPROM_SIZE) { sc->nvm_data = sc->eeprom_data[addr]; - DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n", + DPRINTF("eeprom read: eeprom[0x%x] = 0x%x", addr, sc->nvm_data); } else { - DPRINTF("eeprom illegal read: 0x%x\r\n", + DPRINTF("eeprom illegal read: 0x%x", sc->nvm_opaddr); sc->nvm_data = 0; } @@ -538,7 +539,7 @@ e82545_eecd_strobe(struct e82545_softc *sc) sc->nvm_data = 0; break; default: - DPRINTF("eeprom unknown op: 0x%x\r\r", + DPRINTF("eeprom unknown op: 0x%x", sc->nvm_opaddr); /* back to opcode mode */ sc->nvm_opaddr = 0; @@ -548,7 +549,7 @@ e82545_eecd_strobe(struct e82545_softc *sc) } } else { DPRINTF("eeprom state machine wrong state! 
" - "0x%x 0x%x 0x%x 0x%x\r\n", + "0x%x 0x%x 0x%x 0x%x", sc->nvm_mode, sc->nvm_bits, sc->nvm_opaddr, sc->nvm_data); } @@ -564,7 +565,7 @@ e82545_itr_callback(int fd, enum ev_type type, void *param) pthread_mutex_lock(&sc->esc_mtx); new = sc->esc_ICR & sc->esc_IMS; if (new && !sc->esc_irq_asserted) { - DPRINTF("itr callback: lintr assert %x\r\n", new); + DPRINTF("itr callback: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); } else { @@ -580,7 +581,7 @@ e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) { uint32_t new; - DPRINTF("icr assert: 0x%x\r\n", bits); + DPRINTF("icr assert: 0x%x", bits); /* * An interrupt is only generated if bits are set that @@ -591,11 +592,11 @@ e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) sc->esc_ICR |= bits; if (new == 0) { - DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("icr assert: masked %x, ims %x", new, sc->esc_IMS); } else if (sc->esc_mevpitr != NULL) { - DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("icr assert: throttled %x, ims %x", new, sc->esc_IMS); } else if (!sc->esc_irq_asserted) { - DPRINTF("icr assert: lintr assert %x\r\n", new); + DPRINTF("icr assert: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { @@ -621,11 +622,11 @@ e82545_ims_change(struct e82545_softc *sc, uint32_t bits) sc->esc_IMS |= bits; if (new == 0) { - DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("ims change: masked %x, ims %x", new, sc->esc_IMS); } else if (sc->esc_mevpitr != NULL) { - DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS); + DPRINTF("ims change: throttled %x, ims %x", new, sc->esc_IMS); } else if (!sc->esc_irq_asserted) { - DPRINTF("ims change: lintr assert %x\n\r", new); + DPRINTF("ims change: lintr assert %x", new); sc->esc_irq_asserted = 1; pci_lintr_assert(sc->esc_pi); if (sc->esc_ITR != 0) { @@ -642,7 +643,7 @@ static void 
e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) { - DPRINTF("icr deassert: 0x%x\r\n", bits); + DPRINTF("icr deassert: 0x%x", bits); sc->esc_ICR &= ~bits; /* @@ -650,7 +651,7 @@ e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) * was an asserted interrupt, clear it */ if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) { - DPRINTF("icr deassert: lintr deassert %x\r\n", bits); + DPRINTF("icr deassert: lintr deassert %x", bits); pci_lintr_deassert(sc->esc_pi); sc->esc_irq_asserted = 0; } @@ -660,7 +661,7 @@ static void e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value) { - DPRINTF("intr_write: off %x, val %x\n\r", offset, value); + DPRINTF("intr_write: off %x, val %x", offset, value); switch (offset) { case E1000_ICR: @@ -694,7 +695,7 @@ e82545_intr_read(struct e82545_softc *sc, uint32_t offset) retval = 0; - DPRINTF("intr_read: off %x\n\r", offset); + DPRINTF("intr_read: off %x", offset); switch (offset) { case E1000_ICR: @@ -728,7 +729,7 @@ e82545_devctl(struct e82545_softc *sc, uint32_t val) sc->esc_CTRL = val & ~E1000_CTRL_RST; if (val & E1000_CTRL_RST) { - DPRINTF("e1k: s/w reset, ctl %x\n", val); + DPRINTF("e1k: s/w reset, ctl %x", val); e82545_reset(sc, 1); } /* XXX check for phy reset ? */ @@ -757,7 +758,7 @@ e82545_rx_ctl(struct e82545_softc *sc, uint32_t val) /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */ sc->esc_RCTL = val & ~0xF9204c01; - DPRINTF("rx_ctl - %s RCTL %x, val %x\n", + DPRINTF("rx_ctl - %s RCTL %x, val %x", on ? 
"on" : "off", sc->esc_RCTL, val); /* state change requested */ @@ -850,10 +851,10 @@ e82545_tap_callback(int fd, enum ev_type type, void *param) uint16_t *tp, tag, head; pthread_mutex_lock(&sc->esc_mtx); - DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); + DPRINTF("rx_run: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { - DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n", + DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped", sc->esc_rx_enabled, sc->esc_rx_loopback); while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { } @@ -866,7 +867,7 @@ e82545_tap_callback(int fd, enum ev_type type, void *param) head = sc->esc_RDH; left = (size + sc->esc_RDT - head) % size; if (left < maxpktdesc) { - DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n", + DPRINTF("rx overflow (%d < %d) -- packet(s) dropped", left, maxpktdesc); while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { } @@ -902,7 +903,7 @@ e82545_tap_callback(int fd, enum ev_type type, void *param) len += ETHER_CRC_LEN; n = (len + bufsz - 1) / bufsz; - DPRINTF("packet read %d bytes, %d segs, head %d\r\n", + DPRINTF("packet read %d bytes, %d segs, head %d", len, n, head); /* Apply VLAN filter. 
*/ @@ -912,9 +913,9 @@ e82545_tap_callback(int fd, enum ev_type type, void *param) tag = ntohs(tp[1]) & 0x0fff; if ((sc->esc_fvlan[tag >> 5] & (1 << (tag & 0x1f))) != 0) { - DPRINTF("known VLAN %d\r\n", tag); + DPRINTF("known VLAN %d", tag); } else { - DPRINTF("unknown VLAN %d\r\n", tag); + DPRINTF("unknown VLAN %d", tag); n = 0; continue; } @@ -965,7 +966,7 @@ done: if (cause != 0) e82545_icr_assert(sc, cause); done1: - DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); + DPRINTF("rx_run done: head %x, tail %x", sc->esc_RDH, sc->esc_RDT); pthread_mutex_unlock(&sc->esc_mtx); } #endif @@ -1056,7 +1057,7 @@ e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck) uint16_t cksum; int cklen; - DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n", + DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d", iovcnt, ck->ck_start, ck->ck_off, ck->ck_len); cklen = ck->ck_len ? ck->ck_len - ck->ck_start + 1 : INT_MAX; cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen); @@ -1131,14 +1132,14 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, switch (dtype) { case E1000_TXD_TYP_C: DPRINTF("tx ctxt desc idx %d: %016jx " - "%08x%08x\r\n", + "%08x%08x", head, dsc->td.buffer_addr, dsc->td.upper.data, dsc->td.lower.data); /* Save context and return */ sc->esc_txctx = dsc->cd; goto done; case E1000_TXD_TYP_L: - DPRINTF("tx legacy desc idx %d: %08x%08x\r\n", + DPRINTF("tx legacy desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); /* * legacy cksum start valid in first descriptor @@ -1147,7 +1148,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, ckinfo[0].ck_start = dsc->td.upper.fields.css; break; case E1000_TXD_TYP_D: - DPRINTF("tx data desc idx %d: %08x%08x\r\n", + DPRINTF("tx data desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); ntype = dtype; break; @@ -1157,7 +1158,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, } else { 
/* Descriptor type must be consistent */ assert(dtype == ntype); - DPRINTF("tx next desc idx %d: %08x%08x\r\n", + DPRINTF("tx next desc idx %d: %08x%08x", head, dsc->td.upper.data, dsc->td.lower.data); } @@ -1224,7 +1225,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, } if (iovcnt > I82545_MAX_TXSEGS) { - WPRINTF("tx too many descriptors (%d > %d) -- dropped\r\n", + WPRINTF("tx too many descriptors (%d > %d) -- dropped", iovcnt, I82545_MAX_TXSEGS); goto done; } @@ -1405,7 +1406,7 @@ e82545_tx_run(struct e82545_softc *sc) head = sc->esc_TDH; tail = sc->esc_TDT; size = sc->esc_TDLEN / 16; - DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n", + DPRINTF("tx_run: head %x, rhead %x, tail %x", sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); pthread_mutex_unlock(&sc->esc_mtx); @@ -1429,7 +1430,7 @@ e82545_tx_run(struct e82545_softc *sc) if (cause) e82545_icr_assert(sc, cause); - DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n", + DPRINTF("tx_run done: head %x, rhead %x, tail %x", sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); } @@ -1559,10 +1560,10 @@ e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) int ridx; if (offset & 0x3) { - DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value); + DPRINTF("Unaligned register write offset:0x%x value:0x%x", offset, value); return; } - DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value); + DPRINTF("Register write: 0x%x value: 0x%x", offset, value); switch (offset) { case E1000_CTRL: @@ -1706,7 +1707,7 @@ e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) break; case E1000_EECD: { - //DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value); + //DPRINTF("EECD write 0x%x -> 0x%x", sc->eeprom_control, value); /* edge triggered low->high */ uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ? 
0 : (value & E1000_EECD_SK)); @@ -1734,7 +1735,7 @@ e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) sc->mdi_control = (value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST)); if ((value & E1000_MDIC_READY) != 0) { - DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value); + DPRINTF("Incorrect MDIC ready bit: 0x%x", value); return; } switch (value & E82545_MDIC_OP_MASK) { @@ -1747,7 +1748,7 @@ e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) value & E82545_MDIC_DATA_MASK); break; default: - DPRINTF("Unknown MDIC op: 0x%x\r\n", value); + DPRINTF("Unknown MDIC op: 0x%x", value); return; } /* TODO: barrier? */ @@ -1761,7 +1762,7 @@ e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) case E1000_STATUS: return; default: - DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value); + DPRINTF("Unknown write register: 0x%x value:%x", offset, value); return; } } @@ -1773,11 +1774,11 @@ e82545_read_register(struct e82545_softc *sc, uint32_t offset) int ridx; if (offset & 0x3) { - DPRINTF("Unaligned register read offset:0x%x\r\n", offset); + DPRINTF("Unaligned register read offset:0x%x", offset); return 0; } - DPRINTF("Register read: 0x%x\r\n", offset); + DPRINTF("Register read: 0x%x", offset); switch (offset) { case E1000_CTRL: @@ -1902,7 +1903,7 @@ e82545_read_register(struct e82545_softc *sc, uint32_t offset) retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2]; break; case E1000_EECD: - //DPRINTF("EECD read %x\r\n", sc->eeprom_control); + //DPRINTF("EECD read %x", sc->eeprom_control); retval = sc->eeprom_control; break; case E1000_MDIC: @@ -2032,7 +2033,7 @@ e82545_read_register(struct e82545_softc *sc, uint32_t offset) retval = 0; break; default: - DPRINTF("Unknown read register: 0x%x\r\n", offset); + DPRINTF("Unknown read register: 0x%x", offset); retval = 0; break; } @@ -2046,7 +2047,7 @@ e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, { struct e82545_softc 
*sc; - //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size); + //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d", baridx, offset, value, size); sc = pi->pi_arg; @@ -2057,33 +2058,33 @@ e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, switch (offset) { case E82545_IOADDR: if (size != 4) { - DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value); + DPRINTF("Wrong io addr write sz:%d value:0x%lx", size, value); } else sc->io_addr = (uint32_t)value; break; case E82545_IODATA: if (size != 4) { - DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value); + DPRINTF("Wrong io data write size:%d value:0x%lx", size, value); } else if (sc->io_addr > E82545_IO_REGISTER_MAX) { - DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value); + DPRINTF("Non-register io write addr:0x%x value:0x%lx", sc->io_addr, value); } else e82545_write_register(sc, sc->io_addr, (uint32_t)value); break; default: - DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size); + DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d", offset, value, size); break; } break; case E82545_BAR_REGISTER: if (size != 4) { - DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value); + DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx", size, offset, value); } else e82545_write_register(sc, (uint32_t)offset, (uint32_t)value); break; default: - DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n", + DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d", baridx, offset, value, size); } @@ -2097,7 +2098,7 @@ e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, struct e82545_softc *sc; uint64_t retval; - //DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size); + //DPRINTF("Read bar:%d offset:0x%lx size:%d", baridx, offset, size); sc = pi->pi_arg; retval = 0; @@ 
-2108,35 +2109,35 @@ e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, switch (offset) { case E82545_IOADDR: if (size != 4) { - DPRINTF("Wrong io addr read sz:%d\r\n", size); + DPRINTF("Wrong io addr read sz:%d", size); } else retval = sc->io_addr; break; case E82545_IODATA: if (size != 4) { - DPRINTF("Wrong io data read sz:%d\r\n", size); + DPRINTF("Wrong io data read sz:%d", size); } if (sc->io_addr > E82545_IO_REGISTER_MAX) { - DPRINTF("Non-register io read addr:0x%x\r\n", + DPRINTF("Non-register io read addr:0x%x", sc->io_addr); } else retval = e82545_read_register(sc, sc->io_addr); break; default: - DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n", + DPRINTF("Unknown io bar read offset:0x%lx size:%d", offset, size); break; } break; case E82545_BAR_REGISTER: if (size != 4) { - DPRINTF("Wrong register read size:%d offset:0x%lx\r\n", + DPRINTF("Wrong register read size:%d offset:0x%lx", size, offset); } else retval = e82545_read_register(sc, (uint32_t)offset); break; default: - DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n", + DPRINTF("Unknown read bar:%d offset:0x%lx size:%d", baridx, offset, size); break; } @@ -2291,11 +2292,11 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { char nstr[80]; struct e82545_softc *sc; - char *devname; + char *optscopy; char *vtopts; int mac_provided; - DPRINTF("Loading with options: %s\r\n", opts); + DPRINTF("Loading with options: %s", opts); /* Setup our softc */ sc = calloc(1, sizeof(*sc)); @@ -2340,25 +2341,38 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) mac_provided = 0; sc->esc_tapfd = -1; if (opts != NULL) { - int err; + int err = 0; - devname = vtopts = strdup(opts); + optscopy = vtopts = strdup(opts); (void) strsep(&vtopts, ","); - if (vtopts != NULL) { - err = net_parsemac(vtopts, sc->esc_mac.octet); - if (err != 0) { - free(devname); - return (err); + /* + * Parse the list of options in the form + * key1=value1,...,keyN=valueN. 
+ */ + while (vtopts != NULL) { + char *value = vtopts; + char *key; + + key = strsep(&value, "="); + if (value == NULL) + break; + vtopts = value; + (void) strsep(&vtopts, ","); + + if (strcmp(key, "mac") == 0) { + err = net_parsemac(value, sc->esc_mac.octet); + if (err) + break; + mac_provided = 1; } - mac_provided = 1; } - if (strncmp(devname, "tap", 3) == 0 || - strncmp(devname, "vmnet", 5) == 0) - e82545_open_tap(sc, devname); + if (strncmp(optscopy, "tap", 3) == 0 || + strncmp(optscopy, "vmnet", 5) == 0) + e82545_open_tap(sc, optscopy); - free(devname); + free(optscopy); } if (!mac_provided) { diff --git a/usr/src/cmd/bhyve/pci_emul.c b/usr/src/cmd/bhyve/pci_emul.c index 771cf4e77e..c510116e19 100644 --- a/usr/src/cmd/bhyve/pci_emul.c +++ b/usr/src/cmd/bhyve/pci_emul.c @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include "acpi.h" #include "bhyverun.h" +#include "debug.h" #include "inout.h" #include "ioapic.h" #include "mem.h" @@ -175,7 +176,7 @@ static void pci_parse_slot_usage(char *aopt) { - fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); + EPRINTLN("Invalid PCI slot info field \"%s\"", aopt); } int @@ -228,13 +229,13 @@ pci_parse_slot(char *opt) si = &bi->slotinfo[snum]; if (si->si_funcs[fnum].fi_name != NULL) { - fprintf(stderr, "pci slot %d:%d already occupied!\n", + EPRINTLN("pci slot %d:%d already occupied!", snum, fnum); goto done; } if (pci_emul_finddev(emul) == NULL) { - fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", + EPRINTLN("pci slot %d:%d: unknown device \"%s\"", snum, fnum, emul); goto done; } @@ -892,7 +893,7 @@ pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) sizeof(msixcap))); } -void +static void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int bytes, uint32_t val) { @@ -916,7 +917,7 @@ msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, CFGWRITE(pi, offset, val, bytes); } -void +static void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, int 
bytes, uint32_t val) { @@ -933,26 +934,26 @@ msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, msgctrl &= ~rwmask; msgctrl |= val & rwmask; val = msgctrl; - - addrlo = pci_get_cfgdata32(pi, capoff + 4); - if (msgctrl & PCIM_MSICTRL_64BIT) - msgdata = pci_get_cfgdata16(pi, capoff + 12); - else - msgdata = pci_get_cfgdata16(pi, capoff + 8); - - mme = msgctrl & PCIM_MSICTRL_MME_MASK; - pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; - if (pi->pi_msi.enabled) { - pi->pi_msi.addr = addrlo; - pi->pi_msi.msg_data = msgdata; - pi->pi_msi.maxmsgnum = 1 << (mme >> 4); - } else { - pi->pi_msi.maxmsgnum = 0; - } - pci_lintr_update(pi); } - CFGWRITE(pi, offset, val, bytes); + + msgctrl = pci_get_cfgdata16(pi, capoff + 2); + addrlo = pci_get_cfgdata32(pi, capoff + 4); + if (msgctrl & PCIM_MSICTRL_64BIT) + msgdata = pci_get_cfgdata16(pi, capoff + 12); + else + msgdata = pci_get_cfgdata16(pi, capoff + 8); + + mme = msgctrl & PCIM_MSICTRL_MME_MASK; + pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0; + if (pi->pi_msi.enabled) { + pi->pi_msi.addr = addrlo; + pi->pi_msi.msg_data = msgdata; + pi->pi_msi.maxmsgnum = 1 << (mme >> 4); + } else { + pi->pi_msi.maxmsgnum = 0; + } + pci_lintr_update(pi); } void @@ -995,30 +996,34 @@ pci_emul_add_pciecap(struct pci_devinst *pi, int type) /* * This function assumes that 'coff' is in the capabilities region of the - * config space. + * config space. A capoff parameter of zero will force a search for the + * offset and type. 
*/ -static void -pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) +void +pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val, + uint8_t capoff, int capid) { - int capid; - uint8_t capoff, nextoff; + uint8_t nextoff; /* Do not allow un-aligned writes */ if ((offset & (bytes - 1)) != 0) return; - /* Find the capability that we want to update */ - capoff = CAP_START_OFFSET; - while (1) { - nextoff = pci_get_cfgdata8(pi, capoff + 1); - if (nextoff == 0) - break; - if (offset >= capoff && offset < nextoff) - break; + if (capoff == 0) { + /* Find the capability that we want to update */ + capoff = CAP_START_OFFSET; + while (1) { + nextoff = pci_get_cfgdata8(pi, capoff + 1); + if (nextoff == 0) + break; + if (offset >= capoff && offset < nextoff) + break; - capoff = nextoff; + capoff = nextoff; + } + assert(offset >= capoff); + capid = pci_get_cfgdata8(pi, capoff); } - assert(offset >= capoff); /* * Capability ID and Next Capability Pointer are readonly. 
@@ -1035,7 +1040,6 @@ pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) return; } - capid = pci_get_cfgdata8(pi, capoff); switch (capid) { case PCIY_MSI: msicap_cfgwrite(pi, capoff, offset, bytes, val); @@ -1287,7 +1291,6 @@ pci_bus_write_dsdt(int bus) dsdt_line(" Device (PC%02X)", bus); dsdt_line(" {"); dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); - dsdt_line(" Name (_ADR, Zero)"); dsdt_line(" Method (_BBN, 0, NotSerialized)"); dsdt_line(" {"); @@ -1921,7 +1924,7 @@ pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, pci_set_cfgdata32(pi, coff, bar); } else if (pci_emul_iscap(pi, coff)) { - pci_emul_capwrite(pi, coff, bytes, *eax); + pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { pci_emul_cmdsts_write(pi, coff, *eax, bytes); } else { diff --git a/usr/src/cmd/bhyve/pci_emul.h b/usr/src/cmd/bhyve/pci_emul.h index 51de897543..d3dd9a2f46 100644 --- a/usr/src/cmd/bhyve/pci_emul.h +++ b/usr/src/cmd/bhyve/pci_emul.h @@ -219,10 +219,6 @@ typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin, int ioapic_irq, void *arg); int init_pci(struct vmctx *ctx); -void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val); -void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val); void pci_callback(void); int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size); @@ -230,6 +226,8 @@ int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, enum pcibar_type type, uint64_t size); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); +void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, + uint32_t val, uint8_t capoff, int capid); void pci_emul_cmd_changed(struct pci_devinst *pi, uint16_t old); void pci_generate_msi(struct pci_devinst *pi, 
int msgnum); void pci_generate_msix(struct pci_devinst *pi, int msgnum); diff --git a/usr/src/cmd/bhyve/pci_fbuf.c b/usr/src/cmd/bhyve/pci_fbuf.c index 1b2eb03b9b..d945545b9d 100644 --- a/usr/src/cmd/bhyve/pci_fbuf.c +++ b/usr/src/cmd/bhyve/pci_fbuf.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include "bhyvegc.h" #include "bhyverun.h" +#include "debug.h" #include "console.h" #include "inout.h" #include "pci_emul.h" @@ -64,7 +65,7 @@ __FBSDID("$FreeBSD$"); static int fbuf_debug = 1; #define DEBUG_INFO 1 #define DEBUG_VERBOSE 4 -#define DPRINTF(level, params) if (level <= fbuf_debug) printf params +#define DPRINTF(level, params) if (level <= fbuf_debug) PRINTLN params #define KB (1024UL) @@ -121,9 +122,9 @@ static void pci_fbuf_usage(char *opt) { - fprintf(stderr, "Invalid fbuf emulation option \"%s\"\r\n", opt); - fprintf(stderr, "fbuf: {wait,}{vga=on|io|off,}rfb=<ip>:port" - "{,w=width}{,h=height}\r\n"); + EPRINTLN("Invalid fbuf emulation option \"%s\"", opt); + EPRINTLN("fbuf: {wait,}{vga=on|io|off,}rfb=<ip>:port" + "{,w=width}{,h=height}"); } static void @@ -138,7 +139,7 @@ pci_fbuf_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, sc = pi->pi_arg; DPRINTF(DEBUG_VERBOSE, - ("fbuf wr: offset 0x%lx, size: %d, value: 0x%lx\n", + ("fbuf wr: offset 0x%lx, size: %d, value: 0x%lx", offset, size, value)); if (offset + size > DMEMSZ) { @@ -169,13 +170,13 @@ pci_fbuf_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (!sc->gc_image->vgamode && sc->memregs.width == 0 && sc->memregs.height == 0) { - DPRINTF(DEBUG_INFO, ("switching to VGA mode\r\n")); + DPRINTF(DEBUG_INFO, ("switching to VGA mode")); sc->gc_image->vgamode = 1; sc->gc_width = 0; sc->gc_height = 0; } else if (sc->gc_image->vgamode && sc->memregs.width != 0 && sc->memregs.height != 0) { - DPRINTF(DEBUG_INFO, ("switching to VESA mode\r\n")); + DPRINTF(DEBUG_INFO, ("switching to VESA mode")); sc->gc_image->vgamode = 0; } } @@ -220,7 +221,7 @@ pci_fbuf_read(struct vmctx *ctx, int vcpu, struct 
pci_devinst *pi, } DPRINTF(DEBUG_VERBOSE, - ("fbuf rd: offset 0x%lx, size: %d, value: 0x%lx\n", + ("fbuf rd: offset 0x%lx, size: %d, value: 0x%lx", offset, size, value)); return (value); @@ -249,7 +250,7 @@ pci_fbuf_parse_opts(struct pci_fbuf_softc *sc, char *opts) *config++ = '\0'; - DPRINTF(DEBUG_VERBOSE, ("pci_fbuf option %s = %s\r\n", + DPRINTF(DEBUG_VERBOSE, ("pci_fbuf option %s = %s", xopts, config)); if (!strcmp(xopts, "tcp") || !strcmp(xopts, "rfb")) { @@ -363,7 +364,7 @@ pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) struct pci_fbuf_softc *sc; if (fbuf_sc != NULL) { - fprintf(stderr, "Only one frame buffer device is allowed.\n"); + EPRINTLN("Only one frame buffer device is allowed."); return (-1); } @@ -403,7 +404,7 @@ pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* XXX until VGA rendering is enabled */ if (sc->vga_full != 0) { - fprintf(stderr, "pci_fbuf: VGA rendering not enabled"); + EPRINTLN("pci_fbuf: VGA rendering not enabled"); goto done; } @@ -412,7 +413,7 @@ pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) error = -1; goto done; } - DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]\r\n", + DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]", sc->fb_base, FB_SIZE)); /* @@ -423,7 +424,7 @@ pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) */ prot = PROT_READ | PROT_WRITE; if (vm_mmap_memseg(ctx, sc->fbaddr, VM_FRAMEBUFFER, 0, FB_SIZE, prot) != 0) { - fprintf(stderr, "pci_fbuf: mapseg failed - try deleting VM and restarting\n"); + EPRINTLN("pci_fbuf: mapseg failed - try deleting VM and restarting"); error = -1; goto done; } diff --git a/usr/src/cmd/bhyve/pci_hda.c b/usr/src/cmd/bhyve/pci_hda.c index e0324f46a9..86e46a550a 100644 --- a/usr/src/cmd/bhyve/pci_hda.c +++ b/usr/src/cmd/bhyve/pci_hda.c @@ -332,11 +332,11 @@ hda_parse_config(const char *opts, const char *key, char *val) len = strlen(opts); if (len >= sizeof(buf)) { - DPRINTF("Opts too big\n"); 
+ DPRINTF("Opts too big"); return (0); } - DPRINTF("opts: %s\n", opts); + DPRINTF("opts: %s", opts); strcpy(buf, opts); @@ -377,7 +377,7 @@ hda_init(const char *opts) dbg = fopen("/tmp/bhyve_hda.log", "w+"); #endif - DPRINTF("opts: %s\n", opts); + DPRINTF("opts: %s", opts); sc = calloc(1, sizeof(*sc)); if (!sc) @@ -393,7 +393,7 @@ hda_init(const char *opts) if (codec) { p = hda_parse_config(opts, "play=", play); r = hda_parse_config(opts, "rec=", rec); - DPRINTF("play: %s rec: %s\n", play, rec); + DPRINTF("play: %s rec: %s", play, rec); if (p | r) { err = hda_codec_constructor(sc, codec, p ? \ play : NULL, r ? rec : NULL, NULL); @@ -489,7 +489,7 @@ hda_codec_constructor(struct hda_softc *sc, struct hda_codec_class *codec, sc->codecs[sc->codecs_no++] = hci; if (!codec->init) { - DPRINTF("This codec does not implement the init function\n"); + DPRINTF("This codec does not implement the init function"); return (-1); } @@ -522,13 +522,13 @@ hda_send_command(struct hda_softc *sc, uint32_t verb) if (!hci) return (-1); - DPRINTF("cad: 0x%x verb: 0x%x\n", cad, verb); + DPRINTF("cad: 0x%x verb: 0x%x", cad, verb); codec = hci->codec; assert(codec); if (!codec->command) { - DPRINTF("This codec does not implement the command function\n"); + DPRINTF("This codec does not implement the command function"); return (-1); } @@ -592,7 +592,7 @@ hda_reset_regs(struct hda_softc *sc) uint32_t off = 0; uint8_t i; - DPRINTF("Reset the HDA controller registers ...\n"); + DPRINTF("Reset the HDA controller registers ..."); memset(sc->regs, 0, sizeof(sc->regs)); @@ -620,7 +620,7 @@ hda_stream_reset(struct hda_softc *sc, uint8_t stream_ind) struct hda_stream_desc *st = &sc->streams[stream_ind]; uint32_t off = hda_get_offset_stream(stream_ind); - DPRINTF("Reset the HDA stream: 0x%x\n", stream_ind); + DPRINTF("Reset the HDA stream: 0x%x", stream_ind); /* Reset the Stream Descriptor registers */ memset(sc->regs + HDA_STREAM_REGS_BASE + off, 0, HDA_STREAM_REGS_LEN); @@ -670,11 +670,11 @@ 
hda_stream_start(struct hda_softc *sc, uint8_t stream_ind) bdl_vaddr = hda_dma_get_vaddr(sc, bdl_paddr, HDA_BDL_ENTRY_LEN * bdl_cnt); if (!bdl_vaddr) { - DPRINTF("Fail to get the guest virtual address\n"); + DPRINTF("Fail to get the guest virtual address"); return (-1); } - DPRINTF("stream: 0x%x bdl_cnt: 0x%x bdl_paddr: 0x%lx\n", + DPRINTF("stream: 0x%x bdl_cnt: 0x%x bdl_paddr: 0x%lx", stream_ind, bdl_cnt, bdl_paddr); st->bdl_cnt = bdl_cnt; @@ -690,7 +690,7 @@ hda_stream_start(struct hda_softc *sc, uint8_t stream_ind) bdle_paddr = bdle_addrl | (bdle_addrh << 32); bdle_vaddr = hda_dma_get_vaddr(sc, bdle_paddr, bdle_sz); if (!bdle_vaddr) { - DPRINTF("Fail to get the guest virtual address\n"); + DPRINTF("Fail to get the guest virtual address"); return (-1); } @@ -699,14 +699,14 @@ hda_stream_start(struct hda_softc *sc, uint8_t stream_ind) bdle_desc->len = bdle_sz; bdle_desc->ioc = bdle->ioc; - DPRINTF("bdle: 0x%x bdle_sz: 0x%x\n", i, bdle_sz); + DPRINTF("bdle: 0x%x bdle_sz: 0x%x", i, bdle_sz); } sdctl = hda_get_reg_by_offset(sc, off + HDAC_SDCTL0); strm = (sdctl >> 20) & 0x0f; dir = stream_ind >= HDA_ISS_NO; - DPRINTF("strm: 0x%x, dir: 0x%x\n", strm, dir); + DPRINTF("strm: 0x%x, dir: 0x%x", strm, dir); sc->stream_map[dir][strm] = stream_ind; st->stream = strm; @@ -730,7 +730,7 @@ hda_stream_stop(struct hda_softc *sc, uint8_t stream_ind) uint8_t strm = st->stream; uint8_t dir = st->dir; - DPRINTF("stream: 0x%x, strm: 0x%x, dir: 0x%x\n", stream_ind, strm, dir); + DPRINTF("stream: 0x%x, strm: 0x%x, dir: 0x%x", stream_ind, strm, dir); st->run = 0; @@ -771,10 +771,10 @@ hda_print_cmd_ctl_data(struct hda_codec_cmd_ctl *p) #if DEBUG_HDA == 1 char *name = p->name; #endif - DPRINTF("%s size: %d\n", name, p->size); - DPRINTF("%s dma_vaddr: %p\n", name, p->dma_vaddr); - DPRINTF("%s wp: 0x%x\n", name, p->wp); - DPRINTF("%s rp: 0x%x\n", name, p->rp); + DPRINTF("%s size: %d", name, p->size); + DPRINTF("%s dma_vaddr: %p", name, p->dma_vaddr); + DPRINTF("%s wp: 0x%x", name, p->wp); + 
DPRINTF("%s rp: 0x%x", name, p->rp); } static int @@ -793,7 +793,7 @@ hda_corb_start(struct hda_softc *sc) corb->size = hda_corb_sizes[corbsize]; if (!corb->size) { - DPRINTF("Invalid corb size\n"); + DPRINTF("Invalid corb size"); return (-1); } @@ -801,12 +801,12 @@ hda_corb_start(struct hda_softc *sc) corbubase = hda_get_reg_by_offset(sc, HDAC_CORBUBASE); corbpaddr = corblbase | (corbubase << 32); - DPRINTF("CORB dma_paddr: %p\n", (void *)corbpaddr); + DPRINTF("CORB dma_paddr: %p", (void *)corbpaddr); corb->dma_vaddr = hda_dma_get_vaddr(sc, corbpaddr, HDA_CORB_ENTRY_LEN * corb->size); if (!corb->dma_vaddr) { - DPRINTF("Fail to get the guest virtual address\n"); + DPRINTF("Fail to get the guest virtual address"); return (-1); } @@ -864,7 +864,7 @@ hda_rirb_start(struct hda_softc *sc) rirb->size = hda_rirb_sizes[rirbsize]; if (!rirb->size) { - DPRINTF("Invalid rirb size\n"); + DPRINTF("Invalid rirb size"); return (-1); } @@ -872,12 +872,12 @@ hda_rirb_start(struct hda_softc *sc) rirbubase = hda_get_reg_by_offset(sc, HDAC_RIRBUBASE); rirbpaddr = rirblbase | (rirbubase << 32); - DPRINTF("RIRB dma_paddr: %p\n", (void *)rirbpaddr); + DPRINTF("RIRB dma_paddr: %p", (void *)rirbpaddr); rirb->dma_vaddr = hda_dma_get_vaddr(sc, rirbpaddr, HDA_RIRB_ENTRY_LEN * rirb->size); if (!rirb->dma_vaddr) { - DPRINTF("Fail to get the guest virtual address\n"); + DPRINTF("Fail to get the guest virtual address"); return (-1); } @@ -1022,18 +1022,18 @@ hda_set_dpiblbase(struct hda_softc *sc, uint32_t offset, uint32_t old) dpibubase = hda_get_reg_by_offset(sc, HDAC_DPIBUBASE); dpibpaddr = dpiblbase | (dpibubase << 32); - DPRINTF("DMA Position In Buffer dma_paddr: %p\n", + DPRINTF("DMA Position In Buffer dma_paddr: %p", (void *)dpibpaddr); sc->dma_pib_vaddr = hda_dma_get_vaddr(sc, dpibpaddr, HDA_DMA_PIB_ENTRY_LEN * HDA_IOSS_NO); if (!sc->dma_pib_vaddr) { DPRINTF("Fail to get the guest \ - virtual address\n"); + virtual address"); assert(0); } } else { - DPRINTF("DMA Position In Buffer 
Reset\n"); + DPRINTF("DMA Position In Buffer Reset"); sc->dma_pib_vaddr = NULL; } } @@ -1046,7 +1046,7 @@ hda_set_sdctl(struct hda_softc *sc, uint32_t offset, uint32_t old) uint32_t value = hda_get_reg_by_offset(sc, offset); int err; - DPRINTF("stream_ind: 0x%x old: 0x%x value: 0x%x\n", + DPRINTF("stream_ind: 0x%x old: 0x%x value: 0x%x", stream_ind, old, value); if (value & HDAC_SDCTL_SRST) { @@ -1094,7 +1094,7 @@ hda_signal_state_change(struct hda_codec_inst *hci) assert(hci); assert(hci->hda); - DPRINTF("cad: 0x%x\n", hci->cad); + DPRINTF("cad: 0x%x", hci->cad); sc = hci->hda; sdiwake = 1 << hci->cad; @@ -1164,7 +1164,7 @@ hda_transfer(struct hda_codec_inst *hci, uint8_t stream, uint8_t dir, assert(!(count % HDA_DMA_ACCESS_LEN)); if (!stream) { - DPRINTF("Invalid stream\n"); + DPRINTF("Invalid stream"); return (-1); } @@ -1180,7 +1180,7 @@ hda_transfer(struct hda_codec_inst *hci, uint8_t stream, uint8_t dir, st = &sc->streams[stream_ind]; if (!st->run) { - DPRINTF("Stream 0x%x stopped\n", stream); + DPRINTF("Stream 0x%x stopped", stream); return (-1); } @@ -1306,7 +1306,7 @@ pci_hda_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, assert(baridx == 0); assert(size <= 4); - DPRINTF("offset: 0x%lx value: 0x%lx\n", offset, value); + DPRINTF("offset: 0x%lx value: 0x%lx", offset, value); err = hda_write(sc, offset, size, value); assert(!err); @@ -1325,7 +1325,7 @@ pci_hda_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, value = hda_read(sc, offset); - DPRINTF("offset: 0x%lx value: 0x%lx\n", offset, value); + DPRINTF("offset: 0x%lx value: 0x%lx", offset, value); return (value); } diff --git a/usr/src/cmd/bhyve/pci_hda.h b/usr/src/cmd/bhyve/pci_hda.h index 8ed050cc8f..7b6bba92e4 100644 --- a/usr/src/cmd/bhyve/pci_hda.h +++ b/usr/src/cmd/bhyve/pci_hda.h @@ -50,7 +50,7 @@ #if DEBUG_HDA == 1 extern FILE *dbg; #define DPRINTF(fmt, arg...) 
\ -do {fprintf(dbg, "%s-%d: " fmt, __func__, __LINE__, ##arg); \ +do {fprintf(dbg, "%s-%d: " fmt "\n", __func__, __LINE__, ##arg); \ fflush(dbg); } while (0) #else #define DPRINTF(fmt, arg...) diff --git a/usr/src/cmd/bhyve/pci_lpc.c b/usr/src/cmd/bhyve/pci_lpc.c index b7ddb772a1..50413250d3 100644 --- a/usr/src/cmd/bhyve/pci_lpc.c +++ b/usr/src/cmd/bhyve/pci_lpc.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #include "acpi.h" +#include "debug.h" #include "bootrom.h" #include "inout.h" #include "pci_emul.h" @@ -210,7 +211,7 @@ lpc_init(struct vmctx *ctx) int unit, error; if (romfile != NULL) { - error = bootrom_init(ctx, romfile); + error = bootrom_loadrom(ctx, romfile); if (error) return (error); } @@ -221,8 +222,8 @@ lpc_init(struct vmctx *ctx) name = lpc_uart_names[unit]; if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) { - fprintf(stderr, "Unable to allocate resources for " - "LPC device %s\n", name); + EPRINTLN("Unable to allocate resources for " + "LPC device %s", name); return (-1); } pci_irq_reserve(sc->irq); @@ -231,8 +232,8 @@ lpc_init(struct vmctx *ctx) lpc_uart_intr_deassert, sc); if (uart_set_backend(sc->uart_softc, sc->opts) != 0) { - fprintf(stderr, "Unable to initialize backend '%s' " - "for LPC device %s\n", sc->opts, name); + EPRINTLN("Unable to initialize backend '%s' " + "for LPC device %s", sc->opts, name); return (-1); } @@ -417,7 +418,7 @@ pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) * Do not allow more than one LPC bridge to be configured. */ if (lpc_bridge != NULL) { - fprintf(stderr, "Only one LPC bridge is allowed.\n"); + EPRINTLN("Only one LPC bridge is allowed."); return (-1); } @@ -427,7 +428,7 @@ pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) * all legacy i/o ports behind bus 0. 
*/ if (pi->pi_bus != 0) { - fprintf(stderr, "LPC bridge can be present only on bus 0.\n"); + EPRINTLN("LPC bridge can be present only on bus 0."); return (-1); } diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c index 3e6e469ed1..65d8d49b64 100644 --- a/usr/src/cmd/bhyve/pci_nvme.c +++ b/usr/src/cmd/bhyve/pci_nvme.c @@ -81,12 +81,13 @@ __FBSDID("$FreeBSD$"); #include "bhyverun.h" #include "block_if.h" +#include "debug.h" #include "pci_emul.h" static int nvme_debug = 0; -#define DPRINTF(params) if (nvme_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (nvme_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* defaults; can be overridden */ #define NVME_MSIX_BAR 4 @@ -142,6 +143,11 @@ enum nvme_cmd_cdw11 { NVME_CMD_CDW11_IV = 0xFFFF0000, }; +enum nvme_copy_dir { + NVME_COPY_TO_PRP, + NVME_COPY_FROM_PRP, +}; + #define NVME_CQ_INTEN 0x01 #define NVME_CQ_INTCOAL 0x02 @@ -177,11 +183,12 @@ struct pci_nvme_blockstore { uint32_t sectsz; uint32_t sectsz_bits; uint64_t eui64; + uint32_t deallocate:1; }; struct pci_nvme_ioreq { struct pci_nvme_softc *sc; - struct pci_nvme_ioreq *next; + STAILQ_ENTRY(pci_nvme_ioreq) link; struct nvme_submission_queue *nvme_sq; uint16_t sqid; @@ -206,6 +213,15 @@ struct pci_nvme_ioreq { struct iovec iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX]; }; +enum nvme_dsm_type { + /* Dataset Management bit in ONCS reflects backing storage capability */ + NVME_DATASET_MANAGEMENT_AUTO, + /* Unconditionally set Dataset Management bit in ONCS */ + NVME_DATASET_MANAGEMENT_ENABLE, + /* Unconditionally clear Dataset Management bit in ONCS */ + NVME_DATASET_MANAGEMENT_DISABLE, +}; + struct pci_nvme_softc { struct pci_devinst *nsc_pi; @@ -227,7 +243,7 @@ struct pci_nvme_softc { uint32_t num_squeues; struct pci_nvme_ioreq *ioreqs; - struct pci_nvme_ioreq *ioreqs_free; /* free list of ioreqs */ + STAILQ_HEAD(, pci_nvme_ioreq) ioreqs_free; /* free list of ioreqs */ uint32_t pending_ios; 
uint32_t ioslots; sem_t iosemlock; @@ -243,6 +259,8 @@ struct pci_nvme_softc { uint32_t intr_coales_aggr_time; /* 0x08: uS to delay intr */ uint32_t intr_coales_aggr_thresh; /* 0x08: compl-Q entries */ uint32_t async_ev_config; /* 0x0B: async event config */ + + enum nvme_dsm_type dataset_management; }; @@ -282,6 +300,9 @@ static void pci_nvme_io_partial(struct blockif_req *br, int err); ((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\ (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) +#define NVME_ONCS_DSM (NVME_CTRLR_DATA_ONCS_DSM_MASK << \ + NVME_CTRLR_DATA_ONCS_DSM_SHIFT) + static __inline void cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { @@ -360,6 +381,19 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc) (4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT); cd->nn = 1; /* number of namespaces */ + cd->oncs = 0; + switch (sc->dataset_management) { + case NVME_DATASET_MANAGEMENT_AUTO: + if (sc->nvstore.deallocate) + cd->oncs |= NVME_ONCS_DSM; + break; + case NVME_DATASET_MANAGEMENT_ENABLE: + cd->oncs |= NVME_ONCS_DSM; + break; + default: + break; + } + cd->fna = 0x03; cd->power_state[0].mp = 10; @@ -418,20 +452,24 @@ crc16(uint16_t crc, const void *buffer, unsigned int len) static void pci_nvme_init_nsdata(struct pci_nvme_softc *sc, struct nvme_namespace_data *nd, uint32_t nsid, - uint64_t eui64) + struct pci_nvme_blockstore *nvstore) { - nd->nsze = sc->nvstore.size / sc->nvstore.sectsz; + /* Get capacity and block size information from backing store */ + nd->nsze = nvstore->size / nvstore->sectsz; nd->ncap = nd->nsze; nd->nuse = nd->nsze; - /* Get LBA and backstore information from backing store */ + if (nvstore->type == NVME_STOR_BLOCKIF) + nvstore->deallocate = blockif_candelete(nvstore->ctx); + nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 
1 LBA Format) */ nd->flbas = 0; /* Create an EUI-64 if user did not provide one */ - if (eui64 == 0) { + if (nvstore->eui64 == 0) { char *data = NULL; + uint64_t eui64 = nvstore->eui64; asprintf(&data, "%s%u%u%u", vmname, sc->nsc_pi->pi_bus, sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func); @@ -440,12 +478,12 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc, eui64 = OUI_FREEBSD_NVME_LOW | crc16(0, data, strlen(data)); free(data); } - eui64 = (eui64 << 16) | (nsid & 0xffff); + nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff); } - be64enc(nd->eui64, eui64); + be64enc(nd->eui64, nvstore->eui64); /* LBA data-sz = 2^lbads */ - nd->lbaf[0] = sc->nvstore.sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; + nd->lbaf[0] = nvstore->sectsz_bits << NVME_NS_DATA_LBAF_LBADS_SHIFT; } static void @@ -460,7 +498,7 @@ pci_nvme_init_logpages(struct pci_nvme_softc *sc) static void pci_nvme_reset_locked(struct pci_nvme_softc *sc) { - DPRINTF(("%s\r\n", __func__)); + DPRINTF(("%s", __func__)); sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | @@ -527,14 +565,14 @@ pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) { uint16_t acqs, asqs; - DPRINTF(("%s\r\n", __func__)); + DPRINTF(("%s", __func__)); asqs = (sc->regs.aqa & NVME_AQA_REG_ASQS_MASK) + 1; sc->submit_queues[0].size = asqs; sc->submit_queues[0].qbase = vm_map_gpa(ctx, sc->regs.asq, sizeof(struct nvme_command) * asqs); - DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p\r\n", + DPRINTF(("%s mapping Admin-SQ guest 0x%lx, host: %p", __func__, sc->regs.asq, sc->submit_queues[0].qbase)); acqs = ((sc->regs.aqa >> NVME_AQA_REG_ACQS_SHIFT) & @@ -542,15 +580,15 @@ pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc) sc->compl_queues[0].size = acqs; sc->compl_queues[0].qbase = vm_map_gpa(ctx, sc->regs.acq, sizeof(struct nvme_completion) * acqs); - DPRINTF(("%s mapping Admin-CQ guest 0x%lx, host: %p\r\n", + DPRINTF(("%s mapping Admin-CQ guest 
0x%lx, host: %p", __func__, sc->regs.acq, sc->compl_queues[0].qbase)); } static int -nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *src, - size_t len) +nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *b, + size_t len, enum nvme_copy_dir dir) { - uint8_t *dst; + uint8_t *p; size_t bytes; if (len > (8 * 1024)) { @@ -561,14 +599,17 @@ nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *src, bytes = PAGE_SIZE - (prp1 & PAGE_MASK); bytes = MIN(bytes, len); - dst = vm_map_gpa(ctx, prp1, bytes); - if (dst == NULL) { + p = vm_map_gpa(ctx, prp1, bytes); + if (p == NULL) { return (-1); } - memcpy(dst, src, bytes); + if (dir == NVME_COPY_TO_PRP) + memcpy(p, b, bytes); + else + memcpy(b, p, bytes); - src += bytes; + b += bytes; len -= bytes; if (len == 0) { @@ -577,12 +618,15 @@ nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *src, len = MIN(len, PAGE_SIZE); - dst = vm_map_gpa(ctx, prp2, len); - if (dst == NULL) { + p = vm_map_gpa(ctx, prp2, len); + if (p == NULL) { return (-1); } - memcpy(dst, src, len); + if (dir == NVME_COPY_TO_PRP) + memcpy(p, b, len); + else + memcpy(b, p, len); return (0); } @@ -593,9 +637,9 @@ nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, { uint16_t qid = command->cdw10 & 0xffff; - DPRINTF(("%s DELETE_IO_SQ %u\r\n", __func__, qid)); + DPRINTF(("%s DELETE_IO_SQ %u", __func__, qid)); if (qid == 0 || qid > sc->num_squeues) { - WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u\r\n", + WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); @@ -616,7 +660,7 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_submission_queue *nsq; if ((qid == 0) || (qid > sc->num_squeues)) { - WPRINTF(("%s queue index %u > num_squeues %u\r\n", + WPRINTF(("%s queue 
index %u > num_squeues %u", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, @@ -632,12 +676,12 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, nsq->cqid = (command->cdw11 >> 16) & 0xffff; nsq->qpriority = (command->cdw11 >> 1) & 0x03; - DPRINTF(("%s sq %u size %u gaddr %p cqid %u\r\n", __func__, + DPRINTF(("%s sq %u size %u gaddr %p cqid %u", __func__, qid, nsq->size, nsq->qbase, nsq->cqid)); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); - DPRINTF(("%s completed creating IOSQ qid %u\r\n", + DPRINTF(("%s completed creating IOSQ qid %u", __func__, qid)); } else { /* @@ -645,7 +689,7 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, * This setting is unsupported by this emulation. */ WPRINTF(("%s unsupported non-contig (list-based) " - "create i/o submission queue\r\n", __func__)); + "create i/o submission queue", __func__)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); } @@ -658,9 +702,9 @@ nvme_opc_delete_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, { uint16_t qid = command->cdw10 & 0xffff; - DPRINTF(("%s DELETE_IO_CQ %u\r\n", __func__, qid)); + DPRINTF(("%s DELETE_IO_CQ %u", __func__, qid)); if (qid == 0 || qid > sc->num_cqueues) { - WPRINTF(("%s queue index %u / num_cqueues %u\r\n", + WPRINTF(("%s queue index %u / num_cqueues %u", __func__, qid, sc->num_cqueues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, NVME_SC_INVALID_QUEUE_IDENTIFIER); @@ -681,7 +725,7 @@ nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion_queue *ncq; if ((qid == 0) || (qid > sc->num_cqueues)) { - WPRINTF(("%s queue index %u > num_cqueues %u\r\n", + WPRINTF(("%s queue index %u > num_cqueues %u", __func__, qid, sc->num_cqueues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, @@ -704,7 +748,7 @@ nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct 
nvme_command* command, * Non-contig completion queue unsupported. */ WPRINTF(("%s unsupported non-contig (list-based) " - "create i/o completion queue\r\n", + "create i/o completion queue", __func__)); /* 0x12 = Invalid Use of Controller Memory Buffer */ @@ -721,26 +765,29 @@ nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command, uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2; uint8_t logpage = command->cdw10 & 0xFF; - DPRINTF(("%s log page %u len %u\r\n", __func__, logpage, logsize)); + DPRINTF(("%s log page %u len %u", __func__, logpage, logsize)); pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS); switch (logpage) { case NVME_LOG_ERROR: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->err_log, logsize); + command->prp2, (uint8_t *)&sc->err_log, logsize, + NVME_COPY_TO_PRP); break; case NVME_LOG_HEALTH_INFORMATION: /* TODO: present some smart info */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->health_log, logsize); + command->prp2, (uint8_t *)&sc->health_log, logsize, + NVME_COPY_TO_PRP); break; case NVME_LOG_FIRMWARE_SLOT: nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->fw_log, logsize); + command->prp2, (uint8_t *)&sc->fw_log, logsize, + NVME_COPY_TO_PRP); break; default: - WPRINTF(("%s get log page %x command not supported\r\n", + WPRINTF(("%s get log page %x command not supported", __func__, logpage)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, @@ -756,18 +803,20 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, { void *dest; - DPRINTF(("%s identify 0x%x nsid 0x%x\r\n", __func__, + DPRINTF(("%s identify 0x%x nsid 0x%x", __func__, command->cdw10 & 0xFF, command->nsid)); switch (command->cdw10 & 0xFF) { case 0x00: /* return Identify Namespace data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, - command->prp2, (uint8_t *)&sc->nsdata, 
sizeof(sc->nsdata)); + command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata), + NVME_COPY_TO_PRP); break; case 0x01: /* return Identify Controller data structure */ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1, command->prp2, (uint8_t *)&sc->ctrldata, - sizeof(sc->ctrldata)); + sizeof(sc->ctrldata), + NVME_COPY_TO_PRP); break; case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */ dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, @@ -786,7 +835,7 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, case 0x14: case 0x15: default: - DPRINTF(("%s unsupported identify command requested 0x%x\r\n", + DPRINTF(("%s unsupported identify command requested 0x%x", __func__, command->cdw10 & 0xFF)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); @@ -804,28 +853,28 @@ nvme_set_feature_queues(struct pci_nvme_softc* sc, struct nvme_command* command, nqr = command->cdw11 & 0xFFFF; if (nqr == 0xffff) { - WPRINTF(("%s: Illegal NSQR value %#x\n", __func__, nqr)); + WPRINTF(("%s: Illegal NSQR value %#x", __func__, nqr)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (-1); } sc->num_squeues = ONE_BASED(nqr); if (sc->num_squeues > sc->max_queues) { - DPRINTF(("NSQR=%u is greater than max %u\n", sc->num_squeues, + DPRINTF(("NSQR=%u is greater than max %u", sc->num_squeues, sc->max_queues)); sc->num_squeues = sc->max_queues; } nqr = (command->cdw11 >> 16) & 0xFFFF; if (nqr == 0xffff) { - WPRINTF(("%s: Illegal NCQR value %#x\n", __func__, nqr)); + WPRINTF(("%s: Illegal NCQR value %#x", __func__, nqr)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (-1); } sc->num_cqueues = ONE_BASED(nqr); if (sc->num_cqueues > sc->max_queues) { - DPRINTF(("NCQR=%u is greater than max %u\n", sc->num_cqueues, + DPRINTF(("NCQR=%u is greater than max %u", sc->num_cqueues, sc->max_queues)); sc->num_cqueues = sc->max_queues; } @@ -842,33 +891,33 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct 
nvme_command* command, int feature = command->cdw10 & 0xFF; uint32_t iv; - DPRINTF(("%s feature 0x%x\r\n", __func__, feature)); + DPRINTF(("%s feature 0x%x", __func__, feature)); compl->cdw0 = 0; switch (feature) { case NVME_FEAT_ARBITRATION: - DPRINTF((" arbitration 0x%x\r\n", command->cdw11)); + DPRINTF((" arbitration 0x%x", command->cdw11)); break; case NVME_FEAT_POWER_MANAGEMENT: - DPRINTF((" power management 0x%x\r\n", command->cdw11)); + DPRINTF((" power management 0x%x", command->cdw11)); break; case NVME_FEAT_LBA_RANGE_TYPE: - DPRINTF((" lba range 0x%x\r\n", command->cdw11)); + DPRINTF((" lba range 0x%x", command->cdw11)); break; case NVME_FEAT_TEMPERATURE_THRESHOLD: - DPRINTF((" temperature threshold 0x%x\r\n", command->cdw11)); + DPRINTF((" temperature threshold 0x%x", command->cdw11)); break; case NVME_FEAT_ERROR_RECOVERY: - DPRINTF((" error recovery 0x%x\r\n", command->cdw11)); + DPRINTF((" error recovery 0x%x", command->cdw11)); break; case NVME_FEAT_VOLATILE_WRITE_CACHE: - DPRINTF((" volatile write cache 0x%x\r\n", command->cdw11)); + DPRINTF((" volatile write cache 0x%x", command->cdw11)); break; case NVME_FEAT_NUMBER_OF_QUEUES: nvme_set_feature_queues(sc, command, compl); break; case NVME_FEAT_INTERRUPT_COALESCING: - DPRINTF((" interrupt coalescing 0x%x\r\n", command->cdw11)); + DPRINTF((" interrupt coalescing 0x%x", command->cdw11)); /* in uS */ sc->intr_coales_aggr_time = ((command->cdw11 >> 8) & 0xFF)*100; @@ -878,7 +927,7 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: iv = command->cdw11 & 0xFFFF; - DPRINTF((" interrupt vector configuration 0x%x\r\n", + DPRINTF((" interrupt vector configuration 0x%x", command->cdw11)); for (uint32_t i = 0; i < sc->num_cqueues + 1; i++) { @@ -893,23 +942,23 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, } break; case NVME_FEAT_WRITE_ATOMICITY: - DPRINTF((" write atomicity 0x%x\r\n", 
command->cdw11)); + DPRINTF((" write atomicity 0x%x", command->cdw11)); break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: - DPRINTF((" async event configuration 0x%x\r\n", + DPRINTF((" async event configuration 0x%x", command->cdw11)); sc->async_ev_config = command->cdw11; break; case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: - DPRINTF((" software progress marker 0x%x\r\n", + DPRINTF((" software progress marker 0x%x", command->cdw11)); break; case 0x0C: - DPRINTF((" autonomous power state transition 0x%x\r\n", + DPRINTF((" autonomous power state transition 0x%x", command->cdw11)); break; default: - WPRINTF(("%s invalid feature\r\n", __func__)); + WPRINTF(("%s invalid feature", __func__)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } @@ -924,22 +973,22 @@ nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, { int feature = command->cdw10 & 0xFF; - DPRINTF(("%s feature 0x%x\r\n", __func__, feature)); + DPRINTF(("%s feature 0x%x", __func__, feature)); compl->cdw0 = 0; switch (feature) { case NVME_FEAT_ARBITRATION: - DPRINTF((" arbitration\r\n")); + DPRINTF((" arbitration")); break; case NVME_FEAT_POWER_MANAGEMENT: - DPRINTF((" power management\r\n")); + DPRINTF((" power management")); break; case NVME_FEAT_LBA_RANGE_TYPE: - DPRINTF((" lba range\r\n")); + DPRINTF((" lba range")); break; case NVME_FEAT_TEMPERATURE_THRESHOLD: - DPRINTF((" temperature threshold\r\n")); + DPRINTF((" temperature threshold")); switch ((command->cdw11 >> 20) & 0x3) { case 0: /* Over temp threshold */ @@ -950,47 +999,47 @@ nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, compl->cdw0 = 0; break; default: - WPRINTF((" invalid threshold type select\r\n")); + WPRINTF((" invalid threshold type select")); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } break; case NVME_FEAT_ERROR_RECOVERY: - DPRINTF((" error recovery\r\n")); + DPRINTF((" error recovery")); break; case 
NVME_FEAT_VOLATILE_WRITE_CACHE: - DPRINTF((" volatile write cache\r\n")); + DPRINTF((" volatile write cache")); break; case NVME_FEAT_NUMBER_OF_QUEUES: compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); - DPRINTF((" number of queues (submit %u, completion %u)\r\n", + DPRINTF((" number of queues (submit %u, completion %u)", compl->cdw0 & 0xFFFF, (compl->cdw0 >> 16) & 0xFFFF)); break; case NVME_FEAT_INTERRUPT_COALESCING: - DPRINTF((" interrupt coalescing\r\n")); + DPRINTF((" interrupt coalescing")); break; case NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION: - DPRINTF((" interrupt vector configuration\r\n")); + DPRINTF((" interrupt vector configuration")); break; case NVME_FEAT_WRITE_ATOMICITY: - DPRINTF((" write atomicity\r\n")); + DPRINTF((" write atomicity")); break; case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: - DPRINTF((" async event configuration\r\n")); + DPRINTF((" async event configuration")); sc->async_ev_config = command->cdw11; break; case NVME_FEAT_SOFTWARE_PROGRESS_MARKER: - DPRINTF((" software progress marker\r\n")); + DPRINTF((" software progress marker")); break; case 0x0C: - DPRINTF((" autonomous power state transition\r\n")); + DPRINTF((" autonomous power state transition")); break; default: - WPRINTF(("%s invalid feature 0x%x\r\n", __func__, feature)); + WPRINTF(("%s invalid feature 0x%x", __func__, feature)); pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); return (1); } @@ -1003,7 +1052,7 @@ static int nvme_opc_abort(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - DPRINTF(("%s submission queue %u, command ID 0x%x\r\n", __func__, + DPRINTF(("%s submission queue %u, command ID 0x%x", __func__, command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF)); /* TODO: search for the command ID and abort it */ @@ -1018,7 +1067,7 @@ static int nvme_opc_async_event_req(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { - DPRINTF(("%s async event request 0x%x\r\n", __func__, 
command->cdw11)); + DPRINTF(("%s async event request 0x%x", __func__, command->cdw11)); /* * TODO: raise events when they happen based on the Set Features cmd. @@ -1040,22 +1089,22 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) struct nvme_command *cmd; struct nvme_submission_queue *sq; struct nvme_completion_queue *cq; - int do_intr = 0; uint16_t sqhead; - DPRINTF(("%s index %u\r\n", __func__, (uint32_t)value)); + DPRINTF(("%s index %u", __func__, (uint32_t)value)); sq = &sc->submit_queues[0]; + cq = &sc->compl_queues[0]; sqhead = atomic_load_acq_short(&sq->head); if (atomic_testandset_int(&sq->busy, 1)) { - DPRINTF(("%s SQ busy, head %u, tail %u\r\n", + DPRINTF(("%s SQ busy, head %u, tail %u", __func__, sqhead, sq->tail)); return; } - DPRINTF(("sqhead %u, tail %u\r\n", sqhead, sq->tail)); + DPRINTF(("sqhead %u, tail %u", sqhead, sq->tail)); while (sqhead != atomic_load_acq_short(&sq->tail)) { cmd = &(sq->qbase)[sqhead]; @@ -1064,61 +1113,59 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) switch (cmd->opc) { case NVME_OPC_DELETE_IO_SQ: - DPRINTF(("%s command DELETE_IO_SQ\r\n", __func__)); - do_intr |= nvme_opc_delete_io_sq(sc, cmd, &compl); + DPRINTF(("%s command DELETE_IO_SQ", __func__)); + nvme_opc_delete_io_sq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_SQ: - DPRINTF(("%s command CREATE_IO_SQ\r\n", __func__)); - do_intr |= nvme_opc_create_io_sq(sc, cmd, &compl); + DPRINTF(("%s command CREATE_IO_SQ", __func__)); + nvme_opc_create_io_sq(sc, cmd, &compl); break; case NVME_OPC_DELETE_IO_CQ: - DPRINTF(("%s command DELETE_IO_CQ\r\n", __func__)); - do_intr |= nvme_opc_delete_io_cq(sc, cmd, &compl); + DPRINTF(("%s command DELETE_IO_CQ", __func__)); + nvme_opc_delete_io_cq(sc, cmd, &compl); break; case NVME_OPC_CREATE_IO_CQ: - DPRINTF(("%s command CREATE_IO_CQ\r\n", __func__)); - do_intr |= nvme_opc_create_io_cq(sc, cmd, &compl); + DPRINTF(("%s command CREATE_IO_CQ", __func__)); + nvme_opc_create_io_cq(sc, cmd, 
&compl); break; case NVME_OPC_GET_LOG_PAGE: - DPRINTF(("%s command GET_LOG_PAGE\r\n", __func__)); - do_intr |= nvme_opc_get_log_page(sc, cmd, &compl); + DPRINTF(("%s command GET_LOG_PAGE", __func__)); + nvme_opc_get_log_page(sc, cmd, &compl); break; case NVME_OPC_IDENTIFY: - DPRINTF(("%s command IDENTIFY\r\n", __func__)); - do_intr |= nvme_opc_identify(sc, cmd, &compl); + DPRINTF(("%s command IDENTIFY", __func__)); + nvme_opc_identify(sc, cmd, &compl); break; case NVME_OPC_ABORT: - DPRINTF(("%s command ABORT\r\n", __func__)); - do_intr |= nvme_opc_abort(sc, cmd, &compl); + DPRINTF(("%s command ABORT", __func__)); + nvme_opc_abort(sc, cmd, &compl); break; case NVME_OPC_SET_FEATURES: - DPRINTF(("%s command SET_FEATURES\r\n", __func__)); - do_intr |= nvme_opc_set_features(sc, cmd, &compl); + DPRINTF(("%s command SET_FEATURES", __func__)); + nvme_opc_set_features(sc, cmd, &compl); break; case NVME_OPC_GET_FEATURES: - DPRINTF(("%s command GET_FEATURES\r\n", __func__)); - do_intr |= nvme_opc_get_features(sc, cmd, &compl); + DPRINTF(("%s command GET_FEATURES", __func__)); + nvme_opc_get_features(sc, cmd, &compl); break; case NVME_OPC_ASYNC_EVENT_REQUEST: - DPRINTF(("%s command ASYNC_EVENT_REQ\r\n", __func__)); + DPRINTF(("%s command ASYNC_EVENT_REQ", __func__)); /* XXX dont care, unhandled for now - do_intr |= nvme_opc_async_event_req(sc, cmd, &compl); + nvme_opc_async_event_req(sc, cmd, &compl); */ compl.status = NVME_NO_STATUS; break; default: - WPRINTF(("0x%x command is not implemented\r\n", + WPRINTF(("0x%x command is not implemented", cmd->opc)); pci_nvme_status_genc(&compl.status, NVME_SC_INVALID_OPCODE); - do_intr |= 1; } - + sqhead = (sqhead + 1) % sq->size; + if (NVME_COMPLETION_VALID(compl)) { struct nvme_completion *cp; int phase; - cq = &sc->compl_queues[0]; - cp = &(cq->qbase)[cq->tail]; cp->cdw0 = compl.cdw0; cp->sqid = 0; @@ -1131,14 +1178,13 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) cq->tail = (cq->tail + 1) % cq->size; } - 
sqhead = (sqhead + 1) % sq->size; } - DPRINTF(("setting sqhead %u\r\n", sqhead)); + DPRINTF(("setting sqhead %u", sqhead)); atomic_store_short(&sq->head, sqhead); atomic_store_int(&sq->busy, 0); - if (do_intr) + if (cq->head != cq->tail) pci_generate_msix(sc->nsc_pi, 0); } @@ -1169,7 +1215,7 @@ pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, if (iovidx == NVME_MAX_BLOCKIOVS) { int err = 0; - DPRINTF(("large I/O, doing partial req\r\n")); + DPRINTF(("large I/O, doing partial req")); iovidx = 0; req->io_req.br_iovcnt = 0; @@ -1213,7 +1259,7 @@ pci_nvme_append_iov_req(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req, void *gptr; if ((lba + size) > sc->nvstore.size) { - WPRINTF(("%s write would overflow RAM\r\n", __func__)); + WPRINTF(("%s write would overflow RAM", __func__)); return (-1); } @@ -1234,10 +1280,9 @@ pci_nvme_set_completion(struct pci_nvme_softc *sc, { struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid]; struct nvme_completion *compl; - int do_intr = 0; int phase; - DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x\r\n", + DPRINTF(("%s sqid %d cqid %u cid %u status: 0x%x 0x%x", __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status), NVME_STATUS_GET_SC(status))); @@ -1247,8 +1292,9 @@ pci_nvme_set_completion(struct pci_nvme_softc *sc, compl = &cq->qbase[cq->tail]; - compl->sqhd = atomic_load_acq_short(&sq->head); + compl->cdw0 = cdw0; compl->sqid = sqid; + compl->sqhd = atomic_load_acq_short(&sq->head); compl->cid = cid; // toggle phase @@ -1258,14 +1304,16 @@ pci_nvme_set_completion(struct pci_nvme_softc *sc, cq->tail = (cq->tail + 1) % cq->size; - if (cq->intr_en & NVME_CQ_INTEN) - do_intr = 1; - pthread_mutex_unlock(&cq->mtx); - if (ignore_busy || !atomic_load_acq_int(&sq->busy)) - if (do_intr) + if (cq->head != cq->tail) { + if (cq->intr_en & NVME_CQ_INTEN) { pci_generate_msix(sc->nsc_pi, cq->intr_vec); + } else { + DPRINTF(("%s: CQ%u interrupt disabled\n", + __func__, sq->cqid)); + } + } } static 
void @@ -1277,8 +1325,7 @@ pci_nvme_release_ioreq(struct pci_nvme_softc *sc, struct pci_nvme_ioreq *req) pthread_mutex_lock(&sc->mtx); - req->next = sc->ioreqs_free; - sc->ioreqs_free = req; + STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link); sc->pending_ios--; /* when no more IO pending, can set to ready if device reset/enabled */ @@ -1299,12 +1346,10 @@ pci_nvme_get_ioreq(struct pci_nvme_softc *sc) sem_wait(&sc->iosemlock); pthread_mutex_lock(&sc->mtx); - req = sc->ioreqs_free; + req = STAILQ_FIRST(&sc->ioreqs_free); assert(req != NULL); + STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link); - sc->ioreqs_free = req->next; - - req->next = NULL; req->sc = sc; sc->pending_ios++; @@ -1328,8 +1373,8 @@ pci_nvme_io_done(struct blockif_req *br, int err) struct nvme_submission_queue *sq = req->nvme_sq; uint16_t code, status = 0; - DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err))); - + DPRINTF(("%s error %d %s", __func__, err, strerror(err))); + /* TODO return correct error */ code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS; pci_nvme_status_genc(&status, code); @@ -1343,11 +1388,136 @@ pci_nvme_io_partial(struct blockif_req *br, int err) { struct pci_nvme_ioreq *req = br->br_param; - DPRINTF(("%s error %d %s\r\n", __func__, err, strerror(err))); + DPRINTF(("%s error %d %s", __func__, err, strerror(err))); pthread_cond_signal(&req->cv); } +static void +pci_nvme_dealloc_sm(struct blockif_req *br, int err) +{ + struct pci_nvme_ioreq *req = br->br_param; + struct pci_nvme_softc *sc = req->sc; + bool done = true; +#ifdef __FreeBSD__ + uint16_t status; +#else + uint16_t status = 0; +#endif + + if (err) { + pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR); + } else if ((req->prev_gpaddr + 1) == (req->prev_size)) { + pci_nvme_status_genc(&status, NVME_SC_SUCCESS); + } else { + struct iovec *iov = req->io_req.br_iov; + + req->prev_gpaddr++; + iov += req->prev_gpaddr; + + /* The iov_* values already include the sector size */ + req->io_req.br_offset = 
(off_t)iov->iov_base; + req->io_req.br_resid = iov->iov_len; + if (blockif_delete(sc->nvstore.ctx, &req->io_req)) { + pci_nvme_status_genc(&status, + NVME_SC_INTERNAL_DEVICE_ERROR); + } else + done = false; + } + + if (done) { + pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, + req->cid, 0, status, 0); + pci_nvme_release_ioreq(sc, req); + } +} + +static int +nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc, + struct nvme_command *cmd, + struct pci_nvme_blockstore *nvstore, + struct pci_nvme_ioreq *req, + uint16_t *status) +{ + int err = -1; + + if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) { + pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE); + goto out; + } + + if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) { + struct nvme_dsm_range *range; + uint32_t nr, r; + int sectsz = sc->nvstore.sectsz; + + /* + * DSM calls are advisory only, and compliant controllers + * may choose to take no actions (i.e. return Success). + */ + if (!nvstore->deallocate) { + pci_nvme_status_genc(status, NVME_SC_SUCCESS); + goto out; + } + + if (req == NULL) { + pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); + goto out; + } + + /* copy locally because a range entry could straddle PRPs */ + range = calloc(1, NVME_MAX_DSM_TRIM); + if (range == NULL) { + pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); + goto out; + } + nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2, + (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP); + + req->opc = cmd->opc; + req->cid = cmd->cid; + req->nsid = cmd->nsid; + /* + * If the request is for more than a single range, store + * the ranges in the br_iov. Optimize for the common case + * of a single range. 
+ * + * Note that NVMe Number of Ranges is a zero based value + */ + nr = cmd->cdw10 & 0xff; + + req->io_req.br_iovcnt = 0; + req->io_req.br_offset = range[0].starting_lba * sectsz; + req->io_req.br_resid = range[0].length * sectsz; + + if (nr == 0) { + req->io_req.br_callback = pci_nvme_io_done; + } else { + struct iovec *iov = req->io_req.br_iov; + + for (r = 0; r <= nr; r++) { + iov[r].iov_base = (void *)(range[r].starting_lba * sectsz); + iov[r].iov_len = range[r].length * sectsz; + } + req->io_req.br_callback = pci_nvme_dealloc_sm; + + /* + * Use prev_gpaddr to track the current entry and + * prev_size to track the number of entries + */ + req->prev_gpaddr = 0; + req->prev_size = r; + } + + err = blockif_delete(nvstore->ctx, &req->io_req); + if (err) + pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR); + + free(range); + } +out: + return (err); +} static void pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) @@ -1361,13 +1531,13 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) sq = &sc->submit_queues[idx]; if (atomic_testandset_int(&sq->busy, 1)) { - DPRINTF(("%s sqid %u busy\r\n", __func__, idx)); + DPRINTF(("%s sqid %u busy", __func__, idx)); return; } sqhead = atomic_load_acq_short(&sq->head); - DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p\r\n", + DPRINTF(("nvme_handle_io qid %u head %u tail %u cmdlist %p", idx, sqhead, sq->tail, sq->qbase)); while (sqhead != atomic_load_acq_short(&sq->tail)) { @@ -1391,7 +1561,7 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) continue; } else if (cmd->opc == 0x08) { /* TODO: write zeroes */ - WPRINTF(("%s write zeroes lba 0x%lx blocks %u\r\n", + WPRINTF(("%s write zeroes lba 0x%lx blocks %u", __func__, lba, cmd->cdw12 & 0xFFFF)); pci_nvme_status_genc(&status, NVME_SC_SUCCESS); pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0, @@ -1400,23 +1570,34 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) continue; } - nblocks = (cmd->cdw12 & 
0xFFFF) + 1; - - bytes = nblocks * sc->nvstore.sectsz; - if (sc->nvstore.type == NVME_STOR_BLOCKIF) { req = pci_nvme_get_ioreq(sc); req->nvme_sq = sq; req->sqid = idx; } + if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) { + if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req, + &status)) { + pci_nvme_set_completion(sc, sq, idx, cmd->cid, + 0, status, 1); + if (req) + pci_nvme_release_ioreq(sc, req); + } + continue; + } + + nblocks = (cmd->cdw12 & 0xFFFF) + 1; + + bytes = nblocks * sc->nvstore.sectsz; + /* * If data starts mid-page and flows into the next page, then * increase page count */ DPRINTF(("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu " - "(%lu-bytes)\r\n", + "(%lu-bytes)", sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size, cmd->opc == NVME_OPC_WRITE ? "WRITE" : "READ", @@ -1425,7 +1606,7 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) cmd->prp1 &= ~(0x03UL); cmd->prp2 &= ~(0x03UL); - DPRINTF((" prp1 0x%lx prp2 0x%lx\r\n", cmd->prp1, cmd->prp2)); + DPRINTF((" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2)); size = bytes; lba *= sc->nvstore.sectsz; @@ -1483,7 +1664,7 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx) i = 0; } if (prp_list[i] == 0) { - WPRINTF(("PRP2[%d] = 0 !!!\r\n", i)); + WPRINTF(("PRP2[%d] = 0 !!!", i)); err = 1; break; } @@ -1529,7 +1710,7 @@ iodone: err = blockif_write(sc->nvstore.ctx, &req->io_req); break; default: - WPRINTF(("%s unhandled io command 0x%x\r\n", + WPRINTF(("%s unhandled io command 0x%x", __func__, cmd->opc)); err = 1; } @@ -1555,7 +1736,7 @@ static void pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, uint64_t idx, int is_sq, uint64_t value) { - DPRINTF(("nvme doorbell %lu, %s, val 0x%lx\r\n", + DPRINTF(("nvme doorbell %lu, %s, val 0x%lx", idx, is_sq ? 
"SQ" : "CQ", value & 0xFFFF)); if (is_sq) { @@ -1568,7 +1749,7 @@ pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, /* submission queue; handle new entries in SQ */ if (idx > sc->num_squeues) { WPRINTF(("%s SQ index %lu overflow from " - "guest (max %u)\r\n", + "guest (max %u)", __func__, idx, sc->num_squeues)); return; } @@ -1577,7 +1758,7 @@ pci_nvme_handle_doorbell(struct vmctx *ctx, struct pci_nvme_softc* sc, } else { if (idx > sc->num_cqueues) { WPRINTF(("%s queue index %lu overflow from " - "guest (max %u)\r\n", + "guest (max %u)", __func__, idx, sc->num_cqueues)); return; } @@ -1593,46 +1774,46 @@ pci_nvme_bar0_reg_dumps(const char *func, uint64_t offset, int iswrite) switch (offset) { case NVME_CR_CAP_LOW: - DPRINTF(("%s %s NVME_CR_CAP_LOW\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CAP_LOW", func, s)); break; case NVME_CR_CAP_HI: - DPRINTF(("%s %s NVME_CR_CAP_HI\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CAP_HI", func, s)); break; case NVME_CR_VS: - DPRINTF(("%s %s NVME_CR_VS\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_VS", func, s)); break; case NVME_CR_INTMS: - DPRINTF(("%s %s NVME_CR_INTMS\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_INTMS", func, s)); break; case NVME_CR_INTMC: - DPRINTF(("%s %s NVME_CR_INTMC\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_INTMC", func, s)); break; case NVME_CR_CC: - DPRINTF(("%s %s NVME_CR_CC\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CC", func, s)); break; case NVME_CR_CSTS: - DPRINTF(("%s %s NVME_CR_CSTS\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_CSTS", func, s)); break; case NVME_CR_NSSR: - DPRINTF(("%s %s NVME_CR_NSSR\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_NSSR", func, s)); break; case NVME_CR_AQA: - DPRINTF(("%s %s NVME_CR_AQA\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_AQA", func, s)); break; case NVME_CR_ASQ_LOW: - DPRINTF(("%s %s NVME_CR_ASQ_LOW\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ASQ_LOW", func, s)); break; case NVME_CR_ASQ_HI: - DPRINTF(("%s %s NVME_CR_ASQ_HI\r\n", func, s)); + 
DPRINTF(("%s %s NVME_CR_ASQ_HI", func, s)); break; case NVME_CR_ACQ_LOW: - DPRINTF(("%s %s NVME_CR_ACQ_LOW\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ACQ_LOW", func, s)); break; case NVME_CR_ACQ_HI: - DPRINTF(("%s %s NVME_CR_ACQ_HI\r\n", func, s)); + DPRINTF(("%s %s NVME_CR_ACQ_HI", func, s)); break; default: - DPRINTF(("unknown nvme bar-0 offset 0x%lx\r\n", offset)); + DPRINTF(("unknown nvme bar-0 offset 0x%lx", offset)); } } @@ -1659,7 +1840,7 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, return; } - DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx\r\n", + DPRINTF(("nvme-write offset 0x%lx, size %d, value 0x%lx", offset, size, value)); if (size != 4) { @@ -1692,7 +1873,7 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, ccreg = (uint32_t)value; DPRINTF(("%s NVME_CR_CC en %x css %x shn %x iosqes %u " - "iocqes %u\r\n", + "iocqes %u", __func__, NVME_CC_GET_EN(ccreg), NVME_CC_GET_CSS(ccreg), NVME_CC_GET_SHN(ccreg), NVME_CC_GET_IOSQES(ccreg), @@ -1750,7 +1931,7 @@ pci_nvme_write_bar_0(struct vmctx *ctx, struct pci_nvme_softc* sc, (value << 32); break; default: - DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d\r\n", + DPRINTF(("%s unknown offset 0x%lx, value 0x%lx size %d", __func__, offset, value, size)); } pthread_mutex_unlock(&sc->mtx); @@ -1765,7 +1946,7 @@ pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { DPRINTF(("nvme-write baridx %d, msix: off 0x%lx, size %d, " - " value 0x%lx\r\n", baridx, offset, size, value)); + " value 0x%lx", baridx, offset, size, value)); pci_emul_msix_twrite(pi, offset, size, value); return; @@ -1777,7 +1958,7 @@ pci_nvme_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, break; default: - DPRINTF(("%s unknown baridx %d, val 0x%lx\r\n", + DPRINTF(("%s unknown baridx %d, val 0x%lx", __func__, baridx, value)); } } @@ -1796,7 +1977,7 @@ static uint64_t pci_nvme_read_bar_0(struct 
pci_nvme_softc* sc, pthread_mutex_unlock(&sc->mtx); } else { value = 0; - WPRINTF(("pci_nvme: read invalid offset %ld\r\n", offset)); + WPRINTF(("pci_nvme: read invalid offset %ld", offset)); } switch (size) { @@ -1811,7 +1992,7 @@ static uint64_t pci_nvme_read_bar_0(struct pci_nvme_softc* sc, break; } - DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x\r\n", + DPRINTF((" nvme-read offset 0x%lx, size %d -> value 0x%x", offset, size, (uint32_t)value)); return (value); @@ -1827,7 +2008,7 @@ pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, if (baridx == pci_msix_table_bar(pi) || baridx == pci_msix_pba_bar(pi)) { - DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d\r\n", + DPRINTF(("nvme-read bar: %d, msix: regoff 0x%lx, size %d", baridx, offset, size)); return pci_emul_msix_tread(pi, offset, size); @@ -1838,7 +2019,7 @@ pci_nvme_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, return pci_nvme_read_bar_0(sc, offset, size); default: - DPRINTF(("unknown bar %d, 0x%lx\r\n", baridx, offset)); + DPRINTF(("unknown bar %d, 0x%lx", baridx, offset)); } return (0); @@ -1858,6 +2039,7 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) sc->ioslots = NVME_IOSLOTS; sc->num_squeues = sc->max_queues; sc->num_cqueues = sc->max_queues; + sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO; sectsz = 0; uopt = strdup(opts); @@ -1902,6 +2084,13 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) } } else if (!strcmp("eui64", xopts)) { sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0)); + } else if (!strcmp("dsm", xopts)) { + if (!strcmp("auto", config)) + sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO; + else if (!strcmp("enable", config)) + sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE; + else if (!strcmp("disable", config)) + sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE; } else if (optidx == 0) { snprintf(bident, sizeof(bident), "%d:%d", sc->nsc_pi->pi_slot, 
sc->nsc_pi->pi_func); @@ -1914,7 +2103,7 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) sc->nvstore.type = NVME_STOR_BLOCKIF; sc->nvstore.size = blockif_size(sc->nvstore.ctx); } else { - fprintf(stderr, "Invalid option %s\n", xopts); + EPRINTLN("Invalid option %s", xopts); free(uopt); return (-1); } @@ -1924,7 +2113,7 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) free(uopt); if (sc->nvstore.ctx == NULL || sc->nvstore.size == 0) { - fprintf(stderr, "backing store not specified\n"); + EPRINTLN("backing store not specified"); return (-1); } if (sectsz == 512 || sectsz == 4096 || sectsz == 8192) @@ -1939,11 +2128,11 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *opts) sc->max_queues = NVME_QUEUES; if (sc->max_qentries <= 0) { - fprintf(stderr, "Invalid qsz option\n"); + EPRINTLN("Invalid qsz option"); return (-1); } if (sc->ioslots <= 0) { - fprintf(stderr, "Invalid ioslots option\n"); + EPRINTLN("Invalid ioslots option"); return (-1); } @@ -1969,14 +2158,13 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) else error = 0; + STAILQ_INIT(&sc->ioreqs_free); sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq)); for (int i = 0; i < sc->ioslots; i++) { - if (i < (sc->ioslots-1)) - sc->ioreqs[i].next = &sc->ioreqs[i+1]; + STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link); pthread_mutex_init(&sc->ioreqs[i].mtx, NULL); pthread_cond_init(&sc->ioreqs[i].cv, NULL); } - sc->ioreqs_free = sc->ioreqs; sc->intr_coales_aggr_thresh = 1; pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0A0A); @@ -1997,23 +2185,23 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 2 * sizeof(uint32_t) * (sc->max_queues + 1); pci_membar_sz = MAX(pci_membar_sz, NVME_MMIO_SPACE_MIN); - DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz)); + DPRINTF(("nvme membar size: %u", pci_membar_sz)); error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM64, pci_membar_sz); if (error) { - WPRINTF(("%s pci alloc mem bar failed\r\n", 
__func__)); + WPRINTF(("%s pci alloc mem bar failed", __func__)); goto done; } error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { - WPRINTF(("%s pci add msixcap failed\r\n", __func__)); + WPRINTF(("%s pci add msixcap failed", __func__)); goto done; } error = pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_INT_EP); if (error) { - WPRINTF(("%s pci add Express capability failed\r\n", __func__)); + WPRINTF(("%s pci add Express capability failed", __func__)); goto done; } @@ -2021,8 +2209,12 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sem_init(&sc->iosemlock, 0, sc->ioslots); pci_nvme_reset(sc); + /* + * Controller data depends on Namespace data so initialize Namespace + * data first. + */ + pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore); pci_nvme_init_ctrldata(sc); - pci_nvme_init_nsdata(sc, &sc->nsdata, 1, sc->nvstore.eui64); pci_nvme_init_logpages(sc); pci_lintr_request(pi); diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c index 7dff426253..664d07b731 100644 --- a/usr/src/cmd/bhyve/pci_passthru.c +++ b/usr/src/cmd/bhyve/pci_passthru.c @@ -825,8 +825,8 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, * MSI capability is emulated */ if (msicap_access(sc, coff)) { - msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); - + pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msi.capoff, + PCIY_MSI); error = vm_setup_pptdev_msi(ctx, vcpu, sc->pptfd, pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum); if (error != 0) @@ -835,7 +835,8 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, } if (msixcap_access(sc, coff)) { - msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val); + pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msix.capoff, + PCIY_MSIX); if (pi->pi_msix.enabled) { msix_table_entries = pi->pi_msix.table_count; for (i = 0; i < msix_table_entries; i++) { diff --git a/usr/src/cmd/bhyve/pci_uart.c 
b/usr/src/cmd/bhyve/pci_uart.c index 093d0cb361..2e8177bafb 100644 --- a/usr/src/cmd/bhyve/pci_uart.c +++ b/usr/src/cmd/bhyve/pci_uart.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <stdio.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "uart_emul.h" @@ -104,8 +105,8 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) pi->pi_arg = sc; if (uart_set_backend(sc, opts) != 0) { - fprintf(stderr, "Unable to initialize backend '%s' for " - "pci uart at %d:%d\n", opts, pi->pi_slot, pi->pi_func); + EPRINTLN("Unable to initialize backend '%s' for " + "pci uart at %d:%d", opts, pi->pi_slot, pi->pi_func); return (-1); } diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index 406a232710..a34bd864be 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <md5.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "block_if.h" @@ -91,6 +92,7 @@ _Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able t #define VTBLK_F_WCE (1 << 9) /* Legacy alias for FLUSH */ #define VTBLK_F_TOPOLOGY (1 << 10) /* Topology information is available */ #define VTBLK_F_CONFIG_WCE (1 << 11) /* Writeback mode available in config */ +#define VTBLK_F_MQ (1 << 12) /* Multi-Queue */ #define VTBLK_F_DISCARD (1 << 13) /* Trim blocks */ #define VTBLK_F_WRITE_ZEROES (1 << 14) /* Write zeros */ @@ -136,7 +138,8 @@ struct vtblk_config { uint32_t opt_io_size; } vbc_topology; uint8_t vbc_writeback; - uint8_t unused0[3]; + uint8_t unused0[1]; + uint16_t num_queues; uint32_t max_discard_sectors; uint32_t max_discard_seg; uint32_t discard_sector_alignment; @@ -170,8 +173,8 @@ struct virtio_blk_hdr { * Debug printf */ static int pci_vtblk_debug; -#define DPRINTF(params) if (pci_vtblk_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if 
(pci_vtblk_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params struct pci_vtblk_ioreq { struct blockif_req io_req; @@ -235,7 +238,7 @@ pci_vtblk_reset(void *vsc) { struct pci_vtblk_softc *sc = vsc; - DPRINTF(("vtblk: device reset requested !\n")); + DPRINTF(("vtblk: device reset requested !")); vi_reset_dev(&sc->vbsc_vs); #ifndef __FreeBSD__ /* Disable write cache until FLUSH feature is negotiated */ @@ -333,7 +336,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) } io->io_req.br_resid = iolen; - DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r", + DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld", writeop ? "write/discard" : "read/ident", iolen, i - 1, io->io_req.br_offset)); @@ -424,7 +427,7 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) int i, sectsz, sts, sto; if (opts == NULL) { - printf("virtio-block: backing device required\n"); + WPRINTF(("virtio-block: backing device required")); return (1); } @@ -533,7 +536,7 @@ static int pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) { - DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); + DPRINTF(("vtblk: write to readonly reg %d", offset)); return (1); } diff --git a/usr/src/cmd/bhyve/pci_virtio_console.c b/usr/src/cmd/bhyve/pci_virtio_console.c index f7038ff40f..5799b20f6a 100644 --- a/usr/src/cmd/bhyve/pci_virtio_console.c +++ b/usr/src/cmd/bhyve/pci_virtio_console.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <sysexits.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "mevent.h" @@ -89,8 +90,8 @@ __FBSDID("$FreeBSD$"); (VTCON_F_SIZE | VTCON_F_MULTIPORT | VTCON_F_EMERG_WRITE) static int pci_vtcon_debug; -#define DPRINTF(params) if (pci_vtcon_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtcon_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params struct pci_vtcon_softc; struct pci_vtcon_port; @@ 
-191,7 +192,7 @@ pci_vtcon_reset(void *vsc) sc = vsc; - DPRINTF(("vtcon: device reset requested!\n")); + DPRINTF(("vtcon: device reset requested!")); vi_reset_dev(&sc->vsc_vs); } @@ -442,7 +443,7 @@ pci_vtcon_sock_rx(int fd __unused, enum ev_type t __unused, void *arg) len = readv(sock->vss_conn_fd, &iov, n); if (len == 0 || (len < 0 && errno == EWOULDBLOCK)) { - vq_retchain(vq); + vq_retchains(vq, 1); vq_endchains(vq, 0); if (len == 0) goto close; @@ -521,7 +522,7 @@ pci_vtcon_control_tx(struct pci_vtcon_port *port, void *arg, struct iovec *iov, case VTCON_PORT_READY: if (ctrl->id >= sc->vsc_nports) { - WPRINTF(("VTCON_PORT_READY event for unknown port %d\n", + WPRINTF(("VTCON_PORT_READY event for unknown port %d", ctrl->id)); return; } @@ -686,7 +687,7 @@ pci_vtcon_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* create port */ if (pci_vtcon_sock_add(sc, portname, portpath) < 0) { - fprintf(stderr, "cannot create port %s: %s\n", + EPRINTLN("cannot create port %s: %s", portname, strerror(errno)); return (1); } diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c index 73f8aa0d6b..3a1cc46a06 100644 --- a/usr/src/cmd/bhyve/pci_virtio_net.c +++ b/usr/src/cmd/bhyve/pci_virtio_net.c @@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$"); #endif #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #ifdef __FreeBSD__ #include "mevent.h" @@ -127,6 +128,8 @@ __FBSDID("$FreeBSD$"); struct virtio_net_config { uint8_t mac[6]; uint16_t status; + uint16_t max_virtqueue_pairs; + uint16_t mtu; } __packed; /* @@ -155,8 +158,8 @@ struct virtio_net_rxhdr { * Debug printf */ static int pci_vtnet_debug; -#define DPRINTF(params) if (pci_vtnet_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* * Per-device softc @@ -181,6 +184,7 @@ struct pci_vtnet_softc { uint64_t vsc_features; /* negotiated features */ struct virtio_net_config 
vsc_config; + struct virtio_consts vsc_consts; pthread_mutex_t rx_mtx; int rx_vhdrlen; @@ -219,7 +223,7 @@ pci_vtnet_reset(void *vsc) { struct pci_vtnet_softc *sc = vsc; - DPRINTF(("vtnet: device reset requested !\n")); + DPRINTF(("vtnet: device reset requested !")); /* Acquire the RX lock to block RX processing. */ pthread_mutex_lock(&sc->rx_mtx); @@ -403,7 +407,7 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) * No more packets, but still some avail ring * entries. Interrupt if needed/appropriate. */ - vq_retchain(vq); + vq_retchains(vq, 1); vq_endchains(vq, 0); return; } @@ -673,7 +677,7 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) struct pci_vtnet_softc *sc = vsc; /* - * A qnotify means that the rx process can now begin + * A qnotify means that the rx process can now begin. */ if (sc->vsc_rx_ready == 0) { sc->vsc_rx_ready = 1; @@ -788,7 +792,7 @@ static void pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) { - DPRINTF(("vtnet: control qnotify!\n\r")); + DPRINTF(("vtnet: control qnotify!")); } #endif /* __FreeBSD__ */ @@ -914,86 +918,132 @@ pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) static int pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { - char tname[MAXCOMLEN + 1]; struct pci_vtnet_softc *sc; - const char *env_msi; - char *devname; - char *vtopts; + char tname[MAXCOMLEN + 1]; #ifdef __FreeBSD__ int mac_provided; + int mtu_provided; + unsigned long mtu = ETHERMTU; +#else + int use_msix = 1; #endif - int use_msix; + /* + * Allocate data structures for further virtio initializations. + * sc also contains a copy of vtnet_vi_consts, since capabilities + * change depending on the backend. 
+ */ sc = calloc(1, sizeof(struct pci_vtnet_softc)); + sc->vsc_consts = vtnet_vi_consts; pthread_mutex_init(&sc->vsc_mtx, NULL); - vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues); - sc->vsc_vs.vs_mtx = &sc->vsc_mtx; - sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; -#ifdef __FreeBSD__ +#ifdef notyet sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; #endif /* - * Use MSI if set by user - */ - use_msix = 1; - if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) { - if (strcasecmp(env_msi, "yes") == 0) - use_msix = 0; - } - - /* - * Attempt to open the tap device and read the MAC address - * if specified + * Attempt to open the backend device and read the MAC address + * if specified. */ -#ifdef __FreeBSD__ +#ifdef __FreeBSD__ mac_provided = 0; - sc->vsc_tapfd = -1; + mtu_provided = 0; #endif - sc->vsc_nmd = NULL; if (opts != NULL) { -#ifdef __FreeBSD__ - int err; -#endif + char *optscopy; + char *vtopts; + int err = 0; - devname = vtopts = strdup(opts); + /* Get the device name. */ + optscopy = vtopts = strdup(opts); (void) strsep(&vtopts, ","); -#ifdef __FreBSD__ - if (vtopts != NULL) { - err = net_parsemac(vtopts, sc->vsc_config.mac); - if (err != 0) { - free(devname); - return (err); +#ifdef __FreeBSD__ + /* + * Parse the list of options in the form + * key1=value1,...,keyN=valueN. 
+ */ + while (vtopts != NULL) { + char *value = vtopts; + char *key; + + key = strsep(&value, "="); + if (value == NULL) + break; + vtopts = value; + (void) strsep(&vtopts, ","); + + if (strcmp(key, "mac") == 0) { + err = net_parsemac(value, sc->vsc_config.mac); + if (err) + break; + mac_provided = 1; + } else if (strcmp(key, "mtu") == 0) { + err = net_parsemtu(value, &mtu); + if (err) + break; + + if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) { + err = EINVAL; + errno = EINVAL; + break; + } + mtu_provided = 1; } - mac_provided = 1; } #endif +#ifndef __FreeBSD__ + /* Use the already strsep(",")-ed optscopy */ + if (strncmp(optscopy, "tap", 3) == 0 || + strncmp(optscopy, "vmnet", 5) == 0) + pci_vtnet_tap_setup(sc, optscopy); +#endif + + free(optscopy); + + if (err) { + free(sc); + return (err); + } + #ifdef __FreeBSD__ - if (strncmp(devname, "vale", 4) == 0) - pci_vtnet_netmap_setup(sc, devname); + err = netbe_init(&sc->vsc_be, opts, pci_vtnet_rx_callback, + sc); + if (err) { + free(sc); + return (err); + } + + sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF | + netbe_get_cap(sc->vsc_be); #endif - if (strncmp(devname, "tap", 3) == 0 || - strncmp(devname, "vmnet", 5) == 0) - pci_vtnet_tap_setup(sc, devname); - free(devname); } -#ifdef __FreeBSD__ +#ifdef __FreeBSD__ if (!mac_provided) { net_genmac(pi, sc->vsc_config.mac); } + + sc->vsc_config.mtu = mtu; + if (mtu_provided) { + sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU; + } #endif + /* + * Since we do not actually support multiqueue, + * set the maximum virtqueue pairs to 1. 
+ */ + sc->vsc_config.max_virtqueue_pairs = 1; + /* initialize config space */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); @@ -1006,9 +1056,9 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 || #else sc->vsc_config.status = (opts == NULL || sc->vsc_dlpifd >= 0 || -#endif sc->vsc_nmd != NULL); - +#endif + /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ if (vi_intr_init(&sc->vsc_vs, 1, use_msix)) return (1); @@ -1053,7 +1103,7 @@ pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) memcpy(ptr, &value, size); } else { /* silently ignore other writes */ - DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); + DPRINTF(("vtnet: write to readonly reg %d", offset)); } return (0); diff --git a/usr/src/cmd/bhyve/pci_virtio_rnd.c b/usr/src/cmd/bhyve/pci_virtio_rnd.c index 5f470c03a6..4f908324cf 100644 --- a/usr/src/cmd/bhyve/pci_virtio_rnd.c +++ b/usr/src/cmd/bhyve/pci_virtio_rnd.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include <sysexits.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" @@ -65,8 +66,8 @@ __FBSDID("$FreeBSD$"); static int pci_vtrnd_debug; -#define DPRINTF(params) if (pci_vtrnd_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtrnd_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* * Per-device softc @@ -102,7 +103,7 @@ pci_vtrnd_reset(void *vsc) sc = vsc; - DPRINTF(("vtrnd: device reset requested !\n")); + DPRINTF(("vtrnd: device reset requested !")); vi_reset_dev(&sc->vrsc_vs); } @@ -127,7 +128,7 @@ pci_vtrnd_notify(void *vsc, struct vqueue_info *vq) len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len); - DPRINTF(("vtrnd: vtrnd_notify(): %d\r\n", len)); + DPRINTF(("vtrnd: vtrnd_notify(): %d", len)); /* Catastrophe if unable to read from /dev/random */ assert(len > 0); diff --git 
a/usr/src/cmd/bhyve/pci_virtio_scsi.c b/usr/src/cmd/bhyve/pci_virtio_scsi.c index 632f920293..92a3311b69 100644 --- a/usr/src/cmd/bhyve/pci_virtio_scsi.c +++ b/usr/src/cmd/bhyve/pci_virtio_scsi.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include <camlib.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" #include "iov.h" @@ -86,8 +87,8 @@ __FBSDID("$FreeBSD$"); #define VIRTIO_SCSI_F_CHANGE (1 << 2) static int pci_vtscsi_debug = 0; -#define DPRINTF(params) if (pci_vtscsi_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (pci_vtscsi_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params struct pci_vtscsi_config { uint32_t num_queues; @@ -287,7 +288,7 @@ pci_vtscsi_proc(void *arg) vq_endchains(q->vsq_vq, 0); pthread_mutex_unlock(&q->vsq_qmtx); - DPRINTF(("virtio-scsi: request <idx=%d> completed\n", + DPRINTF(("virtio-scsi: request <idx=%d> completed", req->vsr_idx)); free(req); } @@ -303,7 +304,7 @@ pci_vtscsi_reset(void *vsc) sc = vsc; - DPRINTF(("vtscsi: device reset requested\n")); + DPRINTF(("vtscsi: device reset requested")); vi_reset_dev(&sc->vss_vs); /* initialize config structure */ @@ -438,7 +439,7 @@ pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) - WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); + WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); tmf->response = io->taskio.task_status; ctl_scsi_free_io(io); @@ -531,7 +532,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) { - WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); + WPRINTF(("CTL_IO: err=%d (%s)", errno, strerror(errno))); cmd_wr->response = VIRTIO_SCSI_S_FAILURE; } else { cmd_wr->sense_len = MIN(io->scsiio.sense_len, @@ -627,7 +628,7 @@ pci_vtscsi_requestq_notify(void *vsc, struct vqueue_info *vq) pthread_cond_signal(&q->vsq_cv); 
pthread_mutex_unlock(&q->vsq_mtx); - DPRINTF(("virtio-scsi: request <idx=%d> enqueued\n", idx)); + DPRINTF(("virtio-scsi: request <idx=%d> enqueued", idx)); } } @@ -683,7 +684,7 @@ pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) } else if (strcmp(optname, "iid") == 0 && opt != NULL) { sc->vss_iid = strtoul(opt, NULL, 10); } else { - fprintf(stderr, "Invalid option %s\n", optname); + EPRINTLN("Invalid option %s", optname); free(sc); return (1); } @@ -692,7 +693,7 @@ pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) sc->vss_ctl_fd = open(devname, O_RDWR); if (sc->vss_ctl_fd < 0) { - WPRINTF(("cannot open %s: %s\n", devname, strerror(errno))); + WPRINTF(("cannot open %s: %s", devname, strerror(errno))); free(sc); return (1); } diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c index 324c706c47..b92be4dec3 100644 --- a/usr/src/cmd/bhyve/pci_xhci.c +++ b/usr/src/cmd/bhyve/pci_xhci.c @@ -55,14 +55,15 @@ __FBSDID("$FreeBSD$"); #include <xhcireg.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "pci_xhci.h" #include "usb_emul.h" static int xhci_debug = 0; -#define DPRINTF(params) if (xhci_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (xhci_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params #define XHCI_NAME "xhci" @@ -447,19 +448,19 @@ pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, port = (offset - XHCI_PORTREGS_PORT0) / XHCI_PORTREGS_SETSZ; offset = (offset - XHCI_PORTREGS_PORT0) % XHCI_PORTREGS_SETSZ; - DPRINTF(("pci_xhci: portregs wr offset 0x%lx, port %u: 0x%lx\r\n", + DPRINTF(("pci_xhci: portregs wr offset 0x%lx, port %u: 0x%lx", offset, port, value)); assert(port >= 0); if (port > XHCI_MAX_DEVS) { - DPRINTF(("pci_xhci: portregs_write port %d > ndevices\r\n", + DPRINTF(("pci_xhci: portregs_write port %d > ndevices", port)); return; } if (XHCI_DEVINST_PTR(sc, port) == NULL) { - DPRINTF(("pci_xhci: 
portregs_write to unattached port %d\r\n", + DPRINTF(("pci_xhci: portregs_write to unattached port %d", port)); } @@ -474,7 +475,7 @@ pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, if ((p->portsc & XHCI_PS_PP) == 0) { WPRINTF(("pci_xhci: portregs_write to unpowered " - "port %d\r\n", port)); + "port %d", port)); break; } @@ -510,12 +511,12 @@ pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, /* port disable request; for USB3, don't care */ if (value & XHCI_PS_PED) - DPRINTF(("Disable port %d request\r\n", port)); + DPRINTF(("Disable port %d request", port)); if (!(value & XHCI_PS_LWS)) break; - DPRINTF(("Port new PLS: %d\r\n", newpls)); + DPRINTF(("Port new PLS: %d", newpls)); switch (newpls) { case 0: /* U0 */ case 3: /* U3 */ @@ -535,7 +536,7 @@ pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, break; default: - DPRINTF(("Unhandled change port %d PLS %u\r\n", + DPRINTF(("Unhandled change port %d PLS %u", port, newpls)); break; } @@ -546,7 +547,7 @@ pci_xhci_portregs_write(struct pci_xhci_softc *sc, uint64_t offset, break; case 8: /* Port link information register */ - DPRINTF(("pci_xhci attempted write to PORTLI, port %d\r\n", + DPRINTF(("pci_xhci attempted write to PORTLI, port %d", port)); break; case 12: @@ -571,11 +572,11 @@ pci_xhci_get_dev_ctx(struct pci_xhci_softc *sc, uint32_t slot) devctx_addr = sc->opregs.dcbaa_p->dcba[slot]; if (devctx_addr == 0) { - DPRINTF(("get_dev_ctx devctx_addr == 0\r\n")); + DPRINTF(("get_dev_ctx devctx_addr == 0")); return (NULL); } - DPRINTF(("pci_xhci: get dev ctx, slot %u devctx addr %016lx\r\n", + DPRINTF(("pci_xhci: get dev ctx, slot %u devctx addr %016lx", slot, devctx_addr)); devctx = XHCI_GADDR(sc, devctx_addr & ~0x3FUL); @@ -645,7 +646,7 @@ pci_xhci_init_ep(struct pci_xhci_dev_emu *dev, int epid) devep = &dev->eps[epid]; pstreams = XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0); if (pstreams > 0) { - DPRINTF(("init_ep %d with pstreams %d\r\n", epid, pstreams)); 
+ DPRINTF(("init_ep %d with pstreams %d", epid, pstreams)); assert(devep->ep_sctx_trbs == NULL); devep->ep_sctx = XHCI_GADDR(dev->xsc, ep_ctx->qwEpCtx2 & @@ -660,12 +661,12 @@ pci_xhci_init_ep(struct pci_xhci_dev_emu *dev, int epid) XHCI_SCTX_0_DCS_GET(devep->ep_sctx[i].qwSctx0); } } else { - DPRINTF(("init_ep %d with no pstreams\r\n", epid)); + DPRINTF(("init_ep %d with no pstreams", epid)); devep->ep_ringaddr = ep_ctx->qwEpCtx2 & XHCI_EPCTX_2_TR_DQ_PTR_MASK; devep->ep_ccs = XHCI_EPCTX_2_DCS_GET(ep_ctx->qwEpCtx2); devep->ep_tr = XHCI_GADDR(dev->xsc, devep->ep_ringaddr); - DPRINTF(("init_ep tr DCS %x\r\n", devep->ep_ccs)); + DPRINTF(("init_ep tr DCS %x", devep->ep_ccs)); } if (devep->ep_xfer == NULL) { @@ -681,7 +682,7 @@ pci_xhci_disable_ep(struct pci_xhci_dev_emu *dev, int epid) struct pci_xhci_dev_ep *devep; struct xhci_endp_ctx *ep_ctx; - DPRINTF(("pci_xhci disable_ep %d\r\n", epid)); + DPRINTF(("pci_xhci disable_ep %d", epid)); dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; @@ -710,7 +711,7 @@ pci_xhci_reset_slot(struct pci_xhci_softc *sc, int slot) dev = XHCI_SLOTDEV_PTR(sc, slot); if (!dev) { - DPRINTF(("xhci reset unassigned slot (%d)?\r\n", slot)); + DPRINTF(("xhci reset unassigned slot (%d)?", slot)); } else { dev->dev_slotstate = XHCI_ST_DISABLED; } @@ -736,20 +737,20 @@ pci_xhci_insert_event(struct pci_xhci_softc *sc, struct xhci_trb *evtrb, erdp_idx = (erdp - rts->erstba_p[rts->er_deq_seg].qwEvrsTablePtr) / sizeof(struct xhci_trb); - DPRINTF(("pci_xhci: insert event 0[%lx] 2[%x] 3[%x]\r\n" - "\terdp idx %d/seg %d, enq idx %d/seg %d, pcs %u\r\n" - "\t(erdp=0x%lx, erst=0x%lx, tblsz=%u, do_intr %d)\r\n", - evtrb->qwTrb0, evtrb->dwTrb2, evtrb->dwTrb3, + DPRINTF(("pci_xhci: insert event 0[%lx] 2[%x] 3[%x]", + evtrb->qwTrb0, evtrb->dwTrb2, evtrb->dwTrb3)); + DPRINTF(("\terdp idx %d/seg %d, enq idx %d/seg %d, pcs %u", erdp_idx, rts->er_deq_seg, rts->er_enq_idx, - rts->er_enq_seg, - rts->event_pcs, erdp, rts->erstba_p->qwEvrsTablePtr, + 
rts->er_enq_seg, rts->event_pcs)); + DPRINTF(("\t(erdp=0x%lx, erst=0x%lx, tblsz=%u, do_intr %d)", + erdp, rts->erstba_p->qwEvrsTablePtr, rts->erstba_p->dwEvrsTableSize, do_intr)); evtrbptr = &rts->erst_p[rts->er_enq_idx]; /* TODO: multi-segment table */ if (rts->er_events_cnt >= rts->erstba_p->dwEvrsTableSize) { - DPRINTF(("pci_xhci[%d] cannot insert event; ring full\r\n", + DPRINTF(("pci_xhci[%d] cannot insert event; ring full", __LINE__)); err = XHCI_TRB_ERROR_EV_RING_FULL; goto done; @@ -760,7 +761,7 @@ pci_xhci_insert_event(struct pci_xhci_softc *sc, struct xhci_trb *evtrb, if ((evtrbptr->dwTrb3 & 0x1) == (rts->event_pcs & 0x1)) { - DPRINTF(("pci_xhci[%d] insert evt err: ring full\r\n", + DPRINTF(("pci_xhci[%d] insert evt err: ring full", __LINE__)); errev.qwTrb0 = 0; @@ -820,7 +821,7 @@ pci_xhci_cmd_enable_slot(struct pci_xhci_softc *sc, uint32_t *slot) } } - DPRINTF(("pci_xhci enable slot (error=%d) slot %u\r\n", + DPRINTF(("pci_xhci enable slot (error=%d) slot %u", cmderr != XHCI_TRB_ERROR_SUCCESS, *slot)); return (cmderr); @@ -832,7 +833,7 @@ pci_xhci_cmd_disable_slot(struct pci_xhci_softc *sc, uint32_t slot) struct pci_xhci_dev_emu *dev; uint32_t cmderr; - DPRINTF(("pci_xhci disable slot %u\r\n", slot)); + DPRINTF(("pci_xhci disable slot %u", slot)); cmderr = XHCI_TRB_ERROR_NO_SLOTS; if (sc->portregs == NULL) @@ -871,7 +872,7 @@ pci_xhci_cmd_reset_device(struct pci_xhci_softc *sc, uint32_t slot) if (sc->portregs == NULL) goto done; - DPRINTF(("pci_xhci reset device slot %u\r\n", slot)); + DPRINTF(("pci_xhci reset device slot %u", slot)); dev = XHCI_SLOTDEV_PTR(sc, slot); if (!dev || dev->dev_slotstate == XHCI_ST_DISABLED) @@ -924,19 +925,19 @@ pci_xhci_cmd_address_device(struct pci_xhci_softc *sc, uint32_t slot, cmderr = XHCI_TRB_ERROR_SUCCESS; - DPRINTF(("pci_xhci: address device, input ctl: D 0x%08x A 0x%08x,\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", - input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, + 
DPRINTF(("pci_xhci: address device, input ctl: D 0x%08x A 0x%08x,", + input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1)); + DPRINTF((" slot %08x %08x %08x %08x", islot_ctx->dwSctx0, islot_ctx->dwSctx1, - islot_ctx->dwSctx2, islot_ctx->dwSctx3, + islot_ctx->dwSctx2, islot_ctx->dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); /* when setting address: drop-ctx=0, add-ctx=slot+ep0 */ if ((input_ctx->ctx_input.dwInCtx0 != 0) || (input_ctx->ctx_input.dwInCtx1 & 0x03) != 0x03) { - DPRINTF(("pci_xhci: address device, input ctl invalid\r\n")); + DPRINTF(("pci_xhci: address device, input ctl invalid")); cmderr = XHCI_TRB_ERROR_TRB; goto done; } @@ -944,8 +945,8 @@ pci_xhci_cmd_address_device(struct pci_xhci_softc *sc, uint32_t slot, /* assign address to slot */ dev_ctx = pci_xhci_get_dev_ctx(sc, slot); - DPRINTF(("pci_xhci: address device, dev ctx\r\n" - " slot %08x %08x %08x %08x\r\n", + DPRINTF(("pci_xhci: address device, dev ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); @@ -976,11 +977,11 @@ pci_xhci_cmd_address_device(struct pci_xhci_softc *sc, uint32_t slot, dev->dev_slotstate = XHCI_ST_ADDRESSED; - DPRINTF(("pci_xhci: address device, output ctx\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: address device, output ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, - dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3, + dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); @@ -1001,13 +1002,13 @@ pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, cmderr = XHCI_TRB_ERROR_SUCCESS; - DPRINTF(("pci_xhci config_ep slot %u\r\n", slot)); + 
DPRINTF(("pci_xhci config_ep slot %u", slot)); dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); if ((trb->dwTrb3 & XHCI_TRB_3_DCEP_BIT) != 0) { - DPRINTF(("pci_xhci config_ep - deconfigure ep slot %u\r\n", + DPRINTF(("pci_xhci config_ep - deconfigure ep slot %u", slot)); if (dev->dev_ue->ue_stop != NULL) dev->dev_ue->ue_stop(dev->dev_sc); @@ -1036,7 +1037,7 @@ pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, } if (dev->dev_slotstate < XHCI_ST_ADDRESSED) { - DPRINTF(("pci_xhci: config_ep slotstate x%x != addressed\r\n", + DPRINTF(("pci_xhci: config_ep slotstate x%x != addressed", dev->dev_slotstate)); cmderr = XHCI_TRB_ERROR_SLOT_NOT_ON; goto done; @@ -1058,7 +1059,7 @@ pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, input_ctx = XHCI_GADDR(sc, trb->qwTrb0 & ~0xFUL); dev_ctx = dev->dev_ctx; - DPRINTF(("pci_xhci: config_ep inputctx: D:x%08x A:x%08x 7:x%08x\r\n", + DPRINTF(("pci_xhci: config_ep inputctx: D:x%08x A:x%08x 7:x%08x", input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, input_ctx->ctx_input.dwInCtx7)); @@ -1067,7 +1068,7 @@ pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, if (input_ctx->ctx_input.dwInCtx0 & XHCI_INCTX_0_DROP_MASK(i)) { - DPRINTF((" config ep - dropping ep %d\r\n", i)); + DPRINTF((" config ep - dropping ep %d", i)); pci_xhci_disable_ep(dev, i); } @@ -1075,7 +1076,7 @@ pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, XHCI_INCTX_1_ADD_MASK(i)) { iep_ctx = &input_ctx->ctx_ep[i]; - DPRINTF((" enable ep[%d] %08x %08x %016lx %08x\r\n", + DPRINTF((" enable ep[%d] %08x %08x %016lx %08x", i, iep_ctx->dwEpCtx0, iep_ctx->dwEpCtx1, iep_ctx->qwEpCtx2, iep_ctx->dwEpCtx4)); @@ -1097,7 +1098,7 @@ pci_xhci_cmd_config_ep(struct pci_xhci_softc *sc, uint32_t slot, dev->dev_slotstate = XHCI_ST_CONFIGURED; DPRINTF(("EP configured; slot %u [0]=0x%08x [1]=0x%08x [2]=0x%08x " - "[3]=0x%08x\r\n", + "[3]=0x%08x", slot, dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, 
dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); @@ -1118,7 +1119,7 @@ pci_xhci_cmd_reset_ep(struct pci_xhci_softc *sc, uint32_t slot, epid = XHCI_TRB_3_EP_GET(trb->dwTrb3); - DPRINTF(("pci_xhci: reset ep %u: slot %u\r\n", epid, slot)); + DPRINTF(("pci_xhci: reset ep %u: slot %u", epid, slot)); cmderr = XHCI_TRB_ERROR_SUCCESS; @@ -1133,7 +1134,7 @@ pci_xhci_cmd_reset_ep(struct pci_xhci_softc *sc, uint32_t slot, } if (epid < 1 || epid > 31) { - DPRINTF(("pci_xhci: reset ep: invalid epid %u\r\n", epid)); + DPRINTF(("pci_xhci: reset ep: invalid epid %u", epid)); cmderr = XHCI_TRB_ERROR_TRB; goto done; } @@ -1152,7 +1153,7 @@ pci_xhci_cmd_reset_ep(struct pci_xhci_softc *sc, uint32_t slot, if (XHCI_EPCTX_0_MAXP_STREAMS_GET(ep_ctx->dwEpCtx0) == 0) ep_ctx->qwEpCtx2 = devep->ep_ringaddr | devep->ep_ccs; - DPRINTF(("pci_xhci: reset ep[%u] %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: reset ep[%u] %08x %08x %016lx %08x", epid, ep_ctx->dwEpCtx0, ep_ctx->dwEpCtx1, ep_ctx->qwEpCtx2, ep_ctx->dwEpCtx4)); @@ -1183,7 +1184,7 @@ pci_xhci_find_stream(struct pci_xhci_softc *sc, struct xhci_endp_ctx *ep, return (XHCI_TRB_ERROR_INVALID_SID); if (XHCI_EPCTX_0_LSA_GET(ep->dwEpCtx0) == 0) { - DPRINTF(("pci_xhci: find_stream; LSA bit not set\r\n")); + DPRINTF(("pci_xhci: find_stream; LSA bit not set")); return (XHCI_TRB_ERROR_INVALID_SID); } @@ -1217,16 +1218,17 @@ pci_xhci_cmd_set_tr(struct pci_xhci_softc *sc, uint32_t slot, dev = XHCI_SLOTDEV_PTR(sc, slot); assert(dev != NULL); - DPRINTF(("pci_xhci set_tr: new-tr x%016lx, SCT %u DCS %u\r\n" - " stream-id %u, slot %u, epid %u, C %u\r\n", + DPRINTF(("pci_xhci set_tr: new-tr x%016lx, SCT %u DCS %u", (trb->qwTrb0 & ~0xF), (uint32_t)((trb->qwTrb0 >> 1) & 0x7), - (uint32_t)(trb->qwTrb0 & 0x1), (trb->dwTrb2 >> 16) & 0xFFFF, + (uint32_t)(trb->qwTrb0 & 0x1))); + DPRINTF((" stream-id %u, slot %u, epid %u, C %u", + (trb->dwTrb2 >> 16) & 0xFFFF, XHCI_TRB_3_SLOT_GET(trb->dwTrb3), XHCI_TRB_3_EP_GET(trb->dwTrb3), trb->dwTrb3 & 0x1)); epid = 
XHCI_TRB_3_EP_GET(trb->dwTrb3); if (epid < 1 || epid > 31) { - DPRINTF(("pci_xhci: set_tr_deq: invalid epid %u\r\n", epid)); + DPRINTF(("pci_xhci: set_tr_deq: invalid epid %u", epid)); cmderr = XHCI_TRB_ERROR_TRB; goto done; } @@ -1242,7 +1244,7 @@ pci_xhci_cmd_set_tr(struct pci_xhci_softc *sc, uint32_t slot, case XHCI_ST_EPCTX_ERROR: break; default: - DPRINTF(("pci_xhci cmd set_tr invalid state %x\r\n", + DPRINTF(("pci_xhci cmd set_tr invalid state %x", XHCI_EPCTX_0_EPSTATE_GET(ep_ctx->dwEpCtx0))); cmderr = XHCI_TRB_ERROR_CONTEXT_STATE; goto done; @@ -1265,7 +1267,7 @@ pci_xhci_cmd_set_tr(struct pci_xhci_softc *sc, uint32_t slot, } } else { if (streamid != 0) { - DPRINTF(("pci_xhci cmd set_tr streamid %x != 0\r\n", + DPRINTF(("pci_xhci cmd set_tr streamid %x != 0", streamid)); } ep_ctx->qwEpCtx2 = trb->qwTrb0 & ~0xFUL; @@ -1273,7 +1275,7 @@ pci_xhci_cmd_set_tr(struct pci_xhci_softc *sc, uint32_t slot, devep->ep_ccs = trb->qwTrb0 & 0x1; devep->ep_tr = XHCI_GADDR(sc, devep->ep_ringaddr); - DPRINTF(("pci_xhci set_tr first TRB:\r\n")); + DPRINTF(("pci_xhci set_tr first TRB:")); pci_xhci_dump_trb(devep->ep_tr); } ep_ctx->dwEpCtx0 = (ep_ctx->dwEpCtx0 & ~0x7) | XHCI_ST_EPCTX_STOPPED; @@ -1297,19 +1299,19 @@ pci_xhci_cmd_eval_ctx(struct pci_xhci_softc *sc, uint32_t slot, ep0_ctx = &input_ctx->ctx_ep[1]; cmderr = XHCI_TRB_ERROR_SUCCESS; - DPRINTF(("pci_xhci: eval ctx, input ctl: D 0x%08x A 0x%08x,\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", - input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1, + DPRINTF(("pci_xhci: eval ctx, input ctl: D 0x%08x A 0x%08x,", + input_ctx->ctx_input.dwInCtx0, input_ctx->ctx_input.dwInCtx1)); + DPRINTF((" slot %08x %08x %08x %08x", islot_ctx->dwSctx0, islot_ctx->dwSctx1, - islot_ctx->dwSctx2, islot_ctx->dwSctx3, + islot_ctx->dwSctx2, islot_ctx->dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); /* this command expects 
drop-ctx=0 & add-ctx=slot+ep0 */ if ((input_ctx->ctx_input.dwInCtx0 != 0) || (input_ctx->ctx_input.dwInCtx1 & 0x03) == 0) { - DPRINTF(("pci_xhci: eval ctx, input ctl invalid\r\n")); + DPRINTF(("pci_xhci: eval ctx, input ctl invalid")); cmderr = XHCI_TRB_ERROR_TRB; goto done; } @@ -1317,8 +1319,8 @@ pci_xhci_cmd_eval_ctx(struct pci_xhci_softc *sc, uint32_t slot, /* assign address to slot; in this emulation, slot_id = address */ dev_ctx = pci_xhci_get_dev_ctx(sc, slot); - DPRINTF(("pci_xhci: eval ctx, dev ctx\r\n" - " slot %08x %08x %08x %08x\r\n", + DPRINTF(("pci_xhci: eval ctx, dev ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); @@ -1342,11 +1344,11 @@ pci_xhci_cmd_eval_ctx(struct pci_xhci_softc *sc, uint32_t slot, ep0_ctx = &dev_ctx->ctx_ep[1]; } - DPRINTF(("pci_xhci: eval ctx, output ctx\r\n" - " slot %08x %08x %08x %08x\r\n" - " ep0 %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: eval ctx, output ctx")); + DPRINTF((" slot %08x %08x %08x %08x", dev_ctx->ctx_slot.dwSctx0, dev_ctx->ctx_slot.dwSctx1, - dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3, + dev_ctx->ctx_slot.dwSctx2, dev_ctx->ctx_slot.dwSctx3)); + DPRINTF((" ep0 %08x %08x %016lx %08x", ep0_ctx->dwEpCtx0, ep0_ctx->dwEpCtx1, ep0_ctx->qwEpCtx2, ep0_ctx->dwEpCtx4)); @@ -1383,7 +1385,7 @@ pci_xhci_complete_commands(struct pci_xhci_softc *sc) break; DPRINTF(("pci_xhci: cmd type 0x%x, Trb0 x%016lx dwTrb2 x%08x" - " dwTrb3 x%08x, TRB_CYCLE %u/ccs %u\r\n", + " dwTrb3 x%08x, TRB_CYCLE %u/ccs %u", type, trb->qwTrb0, trb->dwTrb2, trb->dwTrb3, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT, ccs)); @@ -1424,13 +1426,13 @@ pci_xhci_complete_commands(struct pci_xhci_softc *sc) break; case XHCI_TRB_TYPE_RESET_EP: /* 0x0E */ - DPRINTF(("Reset Endpoint on slot %d\r\n", slot)); + DPRINTF(("Reset Endpoint on slot %d", slot)); slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_ep(sc, slot, trb); break; 
case XHCI_TRB_TYPE_STOP_EP: /* 0x0F */ - DPRINTF(("Stop Endpoint on slot %d\r\n", slot)); + DPRINTF(("Stop Endpoint on slot %d", slot)); slot = XHCI_TRB_3_SLOT_GET(trb->dwTrb3); cmderr = pci_xhci_cmd_reset_ep(sc, slot, trb); break; @@ -1465,7 +1467,7 @@ pci_xhci_complete_commands(struct pci_xhci_softc *sc) break; default: - DPRINTF(("pci_xhci: unsupported cmd %x\r\n", type)); + DPRINTF(("pci_xhci: unsupported cmd %x", type)); break; } @@ -1476,7 +1478,7 @@ pci_xhci_complete_commands(struct pci_xhci_softc *sc) evtrb.qwTrb0 = crcr; evtrb.dwTrb2 |= XHCI_TRB_2_ERROR_SET(cmderr); evtrb.dwTrb3 |= XHCI_TRB_3_SLOT_SET(slot); - DPRINTF(("pci_xhci: command 0x%x result: 0x%x\r\n", + DPRINTF(("pci_xhci: command 0x%x result: 0x%x", type, cmderr)); pci_xhci_insert_event(sc, &evtrb, 1); } @@ -1521,7 +1523,7 @@ pci_xhci_dump_trb(struct xhci_trb *trb) uint32_t type; type = XHCI_TRB_3_TYPE_GET(trb->dwTrb3); - DPRINTF(("pci_xhci: trb[@%p] type x%02x %s 0:x%016lx 2:x%08x 3:x%08x\r\n", + DPRINTF(("pci_xhci: trb[@%p] type x%02x %s 0:x%016lx 2:x%08x 3:x%08x", trb, type, type <= XHCI_TRB_TYPE_NOOP_CMD ? 
trbtypes[type] : "INVALID", trb->qwTrb0, trb->dwTrb2, trb->dwTrb3)); @@ -1560,7 +1562,7 @@ pci_xhci_xfer_complete(struct pci_xhci_softc *sc, struct usb_data_xfer *xfer, trbflags = trb->dwTrb3; DPRINTF(("pci_xhci: xfer[%d] done?%u:%d trb %x %016lx %x " - "(err %d) IOC?%d\r\n", + "(err %d) IOC?%d", i, xfer->data[i].processed, xfer->data[i].blen, XHCI_TRB_3_TYPE_GET(trbflags), evtrb.qwTrb0, trbflags, err, @@ -1596,7 +1598,7 @@ pci_xhci_xfer_complete(struct pci_xhci_softc *sc, struct usb_data_xfer *xfer, XHCI_TRB_3_SLOT_SET(slot) | XHCI_TRB_3_EP_SET(epid); if (XHCI_TRB_3_TYPE_GET(trbflags) == XHCI_TRB_TYPE_EVENT_DATA) { - DPRINTF(("pci_xhci EVENT_DATA edtla %u\r\n", edtla)); + DPRINTF(("pci_xhci EVENT_DATA edtla %u", edtla)); evtrb.qwTrb0 = trb->qwTrb0; evtrb.dwTrb2 = (edtla & 0xFFFFF) | XHCI_TRB_2_ERROR_SET(err); @@ -1631,7 +1633,7 @@ pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, devep->ep_sctx_trbs[streamid].ccs = ccs & 0x1; ep_ctx->qwEpCtx2 = (ep_ctx->qwEpCtx2 & ~0x1) | (ccs & 0x1); - DPRINTF(("xhci update ep-ring stream %d, addr %lx\r\n", + DPRINTF(("xhci update ep-ring stream %d, addr %lx", streamid, devep->ep_sctx[streamid].qwSctx0)); } else { devep->ep_ringaddr = ringaddr & ~0xFUL; @@ -1639,7 +1641,7 @@ pci_xhci_update_ep_ring(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev, devep->ep_tr = XHCI_GADDR(sc, ringaddr & ~0xFUL); ep_ctx->qwEpCtx2 = (ringaddr & ~0xFUL) | (ccs & 0x1); - DPRINTF(("xhci update ep-ring, addr %lx\r\n", + DPRINTF(("xhci update ep-ring, addr %lx", (devep->ep_ringaddr | devep->ep_ccs))); } } @@ -1724,7 +1726,7 @@ pci_xhci_handle_transfer(struct pci_xhci_softc *sc, xfer = devep->ep_xfer; USB_DATA_XFER_LOCK(xfer); - DPRINTF(("pci_xhci handle_transfer slot %u\r\n", slot)); + DPRINTF(("pci_xhci handle_transfer slot %u", slot)); retry: err = 0; @@ -1740,7 +1742,7 @@ retry: if (XHCI_TRB_3_TYPE_GET(trbflags) != XHCI_TRB_TYPE_LINK && (trbflags & XHCI_TRB_3_CYCLE_BIT) != (ccs & XHCI_TRB_3_CYCLE_BIT)) { - 
DPRINTF(("Cycle-bit changed trbflags %x, ccs %x\r\n", + DPRINTF(("Cycle-bit changed trbflags %x, ccs %x", trbflags & XHCI_TRB_3_CYCLE_BIT, ccs)); break; } @@ -1760,7 +1762,7 @@ retry: case XHCI_TRB_TYPE_SETUP_STAGE: if ((trbflags & XHCI_TRB_3_IDT_BIT) == 0 || XHCI_TRB_2_BYTES_GET(trb->dwTrb2) != 8) { - DPRINTF(("pci_xhci: invalid setup trb\r\n")); + DPRINTF(("pci_xhci: invalid setup trb")); err = XHCI_TRB_ERROR_TRB; goto errout; } @@ -1782,7 +1784,7 @@ retry: case XHCI_TRB_TYPE_ISOCH: if (setup_trb != NULL) { DPRINTF(("pci_xhci: trb not supposed to be in " - "ctl scope\r\n")); + "ctl scope")); err = XHCI_TRB_ERROR_TRB; goto errout; } @@ -1816,7 +1818,7 @@ retry: default: DPRINTF(("pci_xhci: handle xfer unexpected trb type " - "0x%x\r\n", + "0x%x", XHCI_TRB_3_TYPE_GET(trbflags))); err = XHCI_TRB_ERROR_TRB; goto errout; @@ -1824,7 +1826,7 @@ retry: trb = pci_xhci_trb_next(sc, trb, &addr); - DPRINTF(("pci_xhci: next trb: 0x%lx\r\n", (uint64_t)trb)); + DPRINTF(("pci_xhci: next trb: 0x%lx", (uint64_t)trb)); if (xfer_block) { xfer_block->trbnext = addr; @@ -1838,14 +1840,14 @@ retry: /* handle current batch that requires interrupt on complete */ if (trbflags & XHCI_TRB_3_IOC_BIT) { - DPRINTF(("pci_xhci: trb IOC bit set\r\n")); + DPRINTF(("pci_xhci: trb IOC bit set")); if (epid == 1) do_retry = 1; break; } } - DPRINTF(("pci_xhci[%d]: xfer->ndata %u\r\n", __LINE__, xfer->ndata)); + DPRINTF(("pci_xhci[%d]: xfer->ndata %u", __LINE__, xfer->ndata)); if (epid == 1) { err = USB_ERR_NOT_STARTED; @@ -1869,7 +1871,7 @@ retry: errout: if (err == XHCI_TRB_ERROR_EV_RING_FULL) - DPRINTF(("pci_xhci[%d]: event ring full\r\n", __LINE__)); + DPRINTF(("pci_xhci[%d]: event ring full", __LINE__)); if (!do_retry) USB_DATA_XFER_UNLOCK(xfer); @@ -1879,7 +1881,7 @@ errout: if (do_retry) { USB_DATA_XFER_RESET(xfer); - DPRINTF(("pci_xhci[%d]: retry:continuing with next TRBs\r\n", + DPRINTF(("pci_xhci[%d]: retry:continuing with next TRBs", __LINE__)); goto retry; } @@ -1903,16 +1905,16 @@ 
pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, uint64_t ringaddr; uint32_t ccs; - DPRINTF(("pci_xhci doorbell slot %u epid %u stream %u\r\n", + DPRINTF(("pci_xhci doorbell slot %u epid %u stream %u", slot, epid, streamid)); if (slot == 0 || slot > sc->ndevices) { - DPRINTF(("pci_xhci: invalid doorbell slot %u\r\n", slot)); + DPRINTF(("pci_xhci: invalid doorbell slot %u", slot)); return; } if (epid == 0 || epid >= XHCI_MAX_ENDPOINTS) { - DPRINTF(("pci_xhci: invalid endpoint %u\r\n", epid)); + DPRINTF(("pci_xhci: invalid endpoint %u", epid)); return; } @@ -1926,7 +1928,7 @@ pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, sctx_tr = NULL; - DPRINTF(("pci_xhci: device doorbell ep[%u] %08x %08x %016lx %08x\r\n", + DPRINTF(("pci_xhci: device doorbell ep[%u] %08x %08x %016lx %08x", epid, ep_ctx->dwEpCtx0, ep_ctx->dwEpCtx1, ep_ctx->qwEpCtx2, ep_ctx->dwEpCtx4)); @@ -1954,38 +1956,38 @@ pci_xhci_device_doorbell(struct pci_xhci_softc *sc, uint32_t slot, * (prime) are invalid. 
*/ if (streamid == 0 || streamid == 65534 || streamid == 65535) { - DPRINTF(("pci_xhci: invalid stream %u\r\n", streamid)); + DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } sctx = NULL; pci_xhci_find_stream(sc, ep_ctx, streamid, &sctx); if (sctx == NULL) { - DPRINTF(("pci_xhci: invalid stream %u\r\n", streamid)); + DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } sctx_tr = &devep->ep_sctx_trbs[streamid]; ringaddr = sctx_tr->ringaddr; ccs = sctx_tr->ccs; trb = XHCI_GADDR(sc, sctx_tr->ringaddr & ~0xFUL); - DPRINTF(("doorbell, stream %u, ccs %lx, trb ccs %x\r\n", + DPRINTF(("doorbell, stream %u, ccs %lx, trb ccs %x", streamid, ep_ctx->qwEpCtx2 & XHCI_TRB_3_CYCLE_BIT, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT)); } else { if (streamid != 0) { - DPRINTF(("pci_xhci: invalid stream %u\r\n", streamid)); + DPRINTF(("pci_xhci: invalid stream %u", streamid)); return; } ringaddr = devep->ep_ringaddr; ccs = devep->ep_ccs; trb = devep->ep_tr; - DPRINTF(("doorbell, ccs %lx, trb ccs %x\r\n", + DPRINTF(("doorbell, ccs %lx, trb ccs %x", ep_ctx->qwEpCtx2 & XHCI_TRB_3_CYCLE_BIT, trb->dwTrb3 & XHCI_TRB_3_CYCLE_BIT)); } if (XHCI_TRB_3_TYPE_GET(trb->dwTrb3) == 0) { - DPRINTF(("pci_xhci: ring %lx trb[%lx] EP %u is RESERVED?\r\n", + DPRINTF(("pci_xhci: ring %lx trb[%lx] EP %u is RESERVED?", ep_ctx->qwEpCtx2, devep->ep_ringaddr, epid)); return; } @@ -2001,11 +2003,11 @@ pci_xhci_dbregs_write(struct pci_xhci_softc *sc, uint64_t offset, offset = (offset - sc->dboff) / sizeof(uint32_t); - DPRINTF(("pci_xhci: doorbell write offset 0x%lx: 0x%lx\r\n", + DPRINTF(("pci_xhci: doorbell write offset 0x%lx: 0x%lx", offset, value)); if (XHCI_HALTED(sc)) { - DPRINTF(("pci_xhci: controller halted\r\n")); + DPRINTF(("pci_xhci: controller halted")); return; } @@ -2025,11 +2027,11 @@ pci_xhci_rtsregs_write(struct pci_xhci_softc *sc, uint64_t offset, offset -= sc->rtsoff; if (offset == 0) { - DPRINTF(("pci_xhci attempted write to MFINDEX\r\n")); + DPRINTF(("pci_xhci attempted write to 
MFINDEX")); return; } - DPRINTF(("pci_xhci: runtime regs write offset 0x%lx: 0x%lx\r\n", + DPRINTF(("pci_xhci: runtime regs write offset 0x%lx: 0x%lx", offset, value)); offset -= 0x20; /* start of intrreg */ @@ -2076,7 +2078,7 @@ pci_xhci_rtsregs_write(struct pci_xhci_softc *sc, uint64_t offset, rts->er_enq_idx = 0; rts->er_events_cnt = 0; - DPRINTF(("pci_xhci: wr erstba erst (%p) ptr 0x%lx, sz %u\r\n", + DPRINTF(("pci_xhci: wr erstba erst (%p) ptr 0x%lx, sz %u", rts->erstba_p, rts->erstba_p->qwEvrsTablePtr, rts->erstba_p->dwEvrsTableSize)); @@ -2117,14 +2119,14 @@ pci_xhci_rtsregs_write(struct pci_xhci_softc *sc, uint64_t offset, rts->erstba_p->dwEvrsTableSize - (erdp_i - rts->er_enq_idx); - DPRINTF(("pci_xhci: erdp 0x%lx, events cnt %u\r\n", + DPRINTF(("pci_xhci: erdp 0x%lx, events cnt %u", erdp, rts->er_events_cnt)); } break; default: - DPRINTF(("pci_xhci attempted write to RTS offset 0x%lx\r\n", + DPRINTF(("pci_xhci attempted write to RTS offset 0x%lx", offset)); break; } @@ -2142,7 +2144,7 @@ pci_xhci_portregs_read(struct pci_xhci_softc *sc, uint64_t offset) port = (offset - 0x3F0) / 0x10; if (port > XHCI_MAX_DEVS) { - DPRINTF(("pci_xhci: portregs_read port %d >= XHCI_MAX_DEVS\r\n", + DPRINTF(("pci_xhci: portregs_read port %d >= XHCI_MAX_DEVS", port)); /* return default value for unused port */ @@ -2154,7 +2156,7 @@ pci_xhci_portregs_read(struct pci_xhci_softc *sc, uint64_t offset) p = &sc->portregs[port].portsc; p += offset / sizeof(uint32_t); - DPRINTF(("pci_xhci: portregs read offset 0x%lx port %u -> 0x%x\r\n", + DPRINTF(("pci_xhci: portregs read offset 0x%lx port %u -> 0x%x", offset, port, *p)); return (*p); @@ -2167,7 +2169,7 @@ pci_xhci_hostop_write(struct pci_xhci_softc *sc, uint64_t offset, offset -= XHCI_CAPLEN; if (offset < 0x400) - DPRINTF(("pci_xhci: hostop write offset 0x%lx: 0x%lx\r\n", + DPRINTF(("pci_xhci: hostop write offset 0x%lx: 0x%lx", offset, value)); switch (offset) { @@ -2230,7 +2232,7 @@ pci_xhci_hostop_write(struct pci_xhci_softc *sc, 
uint64_t offset, (value << 32); sc->opregs.dcbaa_p = XHCI_GADDR(sc, sc->opregs.dcbaap & ~0x3FUL); - DPRINTF(("pci_xhci: opregs dcbaap = 0x%lx (vaddr 0x%lx)\r\n", + DPRINTF(("pci_xhci: opregs dcbaap = 0x%lx (vaddr 0x%lx)", sc->opregs.dcbaap, (uint64_t)sc->opregs.dcbaa_p)); break; @@ -2260,7 +2262,7 @@ pci_xhci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, pthread_mutex_lock(&sc->mtx); if (offset < XHCI_CAPLEN) /* read only registers */ - WPRINTF(("pci_xhci: write RO-CAPs offset %ld\r\n", offset)); + WPRINTF(("pci_xhci: write RO-CAPs offset %ld", offset)); else if (offset < sc->dboff) pci_xhci_hostop_write(sc, offset, value); else if (offset < sc->rtsoff) @@ -2268,7 +2270,7 @@ pci_xhci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, else if (offset < sc->regsend) pci_xhci_rtsregs_write(sc, offset, value); else - WPRINTF(("pci_xhci: write invalid offset %ld\r\n", offset)); + WPRINTF(("pci_xhci: write invalid offset %ld", offset)); pthread_mutex_unlock(&sc->mtx); } @@ -2316,7 +2318,7 @@ pci_xhci_hostcap_read(struct pci_xhci_softc *sc, uint64_t offset) break; } - DPRINTF(("pci_xhci: hostcap read offset 0x%lx -> 0x%lx\r\n", + DPRINTF(("pci_xhci: hostcap read offset 0x%lx -> 0x%lx", offset, value)); return (value); @@ -2376,7 +2378,7 @@ pci_xhci_hostop_read(struct pci_xhci_softc *sc, uint64_t offset) } if (offset < 0x400) - DPRINTF(("pci_xhci: hostop read offset 0x%lx -> 0x%lx\r\n", + DPRINTF(("pci_xhci: hostop read offset 0x%lx -> 0x%lx", offset, value)); return (value); @@ -2414,7 +2416,7 @@ pci_xhci_rtsregs_read(struct pci_xhci_softc *sc, uint64_t offset) value = *p; } - DPRINTF(("pci_xhci: rtsregs read offset 0x%lx -> 0x%x\r\n", + DPRINTF(("pci_xhci: rtsregs read offset 0x%lx -> 0x%x", offset, value)); return (value); @@ -2458,11 +2460,11 @@ pci_xhci_xecp_read(struct pci_xhci_softc *sc, uint64_t offset) case 28: break; default: - DPRINTF(("pci_xhci: xecp invalid offset 0x%lx\r\n", offset)); + DPRINTF(("pci_xhci: xecp invalid offset 0x%lx", 
offset)); break; } - DPRINTF(("pci_xhci: xecp read offset 0x%lx -> 0x%x\r\n", + DPRINTF(("pci_xhci: xecp read offset 0x%lx -> 0x%x", offset, value)); return (value); @@ -2493,7 +2495,7 @@ pci_xhci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, value = pci_xhci_xecp_read(sc, offset); else { value = 0; - WPRINTF(("pci_xhci: read invalid offset %ld\r\n", offset)); + WPRINTF(("pci_xhci: read invalid offset %ld", offset)); } pthread_mutex_unlock(&sc->mtx); @@ -2523,7 +2525,7 @@ pci_xhci_reset_port(struct pci_xhci_softc *sc, int portn, int warm) assert(portn <= XHCI_MAX_DEVS); - DPRINTF(("xhci reset port %d\r\n", portn)); + DPRINTF(("xhci reset port %d", portn)); port = XHCI_PORTREG_PTR(sc, portn); dev = XHCI_DEVINST_PTR(sc, portn); @@ -2545,7 +2547,7 @@ pci_xhci_reset_port(struct pci_xhci_softc *sc, int portn, int warm) error = pci_xhci_insert_event(sc, &evtrb, 1); if (error != XHCI_TRB_ERROR_SUCCESS) DPRINTF(("xhci reset port insert event " - "failed\r\n")); + "failed")); } } } @@ -2571,10 +2573,10 @@ pci_xhci_init_port(struct pci_xhci_softc *sc, int portn) XHCI_PS_SPEED_SET(dev->dev_ue->ue_usbspeed); } - DPRINTF(("Init port %d 0x%x\n", portn, port->portsc)); + DPRINTF(("Init port %d 0x%x", portn, port->portsc)); } else { port->portsc = XHCI_PS_PLS_SET(UPS_PORT_LS_RX_DET) | XHCI_PS_PP; - DPRINTF(("Init empty port %d 0x%x\n", portn, port->portsc)); + DPRINTF(("Init empty port %d 0x%x", portn, port->portsc)); } } @@ -2629,12 +2631,12 @@ pci_xhci_dev_intr(struct usb_hci *hci, int epctx) dev_ctx = dev->dev_ctx; ep_ctx = &dev_ctx->ctx_ep[epid]; if ((ep_ctx->dwEpCtx0 & 0x7) == XHCI_ST_EPCTX_DISABLED) { - DPRINTF(("xhci device interrupt on disabled endpoint %d\r\n", + DPRINTF(("xhci device interrupt on disabled endpoint %d", epid)); return (0); } - DPRINTF(("xhci device interrupt on endpoint %d\r\n", epid)); + DPRINTF(("xhci device interrupt on endpoint %d", epid)); pci_xhci_device_doorbell(sc, hci->hci_port, epid, 0); @@ -2646,7 +2648,7 @@ static int 
pci_xhci_dev_event(struct usb_hci *hci, enum hci_usbev evid, void *param) { - DPRINTF(("xhci device event port %d\r\n", hci->hci_port)); + DPRINTF(("xhci device event port %d", hci->hci_port)); return (0); } @@ -2656,7 +2658,7 @@ static void pci_xhci_device_usage(char *opt) { - fprintf(stderr, "Invalid USB emulation \"%s\"\r\n", opt); + EPRINTLN("Invalid USB emulation \"%s\"", opt); } static int @@ -2690,7 +2692,7 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) if (usb2_port == ((sc->usb2_port_start-1) + XHCI_MAX_DEVS/2) || usb3_port == ((sc->usb3_port_start-1) + XHCI_MAX_DEVS/2)) { WPRINTF(("pci_xhci max number of USB 2 or 3 " - "devices reached, max %d\r\n", XHCI_MAX_DEVS/2)); + "devices reached, max %d", XHCI_MAX_DEVS/2)); usb2_port = usb3_port = -1; goto done; } @@ -2704,12 +2706,12 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) ue = usb_emu_finddev(xopts); if (ue == NULL) { pci_xhci_device_usage(xopts); - DPRINTF(("pci_xhci device not found %s\r\n", xopts)); + DPRINTF(("pci_xhci device not found %s", xopts)); usb2_port = usb3_port = -1; goto done; } - DPRINTF(("pci_xhci adding device %s, opts \"%s\"\r\n", + DPRINTF(("pci_xhci adding device %s, opts \"%s\"", xopts, config)); dev = calloc(1, sizeof(struct pci_xhci_dev_emu)); @@ -2758,7 +2760,7 @@ portsfinal: pci_xhci_init_port(sc, i); } } else { - WPRINTF(("pci_xhci no USB devices configured\r\n")); + WPRINTF(("pci_xhci no USB devices configured")); sc->ndevices = 1; } @@ -2784,7 +2786,7 @@ pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) int error; if (xhci_in_use) { - WPRINTF(("pci_xhci controller already defined\r\n")); + WPRINTF(("pci_xhci controller already defined")); return (-1); } xhci_in_use = 1; @@ -2828,7 +2830,7 @@ pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) if (sc->rtsoff & 0x1F) sc->rtsoff = (sc->rtsoff + 0x1F) & ~0x1F; - DPRINTF(("pci_xhci dboff: 0x%x, rtsoff: 0x%x\r\n", sc->dboff, + DPRINTF(("pci_xhci dboff: 0x%x, rtsoff: 
0x%x", sc->dboff, sc->rtsoff)); sc->opregs.usbsts = XHCI_STS_HCH; @@ -2855,7 +2857,7 @@ pci_xhci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* regsend + xecp registers */ pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, sc->regsend + 4*32); - DPRINTF(("pci_xhci pci_emu_alloc: %d\r\n", sc->regsend + 4*32)); + DPRINTF(("pci_xhci pci_emu_alloc: %d", sc->regsend + 4*32)); pci_lintr_request(pi); diff --git a/usr/src/cmd/bhyve/pm.c b/usr/src/cmd/bhyve/pm.c index be188b79f2..c57da7fd74 100644 --- a/usr/src/cmd/bhyve/pm.c +++ b/usr/src/cmd/bhyve/pm.c @@ -61,6 +61,10 @@ static sig_t old_power_handler; struct vmctx *pwr_ctx; #endif +static unsigned gpe0_active; +static unsigned gpe0_enabled; +static const unsigned gpe0_valid = (1u << GPE_VMGENC); + /* * Reset Control register at I/O port 0xcf9. Bit 2 forces a system * reset when it transitions from 0 to 1. Bit 1 selects the type of @@ -156,6 +160,9 @@ sci_update(struct vmctx *ctx) need_sci = 1; if ((pm1_enable & PM1_RTC_EN) && (pm1_status & PM1_RTC_STS)) need_sci = 1; + if ((gpe0_enabled & gpe0_active) != 0) + need_sci = 1; + if (need_sci) sci_assert(ctx); else @@ -306,6 +313,64 @@ INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler); SYSRES_IO(PM1A_EVT_ADDR, 8); #endif +void +acpi_raise_gpe(struct vmctx *ctx, unsigned bit) +{ + unsigned mask; + + assert(bit < (IO_GPE0_LEN * (8 / 2))); + mask = (1u << bit); + assert((mask & ~gpe0_valid) == 0); + + pthread_mutex_lock(&pm_lock); + gpe0_active |= mask; + sci_update(ctx); + pthread_mutex_unlock(&pm_lock); +} + +static int +gpe0_sts(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + /* + * ACPI 6.2 specifies the GPE register blocks are accessed + * byte-at-a-time. 
+ */ + if (bytes != 1) + return (-1); + + pthread_mutex_lock(&pm_lock); + if (in) + *eax = gpe0_active; + else { + /* W1C */ + gpe0_active &= ~(*eax & gpe0_valid); + sci_update(ctx); + } + pthread_mutex_unlock(&pm_lock); + return (0); +} +INOUT_PORT(gpe0_sts, IO_GPE0_STS, IOPORT_F_INOUT, gpe0_sts); + +static int +gpe0_en(struct vmctx *ctx, int vcpu, int in, int port, int bytes, + uint32_t *eax, void *arg) +{ + if (bytes != 1) + return (-1); + + pthread_mutex_lock(&pm_lock); + if (in) + *eax = gpe0_enabled; + else { + gpe0_enabled = (*eax & gpe0_valid); + sci_update(ctx); + } + pthread_mutex_unlock(&pm_lock); + return (0); +} +INOUT_PORT(gpe0_en, IO_GPE0_EN, IOPORT_F_INOUT, gpe0_en); + /* * ACPI SMI Command Register * diff --git a/usr/src/cmd/bhyve/ps2kbd.c b/usr/src/cmd/bhyve/ps2kbd.c index 5453a26949..3e6a1b67ca 100644 --- a/usr/src/cmd/bhyve/ps2kbd.c +++ b/usr/src/cmd/bhyve/ps2kbd.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <pthread_np.h> #include "atkbdc.h" +#include "debug.h" #include "console.h" /* keyboard device commands */ @@ -253,8 +254,8 @@ ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val) fifo_put(sc, PS2KC_ACK); break; default: - fprintf(stderr, "Unhandled ps2 keyboard current " - "command byte 0x%02x\n", val); + EPRINTLN("Unhandled ps2 keyboard current " + "command byte 0x%02x", val); break; } sc->curcmd = 0; @@ -298,8 +299,8 @@ ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val) fifo_put(sc, PS2KC_ACK); break; default: - fprintf(stderr, "Unhandled ps2 keyboard command " - "0x%02x\n", val); + EPRINTLN("Unhandled ps2 keyboard command " + "0x%02x", val); break; } } @@ -336,7 +337,7 @@ ps2kbd_keysym_queue(struct ps2kbd_softc *sc, } if (!found) { - fprintf(stderr, "Unhandled ps2 keyboard keysym 0x%x\n", keysym); + EPRINTLN("Unhandled ps2 keyboard keysym 0x%x", keysym); return; } diff --git a/usr/src/cmd/bhyve/ps2mouse.c b/usr/src/cmd/bhyve/ps2mouse.c index b2e08262b1..f42d2e7260 100644 --- a/usr/src/cmd/bhyve/ps2mouse.c +++ 
b/usr/src/cmd/bhyve/ps2mouse.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <pthread_np.h> #include "atkbdc.h" +#include "debug.h" #include "console.h" /* mouse device commands */ @@ -289,8 +290,8 @@ ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val, int insert) fifo_put(sc, PS2MC_ACK); break; default: - fprintf(stderr, "Unhandled ps2 mouse current " - "command byte 0x%02x\n", val); + EPRINTLN("Unhandled ps2 mouse current " + "command byte 0x%02x", val); break; } sc->curcmd = 0; @@ -358,8 +359,8 @@ ps2mouse_write(struct ps2mouse_softc *sc, uint8_t val, int insert) break; default: fifo_put(sc, PS2MC_ACK); - fprintf(stderr, "Unhandled ps2 mouse command " - "0x%02x\n", val); + EPRINTLN("Unhandled ps2 mouse command " + "0x%02x", val); break; } } diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c index 942c294775..a3d80197a8 100644 --- a/usr/src/cmd/bhyve/rfb.c +++ b/usr/src/cmd/bhyve/rfb.c @@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$"); #endif #include "bhyvegc.h" +#include "debug.h" #include "console.h" #include "rfb.h" #include "sockstream.h" @@ -77,9 +78,10 @@ __FBSDID("$FreeBSD$"); #endif static int rfb_debug = 0; -#define DPRINTF(params) if (rfb_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (rfb_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params +#define VERSION_LENGTH 12 #define AUTH_LENGTH 16 #define PASSWD_LENGTH 8 @@ -361,7 +363,7 @@ rfb_send_rect(struct rfb_softc *rc, int cfd, struct bhyvegc_image *gc, /* Compress with zlib */ err = deflate(&rc->zstream, Z_SYNC_FLUSH); if (err != Z_OK) { - WPRINTF(("zlib[rect] deflate err: %d\n", err)); + WPRINTF(("zlib[rect] deflate err: %d", err)); rc->enc_zlib_ok = false; deflateEnd(&rc->zstream); goto doraw; @@ -445,7 +447,7 @@ rfb_send_all(struct rfb_softc *rc, int cfd, struct bhyvegc_image *gc) /* Compress with zlib */ err = deflate(&rc->zstream, Z_SYNC_FLUSH); if (err != Z_OK) { - WPRINTF(("zlib deflate err: %d\n", err)); + WPRINTF(("zlib deflate err: 
%d", err)); rc->enc_zlib_ok = false; deflateEnd(&rc->zstream); goto doraw; @@ -773,7 +775,7 @@ rfb_handle(struct rfb_softc *rc, int cfd) stream_write(cfd, vbuf, strlen(vbuf)); /* 1b. Read client version */ - len = read(cfd, buf, sizeof(buf)); + len = stream_read(cfd, buf, VERSION_LENGTH); /* 2a. Send security type */ buf[0] = 1; @@ -889,7 +891,7 @@ rfb_handle(struct rfb_softc *rc, int cfd) for (;;) { len = read(cfd, buf, 1); if (len <= 0) { - DPRINTF(("rfb client exiting\r\n")); + DPRINTF(("rfb client exiting")); break; } @@ -913,7 +915,7 @@ rfb_handle(struct rfb_softc *rc, int cfd) rfb_recv_cuttext_msg(rc, cfd); break; default: - WPRINTF(("rfb unknown cli-code %d!\n", buf[0] & 0xff)); + WPRINTF(("rfb unknown cli-code %d!", buf[0] & 0xff)); goto done; } } @@ -1014,7 +1016,7 @@ rfb_init(char *hostname, int port, int wait, char *password) hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV | AI_PASSIVE; if ((e = getaddrinfo(hostname, servname, &hints, &ai)) != 0) { - fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); + EPRINTLN("getaddrinfo: %s", gai_strerror(e)); goto error; } @@ -1054,10 +1056,11 @@ rfb_init(char *hostname, int port, int wait, char *password) pthread_set_name_np(rc->tid, "rfb"); if (wait) { - DPRINTF(("Waiting for rfb client...\n")); + DPRINTF(("Waiting for rfb client...")); pthread_mutex_lock(&rc->mtx); pthread_cond_wait(&rc->cond, &rc->mtx); pthread_mutex_unlock(&rc->mtx); + DPRINTF(("rfb client connected")); } freeaddrinfo(ai); diff --git a/usr/src/cmd/bhyve/smbiostbl.c b/usr/src/cmd/bhyve/smbiostbl.c index da227f813a..8af8a85755 100644 --- a/usr/src/cmd/bhyve/smbiostbl.c +++ b/usr/src/cmd/bhyve/smbiostbl.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #include "bhyverun.h" +#include "debug.h" #include "smbiostbl.h" #define MB (1024*1024) @@ -255,7 +256,7 @@ struct smbios_table_type17 { uint16_t errhand; /* handle of mem error data */ uint16_t twidth; /* total width in bits */ uint16_t dwidth; /* data width in bits */ - uint16_t 
size; /* size in bytes */ + uint16_t size; /* size in kb or mb */ uint8_t form; /* form factor */ uint8_t set; /* set */ uint8_t dloc; /* device locator string */ @@ -268,7 +269,7 @@ struct smbios_table_type17 { uint8_t asset; /* asset tag string */ uint8_t part; /* part number string */ uint8_t attributes; /* attributes */ - uint32_t xsize; /* extended size in mbs */ + uint32_t xsize; /* extended size in mb */ uint16_t curspeed; /* current speed in mhz */ uint16_t minvoltage; /* minimum voltage */ uint16_t maxvoltage; /* maximum voltage */ @@ -444,7 +445,7 @@ struct smbios_table_type17 smbios_type17_template = { -1, /* handle of memory error data */ 64, /* total width in bits including ecc */ 64, /* data width in bits */ - 0x7fff, /* size in bytes (0x7fff=use extended)*/ + 0, /* size in kb or mb (0x7fff=use extended)*/ SMBIOS_MDFF_UNKNOWN, 0, /* set (0x00=none, 0xff=unknown) */ 1, /* device locator string */ @@ -695,20 +696,39 @@ smbios_type17_initializer(struct smbios_structure *template_entry, uint16_t *n, uint16_t *size) { struct smbios_table_type17 *type17; + uint64_t memsize, size_KB, size_MB; smbios_generic_initializer(template_entry, template_strings, curaddr, endaddr, n, size); type17 = (struct smbios_table_type17 *)curaddr; type17->arrayhand = type16_handle; - type17->xsize = guest_lomem; - if (guest_himem > 0) { - curaddr = *endaddr; - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type17 = (struct smbios_table_type17 *)curaddr; - type17->arrayhand = type16_handle; - type17->xsize = guest_himem; + memsize = guest_lomem + guest_himem; + size_KB = memsize / 1024; + size_MB = memsize / MB; + + /* A single Type 17 entry can't represent more than ~2PB RAM */ + if (size_MB > 0x7FFFFFFF) { + printf("Warning: guest memory too big for SMBIOS Type 17 table: " + "%luMB greater than max supported 2147483647MB\n", size_MB); + + size_MB = 0x7FFFFFFF; + } + + /* See SMBIOS 2.7.0 section 7.18 - Memory Device (Type 17) */ + 
if (size_KB <= 0x7FFF) { + /* Can represent up to 32767KB with the top bit set */ + type17->size = size_KB | (1 << 15); + } else if (size_MB < 0x7FFF) { + /* Can represent up to 32766MB with the top bit unset */ + type17->size = size_MB & 0x7FFF; + } else { + type17->size = 0x7FFF; + /* + * Can represent up to 2147483647MB (~2PB) + * The top bit is reserved + */ + type17->xsize = size_MB & 0x7FFFFFFF; } return (0); @@ -755,7 +775,7 @@ smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr) memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR, SMBIOS_ENTRY_IANCHORLEN); smbios_ep->staddr = staddr; - smbios_ep->bcdrev = 0x24; + smbios_ep->bcdrev = (smbios_ep->major & 0xf) << 4 | (smbios_ep->minor & 0xf); } static void @@ -797,7 +817,7 @@ smbios_build(struct vmctx *ctx) startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH); if (startaddr == NULL) { - fprintf(stderr, "smbios table requires mapped mem\n"); + EPRINTLN("smbios table requires mapped mem"); return (ENOMEM); } diff --git a/usr/src/cmd/bhyve/task_switch.c b/usr/src/cmd/bhyve/task_switch.c index b5950a19d8..f1b564d560 100644 --- a/usr/src/cmd/bhyve/task_switch.c +++ b/usr/src/cmd/bhyve/task_switch.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #include <vmmapi.h> #include "bhyverun.h" +#include "debug.h" /* * Using 'struct i386tss' is tempting but causes myriad sign extension @@ -843,7 +844,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) } if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) { - fprintf(stderr, "Task switch to 16-bit TSS not supported\n"); + EPRINTLN("Task switch to 16-bit TSS not supported"); return (VMEXIT_ABORT); } diff --git a/usr/src/cmd/bhyve/test/tst/mevent/mevent.c b/usr/src/cmd/bhyve/test/tst/mevent/mevent.c index 17b6546847..971cf4aa77 100644 --- a/usr/src/cmd/bhyve/test/tst/mevent/mevent.c +++ b/usr/src/cmd/bhyve/test/tst/mevent/mevent.c @@ -42,7 +42,7 @@ test_mevent_count_lists(int *ret_global, int *ret_change, int 
*ret_del_pending) LIST_FOREACH(mevp, &change_head, me_list) { change++; - if (mevp->me_state == MEV_DEL_PENDING) { + if (mevp->me_state == EV_DELETE) { del_pending++; } VERBOSE(("on change: type %d fd %d state %d", mevp->me_type, diff --git a/usr/src/cmd/bhyve/uart_emul.c b/usr/src/cmd/bhyve/uart_emul.c index fc448152ad..077380a422 100644 --- a/usr/src/cmd/bhyve/uart_emul.c +++ b/usr/src/cmd/bhyve/uart_emul.c @@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include "mevent.h" #include "uart_emul.h" +#include "debug.h" #define COM1_BASE 0x3F8 #define COM1_IRQ 4 @@ -178,6 +179,7 @@ ttyopen(struct ttyfd *tf) tio_stdio_orig = orig; atexit(ttyclose); } + raw_stdio = 1; } static int diff --git a/usr/src/cmd/bhyve/usb_mouse.c b/usr/src/cmd/bhyve/usb_mouse.c index 921fce5db9..8af86fcdc7 100644 --- a/usr/src/cmd/bhyve/usb_mouse.c +++ b/usr/src/cmd/bhyve/usb_mouse.c @@ -42,10 +42,11 @@ __FBSDID("$FreeBSD$"); #include "usb_emul.h" #include "console.h" #include "bhyvegc.h" +#include "debug.h" static int umouse_debug = 0; -#define DPRINTF(params) if (umouse_debug) printf params -#define WPRINTF(params) printf params +#define DPRINTF(params) if (umouse_debug) PRINTLN params +#define WPRINTF(params) PRINTLN params /* USB endpoint context (1-15) for reporting mouse data events*/ #define UMOUSE_INTR_ENDPT 1 @@ -350,7 +351,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) eshort = 0; if (!xfer->ureq) { - DPRINTF(("umouse_request: port %d\r\n", sc->hci->hci_port)); + DPRINTF(("umouse_request: port %d", sc->hci->hci_port)); goto done; } @@ -359,13 +360,13 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) len = UGETW(xfer->ureq->wLength); DPRINTF(("umouse_request: port %d, type 0x%x, req 0x%x, val 0x%x, " - "idx 0x%x, len %u\r\n", + "idx 0x%x, len %u", sc->hci->hci_port, xfer->ureq->bmRequestType, xfer->ureq->bRequest, value, index, len)); switch (UREQ(xfer->ureq->bRequest, xfer->ureq->bmRequestType)) { case UREQ(UR_GET_CONFIG, UT_READ_DEVICE): - DPRINTF(("umouse: 
(UR_GET_CONFIG, UT_READ_DEVICE)\r\n")); + DPRINTF(("umouse: (UR_GET_CONFIG, UT_READ_DEVICE)")); if (!data) break; @@ -376,7 +377,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UREQ(UR_GET_DESCRIPTOR, UT_READ_DEVICE): - DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_DEVICE) val %x\r\n", + DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_DEVICE) val %x", value >> 8)); if (!data) break; @@ -384,7 +385,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) switch (value >> 8) { case UDESC_DEVICE: DPRINTF(("umouse: (->UDESC_DEVICE) len %u ?= " - "sizeof(umouse_dev_desc) %lu\r\n", + "sizeof(umouse_dev_desc) %lu", len, sizeof(umouse_dev_desc))); if ((value & 0xFF) != 0) { err = USB_ERR_IOERROR; @@ -400,7 +401,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UDESC_CONFIG: - DPRINTF(("umouse: (->UDESC_CONFIG)\r\n")); + DPRINTF(("umouse: (->UDESC_CONFIG)")); if ((value & 0xFF) != 0) { err = USB_ERR_IOERROR; goto done; @@ -416,7 +417,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UDESC_STRING: - DPRINTF(("umouse: (->UDESC_STRING)\r\n")); + DPRINTF(("umouse: (->UDESC_STRING)")); str = NULL; if ((value & 0xFF) < UMSTR_MAX) str = umouse_desc_strings[value & 0xFF]; @@ -459,7 +460,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UDESC_BOS: - DPRINTF(("umouse: USB3 BOS\r\n")); + DPRINTF(("umouse: USB3 BOS")); if (len > sizeof(umouse_bosd)) { data->blen = len - sizeof(umouse_bosd); len = sizeof(umouse_bosd); @@ -470,7 +471,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; default: - DPRINTF(("umouse: unknown(%d)->ERROR\r\n", value >> 8)); + DPRINTF(("umouse: unknown(%d)->ERROR", value >> 8)); err = USB_ERR_IOERROR; goto done; } @@ -479,7 +480,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) case UREQ(UR_GET_DESCRIPTOR, UT_READ_INTERFACE): DPRINTF(("umouse: (UR_GET_DESCRIPTOR, UT_READ_INTERFACE) " - "0x%x\r\n", (value >> 8))); + "0x%x", (value >> 8))); 
if (!data) break; @@ -494,7 +495,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) data->bdone += len; break; default: - DPRINTF(("umouse: IO ERROR\r\n")); + DPRINTF(("umouse: IO ERROR")); err = USB_ERR_IOERROR; goto done; } @@ -502,9 +503,9 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UREQ(UR_GET_INTERFACE, UT_READ_INTERFACE): - DPRINTF(("umouse: (UR_GET_INTERFACE, UT_READ_INTERFACE)\r\n")); + DPRINTF(("umouse: (UR_GET_INTERFACE, UT_READ_INTERFACE)")); if (index != 0) { - DPRINTF(("umouse get_interface, invalid index %d\r\n", + DPRINTF(("umouse get_interface, invalid index %d", index)); err = USB_ERR_IOERROR; goto done; @@ -522,7 +523,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UREQ(UR_GET_STATUS, UT_READ_DEVICE): - DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_DEVICE)\r\n")); + DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_DEVICE)")); if (data != NULL && len > 1) { if (sc->hid.feature == UF_DEVICE_REMOTE_WAKEUP) USETW(udata, UDS_REMOTE_WAKEUP); @@ -537,7 +538,7 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) case UREQ(UR_GET_STATUS, UT_READ_INTERFACE): case UREQ(UR_GET_STATUS, UT_READ_ENDPOINT): - DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_INTERFACE)\r\n")); + DPRINTF(("umouse: (UR_GET_STATUS, UT_READ_INTERFACE)")); if (data != NULL && len > 1) { USETW(udata, 0); data->blen = len - 2; @@ -548,26 +549,26 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) case UREQ(UR_SET_ADDRESS, UT_WRITE_DEVICE): /* XXX Controller should've handled this */ - DPRINTF(("umouse set address %u\r\n", value)); + DPRINTF(("umouse set address %u", value)); break; case UREQ(UR_SET_CONFIG, UT_WRITE_DEVICE): - DPRINTF(("umouse set config %u\r\n", value)); + DPRINTF(("umouse set config %u", value)); break; case UREQ(UR_SET_DESCRIPTOR, UT_WRITE_DEVICE): - DPRINTF(("umouse set descriptor %u\r\n", value)); + DPRINTF(("umouse set descriptor %u", value)); break; case UREQ(UR_CLEAR_FEATURE, UT_WRITE_DEVICE): - 
DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x\r\n", value)); + DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x", value)); if (value == UF_DEVICE_REMOTE_WAKEUP) sc->hid.feature = 0; break; case UREQ(UR_SET_FEATURE, UT_WRITE_DEVICE): - DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x\r\n", value)); + DPRINTF(("umouse: (UR_SET_FEATURE, UT_WRITE_DEVICE) %x", value)); if (value == UF_DEVICE_REMOTE_WAKEUP) sc->hid.feature = UF_DEVICE_REMOTE_WAKEUP; break; @@ -576,31 +577,31 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) case UREQ(UR_CLEAR_FEATURE, UT_WRITE_ENDPOINT): case UREQ(UR_SET_FEATURE, UT_WRITE_INTERFACE): case UREQ(UR_SET_FEATURE, UT_WRITE_ENDPOINT): - DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_INTERFACE)\r\n")); + DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_INTERFACE)")); err = USB_ERR_IOERROR; goto done; case UREQ(UR_SET_INTERFACE, UT_WRITE_INTERFACE): - DPRINTF(("umouse set interface %u\r\n", value)); + DPRINTF(("umouse set interface %u", value)); break; case UREQ(UR_ISOCH_DELAY, UT_WRITE_DEVICE): - DPRINTF(("umouse set isoch delay %u\r\n", value)); + DPRINTF(("umouse set isoch delay %u", value)); break; case UREQ(UR_SET_SEL, 0): - DPRINTF(("umouse set sel\r\n")); + DPRINTF(("umouse set sel")); break; case UREQ(UR_SYNCH_FRAME, UT_WRITE_ENDPOINT): - DPRINTF(("umouse synch frame\r\n")); + DPRINTF(("umouse synch frame")); break; /* HID device requests */ case UREQ(UMOUSE_GET_REPORT, UT_READ_CLASS_INTERFACE): DPRINTF(("umouse: (UMOUSE_GET_REPORT, UT_READ_CLASS_INTERFACE) " - "0x%x\r\n", (value >> 8))); + "0x%x", (value >> 8))); if (!data) break; @@ -641,23 +642,23 @@ umouse_request(void *scarg, struct usb_data_xfer *xfer) break; case UREQ(UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE): - DPRINTF(("umouse: (UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE) ignored\r\n")); + DPRINTF(("umouse: (UMOUSE_SET_REPORT, UT_WRITE_CLASS_INTERFACE) ignored")); break; case UREQ(UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE): sc->hid.idle = 
UGETW(xfer->ureq->wValue) >> 8; - DPRINTF(("umouse: (UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE) %x\r\n", + DPRINTF(("umouse: (UMOUSE_SET_IDLE, UT_WRITE_CLASS_INTERFACE) %x", sc->hid.idle)); break; case UREQ(UMOUSE_SET_PROTOCOL, UT_WRITE_CLASS_INTERFACE): sc->hid.protocol = UGETW(xfer->ureq->wValue) >> 8; - DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_CLASS_INTERFACE) %x\r\n", + DPRINTF(("umouse: (UR_CLEAR_FEATURE, UT_WRITE_CLASS_INTERFACE) %x", sc->hid.protocol)); break; default: - DPRINTF(("**** umouse request unhandled\r\n")); + DPRINTF(("**** umouse request unhandled")); err = USB_ERR_IOERROR; break; } @@ -676,7 +677,7 @@ done: #endif - DPRINTF(("umouse request error code %d (0=ok), blen %u txlen %u\r\n", + DPRINTF(("umouse request error code %d (0=ok), blen %u txlen %u", err, (data ? data->blen : 0), (data ? data->bdone : 0))); return (err); @@ -692,7 +693,7 @@ umouse_data_handler(void *scarg, struct usb_data_xfer *xfer, int dir, int len, i, idx; int err; - DPRINTF(("umouse handle data - DIR=%s|EP=%d, blen %d\r\n", + DPRINTF(("umouse handle data - DIR=%s|EP=%d, blen %d", dir ? 
"IN" : "OUT", epctx, xfer->data[0].blen)); @@ -720,7 +721,7 @@ umouse_data_handler(void *scarg, struct usb_data_xfer *xfer, int dir, len = data->blen; if (udata == NULL) { - DPRINTF(("umouse no buffer provided for input\r\n")); + DPRINTF(("umouse no buffer provided for input")); err = USB_ERR_NOMEM; goto done; } diff --git a/usr/src/cmd/bhyve/virtio.c b/usr/src/cmd/bhyve/virtio.c index 2d78b016c6..d899a57795 100644 --- a/usr/src/cmd/bhyve/virtio.c +++ b/usr/src/cmd/bhyve/virtio.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <pthread_np.h> #include "bhyverun.h" +#include "debug.h" #include "pci_emul.h" #include "virtio.h" @@ -102,6 +103,7 @@ vi_reset_dev(struct virtio_softc *vs) for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) { vq->vq_flags = 0; vq->vq_last_avail = 0; + vq->vq_next_used = 0; vq->vq_save_used = 0; vq->vq_pfn = 0; vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR; @@ -199,6 +201,7 @@ vi_vq_init(struct virtio_softc *vs, uint32_t pfn) /* Mark queue as allocated, and start at 0 when we use it. */ vq->vq_flags = VQ_ALLOC; vq->vq_last_avail = 0; + vq->vq_next_used = 0; vq->vq_save_used = 0; } @@ -279,7 +282,7 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, * the guest has written are valid (including all their * vd_next fields and vd_flags). * - * Compute (last_avail - va_idx) in integers mod 2**16. This is + * Compute (va_idx - last_avail) in integers mod 2**16. This is * the number of descriptors the device has made available * since the last time we updated vq->vq_last_avail. 
* @@ -292,8 +295,8 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, return (0); if (ndesc > vq->vq_qsize) { /* XXX need better way to diagnose issues */ - fprintf(stderr, - "%s: ndesc (%u) out of range, driver confused?\r\n", + EPRINTLN( + "%s: ndesc (%u) out of range, driver confused?", name, (u_int)ndesc); return (-1); } @@ -311,9 +314,9 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, vq->vq_last_avail++; for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) { if (next >= vq->vq_qsize) { - fprintf(stderr, + EPRINTLN( "%s: descriptor index %u out of range, " - "driver confused?\r\n", + "driver confused?", name, next); return (-1); } @@ -323,17 +326,17 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, i++; } else if ((vs->vs_vc->vc_hv_caps & VIRTIO_RING_F_INDIRECT_DESC) == 0) { - fprintf(stderr, + EPRINTLN( "%s: descriptor has forbidden INDIRECT flag, " - "driver confused?\r\n", + "driver confused?", name); return (-1); } else { n_indir = vdir->vd_len / 16; if ((vdir->vd_len & 0xf) || n_indir == 0) { - fprintf(stderr, + EPRINTLN( "%s: invalid indir len 0x%x, " - "driver confused?\r\n", + "driver confused?", name, (u_int)vdir->vd_len); return (-1); } @@ -350,9 +353,9 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, for (;;) { vp = &vindir[next]; if (vp->vd_flags & VRING_DESC_F_INDIRECT) { - fprintf(stderr, + EPRINTLN( "%s: indirect desc has INDIR flag," - " driver confused?\r\n", + " driver confused?", name); return (-1); } @@ -363,9 +366,9 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, break; next = vp->vd_next; if (next >= n_indir) { - fprintf(stderr, + EPRINTLN( "%s: invalid next %u > %u, " - "driver confused?\r\n", + "driver confused?", name, (u_int)next, n_indir); return (-1); } @@ -375,45 +378,37 @@ vq_getchain(struct vqueue_info *vq, uint16_t *pidx, return (i); } loopy: - fprintf(stderr, - "%s: descriptor loop? count > %d - driver confused?\r\n", + EPRINTLN( + "%s: descriptor loop? 
count > %d - driver confused?", name, i); return (-1); } /* - * Return the currently-first request chain back to the available queue. + * Return the first n_chain request chains back to the available queue. * - * (This chain is the one you handled when you called vq_getchain() + * (These chains are the ones you handled when you called vq_getchain() * and used its positive return value.) */ void -vq_retchain(struct vqueue_info *vq) +vq_retchains(struct vqueue_info *vq, uint16_t n_chains) { - vq->vq_last_avail--; + vq->vq_last_avail -= n_chains; } -/* - * Return specified request chain to the guest, setting its I/O length - * to the provided value. - * - * (This chain is the one you handled when you called vq_getchain() - * and used its positive return value.) - */ void -vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) +vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) { - uint16_t uidx, mask; volatile struct vring_used *vuh; volatile struct virtio_used *vue; + uint16_t mask; /* * Notes: * - mask is N-1 where N is a power of 2 so computes x % N * - vuh points to the "used" data shared with guest * - vue points to the "used" ring entry we want to update - * - head is the same value we compute in vq_iovecs(). * * (I apologize for the two fields named vu_idx; the * virtio spec calls the one that vue points to, "id"...) @@ -421,18 +416,35 @@ vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) mask = vq->vq_qsize - 1; vuh = vq->vq_used; - uidx = vuh->vu_idx; - vue = &vuh->vu_ring[uidx++ & mask]; + vue = &vuh->vu_ring[vq->vq_next_used++ & mask]; vue->vu_idx = idx; vue->vu_tlen = iolen; +} +void +vq_relchain_publish(struct vqueue_info *vq) +{ /* * Ensure the used descriptor is visible before updating the index. * This is necessary on ISAs with memory ordering less strict than x86 * (and even on x86 to act as a compiler barrier). 
*/ atomic_thread_fence_rel(); - vuh->vu_idx = uidx; + vq->vq_used->vu_idx = vq->vq_next_used; +} + +/* + * Return specified request chain to the guest, setting its I/O length + * to the provided value. + * + * (This chain is the one you handled when you called vq_getchain() + * and used its positive return value.) + */ +void +vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) +{ + vq_relchain_prepare(vq, idx, iolen); + vq_relchain_publish(vq); } /* @@ -598,12 +610,12 @@ bad: if (cr == NULL || cr->cr_size != size) { if (cr != NULL) { /* offset must be OK, so size must be bad */ - fprintf(stderr, - "%s: read from %s: bad size %d\r\n", + EPRINTLN( + "%s: read from %s: bad size %d", name, cr->cr_name, size); } else { - fprintf(stderr, - "%s: read from bad offset/size %jd/%d\r\n", + EPRINTLN( + "%s: read from bad offset/size %jd/%d", name, (uintmax_t)offset, size); } goto done; @@ -718,16 +730,16 @@ bad: if (cr != NULL) { /* offset must be OK, wrong size and/or reg is R/O */ if (cr->cr_size != size) - fprintf(stderr, - "%s: write to %s: bad size %d\r\n", + EPRINTLN( + "%s: write to %s: bad size %d", name, cr->cr_name, size); if (cr->cr_ro) - fprintf(stderr, - "%s: write to read-only reg %s\r\n", + EPRINTLN( + "%s: write to read-only reg %s", name, cr->cr_name); } else { - fprintf(stderr, - "%s: write to bad offset/size %jd/%d\r\n", + EPRINTLN( + "%s: write to bad offset/size %jd/%d", name, (uintmax_t)offset, size); } goto done; @@ -755,7 +767,7 @@ bad: break; case VTCFG_R_QNOTIFY: if (value >= vc->vc_nvq) { - fprintf(stderr, "%s: queue %d notify out of range\r\n", + EPRINTLN("%s: queue %d notify out of range", name, (int)value); goto done; } @@ -765,8 +777,8 @@ bad: else if (vc->vc_qnotify) (*vc->vc_qnotify)(DEV_SOFTC(vs), vq); else - fprintf(stderr, - "%s: qnotify queue %d: missing vq/vc notify\r\n", + EPRINTLN( + "%s: qnotify queue %d: missing vq/vc notify", name, (int)value); break; case VTCFG_R_STATUS: @@ -787,8 +799,8 @@ bad: goto done; bad_qindex: - 
fprintf(stderr, - "%s: write config reg %s: curq %d >= max %d\r\n", + EPRINTLN( + "%s: write config reg %s: curq %d >= max %d", name, cr->cr_name, vs->vs_curq, vc->vc_nvq); done: if (vs->vs_mtx) diff --git a/usr/src/cmd/bhyve/virtio.h b/usr/src/cmd/bhyve/virtio.h index 521bfac681..b28e61d353 100644 --- a/usr/src/cmd/bhyve/virtio.h +++ b/usr/src/cmd/bhyve/virtio.h @@ -393,6 +393,7 @@ struct vqueue_info { uint16_t vq_flags; /* flags (see above) */ uint16_t vq_last_avail; /* a recent value of vq_avail->va_idx */ + uint16_t vq_next_used; /* index of the next used slot to be filled */ uint16_t vq_save_used; /* saved vq_used->vu_idx; see vq_endchains */ uint16_t vq_msix_idx; /* MSI-X index, or VIRTIO_MSI_NO_VECTOR */ @@ -494,7 +495,10 @@ void vi_set_io_bar(struct virtio_softc *, int); int vq_getchain(struct vqueue_info *vq, uint16_t *pidx, struct iovec *iov, int n_iov, uint16_t *flags); -void vq_retchain(struct vqueue_info *vq); +void vq_retchains(struct vqueue_info *vq, uint16_t n_chains); +void vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, + uint32_t iolen); +void vq_relchain_publish(struct vqueue_info *vq); void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen); void vq_endchains(struct vqueue_info *vq, int used_all_avail); diff --git a/usr/src/cmd/bhyve/vmgenc.c b/usr/src/cmd/bhyve/vmgenc.c new file mode 100644 index 0000000000..7d0b2faf24 --- /dev/null +++ b/usr/src/cmd/bhyve/vmgenc.c @@ -0,0 +1,119 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/mman.h> +#include <sys/uuid.h> + +#include <assert.h> +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <stdbool.h> +#include <unistd.h> + +#include <machine/vmm.h> +#include <vmmapi.h> + +#include "acpi.h" +#include "bootrom.h" +#include "vmgenc.h" + +static uint64_t vmgen_gpa; + +void +vmgenc_init(struct vmctx *ctx) +{ + char *region; + int error; + + error = bootrom_alloc(ctx, PAGE_SIZE, PROT_READ, 0, &region, + &vmgen_gpa); + if (error != 0) + errx(4, "%s: bootrom_alloc", __func__); + + /* + * It is basically harmless to always generate a random ID when + * starting a VM. + */ + error = getentropy(region, sizeof(struct uuid)); + if (error == -1) + err(4, "%s: getentropy", __func__); + + /* XXX When we have suspend/resume/rollback. 
*/ +#if 0 + acpi_raise_gpe(ctx, GPE_VMGENC); +#endif +} + +void +vmgenc_write_dsdt(void) +{ + dsdt_line(""); + dsdt_indent(1); + dsdt_line("Scope (_SB)"); + dsdt_line("{"); + + dsdt_line(" Device (GENC)"); + dsdt_line(" {"); + + dsdt_indent(2); + dsdt_line("Name (_CID, \"VM_Gen_Counter\")"); + dsdt_line("Method (_HID, 0, NotSerialized)"); + dsdt_line("{"); + dsdt_line(" Return (\"Bhyve_V_Gen_Counter_V1\")"); + dsdt_line("}"); + dsdt_line("Name (_UID, 0)"); + dsdt_line("Name (_DDN, \"VM_Gen_Counter\")"); + dsdt_line("Name (ADDR, Package (0x02)"); + dsdt_line("{"); + dsdt_line(" 0x%08x,", (uint32_t)vmgen_gpa); + dsdt_line(" 0x%08x", (uint32_t)(vmgen_gpa >> 32)); + dsdt_line("})"); + + dsdt_unindent(2); + dsdt_line(" }"); /* Device (GENC) */ + + dsdt_line("}"); /* Scope (_SB) */ + dsdt_line(""); + + dsdt_line("Scope (_GPE)"); + dsdt_line("{"); + dsdt_line(" Method (_E%02x, 0, NotSerialized)", GPE_VMGENC); + dsdt_line(" {"); + dsdt_line(" Notify (\\_SB.GENC, 0x80)"); + dsdt_line(" }"); + dsdt_line("}"); + dsdt_unindent(1); +} diff --git a/usr/src/cmd/bhyve/vmgenc.h b/usr/src/cmd/bhyve/vmgenc.h new file mode 100644 index 0000000000..437824c102 --- /dev/null +++ b/usr/src/cmd/bhyve/vmgenc.h @@ -0,0 +1,31 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright 2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#pragma once +void vmgenc_init(struct vmctx *); +void vmgenc_write_dsdt(void); diff --git a/usr/src/cmd/bhyve/xmsr.c b/usr/src/cmd/bhyve/xmsr.c index 994445b3e3..26f6a86adf 100644 --- a/usr/src/cmd/bhyve/xmsr.c +++ b/usr/src/cmd/bhyve/xmsr.c @@ -43,9 +43,10 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> #include <string.h> +#include "debug.h" #include "xmsr.h" -static int cpu_vendor_intel, cpu_vendor_amd; +static int cpu_vendor_intel, cpu_vendor_amd, cpu_vendor_hygon; int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val) @@ -70,7 +71,7 @@ emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val) default: break; } - } else if (cpu_vendor_amd) { + } else if (cpu_vendor_amd || cpu_vendor_hygon) { switch (num) { case MSR_HWCR: /* @@ -134,7 +135,7 @@ emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val) error = -1; break; } - } else if (cpu_vendor_amd) { + } else if (cpu_vendor_amd || cpu_vendor_hygon) { switch (num) { case MSR_BIOS_SIGN: *val = 0; @@ -242,10 +243,12 @@ init_msr(void) error = 0; if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { cpu_vendor_amd = 1; + } else if (strcmp(cpu_vendor, "HygonGenuine") == 0) { + 
cpu_vendor_hygon = 1; } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { cpu_vendor_intel = 1; } else { - fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor); + EPRINTLN("Unknown cpu vendor \"%s\"", cpu_vendor); error = -1; } return (error); diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index bbe36917fd..5299791091 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -683,6 +683,8 @@ cpu_vendor_intel(void) if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { return (false); + } else if (strcmp(cpu_vendor, "HygonGenuine") == 0) { + return (false); } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { return (true); } else { diff --git a/usr/src/compat/bhyve/sys/cdefs.h b/usr/src/compat/bhyve/sys/cdefs.h index 0b857437e3..0f3146ea43 100644 --- a/usr/src/compat/bhyve/sys/cdefs.h +++ b/usr/src/compat/bhyve/sys/cdefs.h @@ -55,6 +55,12 @@ #if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L || defined(lint) +#if defined(__cplusplus) && __cplusplus >= 201103L +#define _Alignof(x) alignof(x) +#else +#define _Alignof(x) __alignof(x) +#endif + #if !__has_extension(c_static_assert) #if (defined(__cplusplus) && __cplusplus >= 201103L) || \ __has_extension(cxx_static_assert) @@ -74,4 +80,18 @@ #endif /* __STDC_VERSION__ || __STDC_VERSION__ < 201112L */ +#if __GNUC_PREREQ__(4, 1) +#define __offsetof(type, field) __builtin_offsetof(type, field) +#else +#ifndef __cplusplus +#define __offsetof(type, field) \ + ((__size_t)(__uintptr_t)((const volatile void *)&((type *)0)->field)) +#else +#define __offsetof(type, field) \ + (__offsetof__ (reinterpret_cast <__size_t> \ + (&reinterpret_cast <const volatile char &> \ + (static_cast<type *> (0)->field)))) +#endif +#endif + #endif /* _COMPAT_FREEBSD_SYS_CDEFS_H_ */ diff --git a/usr/src/compat/bhyve/sys/mman.h b/usr/src/compat/bhyve/sys/mman.h new file mode 100644 index 0000000000..a2af4354b2 --- /dev/null +++ b/usr/src/compat/bhyve/sys/mman.h @@ -0,0 +1,23 @@ 
+/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Oxide Computer Company + */ + +#ifndef _COMPAT_FREEBSD_SYS_MMAN_H_ +#define _COMPAT_FREEBSD_SYS_MMAN_H_ + +#include_next <sys/mman.h> + +#define _PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) + +#endif /* _COMPAT_FREEBSD_SYS_MMAN_H_ */ diff --git a/usr/src/compat/bhyve/x86/segments.h b/usr/src/compat/bhyve/x86/segments.h index 11edc582b5..384af079aa 100644 --- a/usr/src/compat/bhyve/x86/segments.h +++ b/usr/src/compat/bhyve/x86/segments.h @@ -18,6 +18,7 @@ #define _COMPAT_FREEBSD_X86_SEGMENTS_H #if defined(_COMPAT_FREEBSD_AMD64_MACHINE_VMM_H_) || defined(_KERNEL) +#define IDT_BP 3 /* #BP: Breakpoint */ #define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ #define IDT_SS 12 /* #SS: Stack Segment Fault */ #define IDT_GP 13 /* #GP: General Protection Fault */ diff --git a/usr/src/lib/libvmmapi/common/vmmapi.c b/usr/src/lib/libvmmapi/common/vmmapi.c index bae214aba0..b4c96d5455 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.c +++ b/usr/src/lib/libvmmapi/common/vmmapi.c @@ -47,7 +47,11 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/sysctl.h> #include <sys/ioctl.h> +#ifdef __FreeBSD__ +#include <sys/linker.h> +#endif #include <sys/mman.h> +#include <sys/module.h> #include <sys/_iovec.h> #include <sys/cpuset.h> @@ -156,7 +160,11 @@ vm_device_open(const char *name) int vm_create(const char *name) { - +#ifdef __FreeBSD__ + /* Try to load vmm(4) module before creating a guest. 
*/ + if (modfind("vmm") < 0) + kldload("vmm"); +#endif return (CREATE((char *)name)); } @@ -898,6 +906,25 @@ vm_ioapic_pincount(struct vmctx *ctx, int *pincount) } int +vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, vm_paddr_t gpa, + bool write, int size, uint64_t *value) +{ + struct vm_readwrite_kernemu_device irp = { + .vcpuid = vcpu, + .access_width = fls(size) - 1, + .gpa = gpa, + .value = write ? *value : ~0ul, + }; + long cmd = (write ? VM_SET_KERNEMU_DEV : VM_GET_KERNEMU_DEV); + int rc; + + rc = ioctl(ctx->fd, cmd, &irp); + if (rc == 0 && !write) + *value = irp.value; + return (rc); +} + +int vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) { struct vm_isa_irq isa_irq; @@ -957,16 +984,13 @@ vm_inject_nmi(struct vmctx *ctx, int vcpu) return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); } -static struct { - const char *name; - int type; -} capstrmap[] = { - { "hlt_exit", VM_CAP_HALT_EXIT }, - { "mtrap_exit", VM_CAP_MTRAP_EXIT }, - { "pause_exit", VM_CAP_PAUSE_EXIT }, - { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, - { "enable_invpcid", VM_CAP_ENABLE_INVPCID }, - { 0 } +static const char *capstrmap[] = { + [VM_CAP_HALT_EXIT] = "hlt_exit", + [VM_CAP_MTRAP_EXIT] = "mtrap_exit", + [VM_CAP_PAUSE_EXIT] = "pause_exit", + [VM_CAP_UNRESTRICTED_GUEST] = "unrestricted_guest", + [VM_CAP_ENABLE_INVPCID] = "enable_invpcid", + [VM_CAP_BPT_EXIT] = "bpt_exit", }; int @@ -974,9 +998,9 @@ vm_capability_name2type(const char *capname) { int i; - for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { - if (strcmp(capstrmap[i].name, capname) == 0) - return (capstrmap[i].type); + for (i = 0; i < nitems(capstrmap); i++) { + if (strcmp(capstrmap[i], capname) == 0) + return (i); } return (-1); @@ -985,12 +1009,8 @@ vm_capability_name2type(const char *capname) const char * vm_capability_type2name(int type) { - int i; - - for (i = 0; capstrmap[i].name != NULL; i++) { - if (capstrmap[i].type == type) - return (capstrmap[i].name); - } + if (type >= 0 
&& type < nitems(capstrmap)) + return (capstrmap[type]); return (NULL); } @@ -1808,6 +1828,7 @@ vm_get_ioctls(size_t *len) VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER, VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR, VM_SET_REGISTER_SET, VM_GET_REGISTER_SET, + VM_SET_KERNEMU_DEV, VM_GET_KERNEMU_DEV, VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ, VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ, VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ, diff --git a/usr/src/lib/libvmmapi/common/vmmapi.h b/usr/src/lib/libvmmapi/common/vmmapi.h index 6cb7a1186d..f7a8731c9a 100644 --- a/usr/src/lib/libvmmapi/common/vmmapi.h +++ b/usr/src/lib/libvmmapi/common/vmmapi.h @@ -47,6 +47,8 @@ #include <sys/param.h> #include <sys/cpuset.h> +#include <stdbool.h> + /* * API version for out-of-tree consumers like grub-bhyve for making compile * time decisions. @@ -175,6 +177,8 @@ int vm_ioapic_assert_irq(struct vmctx *ctx, int irq); int vm_ioapic_deassert_irq(struct vmctx *ctx, int irq); int vm_ioapic_pulse_irq(struct vmctx *ctx, int irq); int vm_ioapic_pincount(struct vmctx *ctx, int *pincount); +int vm_readwrite_kernemu_device(struct vmctx *ctx, int vcpu, + vm_paddr_t gpa, bool write, int size, uint64_t *value); int vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index e8aeaaffcf..1b766008a8 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -1,30 +1,37 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: -commit 37e8a0e0058c226e6bd0ed5c3a07ee15b1146122 -Author: mav <mav@FreeBSD.org> -Date: Mon Sep 23 17:53:47 2019 +0000 +commit 8ade7383cafed0f7555cac16ef7f9e956e46eaeb +Author: 
grehan <grehan@FreeBSD.org> +Date: Mon May 25 06:25:31 2020 +0000 - Make nvme(4) driver some more NUMA aware. + Fix pci-passthru MSI issues with OpenBSD guests - - For each queue pair precalculate CPU and domain it is bound to. - If queue pairs are not per-CPU, then use the domain of the device. - - Allocate most of queue pair memory from the domain it is bound to. - - Bind callouts to the same CPUs as queue pair to avoid migrations. - - Do not assign queue pairs to each SMT thread. It just wasted - resources and increased lock congestions. - - Remove fixed multiplier of CPUs per queue pair, spread them even. - This allows to use more queue pairs in some hardware configurations. - - If queue pair serves multiple CPUs, bind different NVMe devices to - different CPUs. + - Return 2 x 16-bit registers in the correct byte order + for a 4-byte read that spans the CMD/STATUS register. + This reversal was hiding the capabilities-list, which prevented + the MSI capability from being found for XHCI passthru. - MFC after: 1 month - Sponsored by: iXsystems, Inc. + - Reorganize MSI/MSI-x config writes so that a 4-byte write at the + capability offset would have the read-only portion skipped. + This prevented MSI interrupts from being enabled. -Which corresponds to SVN revision: 352630 + Reported and extensively tested by Anatoli (me at anatoli dot ws) + PR: 245392 + Reported by: Anatoli (me at anatoli dot ws) + Reviewed by: jhb (bhyve) + Approved by: jhb, bz (mentor) + MFC after: 1 week + Differential Revision: https://reviews.freebsd.org/D24951 -NOTE: -This sync ignores commit c8edafdabc27533d9c51eddc2896e772c16d965c. -There are big changes to the virtio net devices that we haven't synced up yet -because SmartOS relies heavily on viona instead. +Divergence Notes: +A previous sync skipped commit c8edafdabc27533d9c51eddc2896e772c16d965c which +introduced a generic backend functionality to network devices. 
Without that in +place, subsequent updates reflect the absence of that subsystem. Integrating +net backends has not been a priority, given the common use of viona on illumos. + +The draft Save/Restore functionality, added in FreeBSD commit +d3e4e512238b072fb9282e024610b981ba679869, has not been synced into illumos bhyve +yet. It is not built by default in FreeBSD, so we're not interested in taking +it until it successfully endures more in-depth testing. diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c index f6b6e60363..9cf9200b3d 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c +++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_hw.c @@ -58,7 +58,8 @@ __FBSDID("$FreeBSD$"); #include "amdvi_priv.h" SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); #define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s))) #define MOD_DEC(a, s, m) (((a) - (s)) % ((m) * (s))) @@ -66,7 +67,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW, NULL, NULL); /* Print RID or device ID in PCI string format. */ #define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d) -static void amdvi_dump_cmds(struct amdvi_softc *softc); +static void amdvi_dump_cmds(struct amdvi_softc *softc, int count); static void amdvi_print_dev_cap(struct amdvi_softc *softc); MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi"); @@ -96,7 +97,7 @@ SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN, TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp); /* Page table level used <= supported by h/w[v1=7]. 
*/ -static int amdvi_ptp_level = 4; +int amdvi_ptp_level = 4; SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN, &amdvi_ptp_level, 0, NULL); TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level); @@ -321,9 +322,7 @@ amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data) pa = vtophys(&softc->cmp_data); cmd->opcode = AMDVI_CMP_WAIT_OPCODE; - cmd->word0 = (pa & 0xFFFFFFF8) | - (AMDVI_CMP_WAIT_STORE); - //(AMDVI_CMP_WAIT_FLUSH | AMDVI_CMP_WAIT_STORE); + cmd->word0 = (pa & 0xFFFFFFF8) | AMDVI_CMP_WAIT_STORE; cmd->word1 = (pa >> 32) & 0xFFFFF; cmd->addr = data; @@ -492,26 +491,26 @@ amdvi_wait(struct amdvi_softc *softc) device_printf(softc->dev, "Error: completion failed" " tail:0x%x, head:0x%x.\n", ctrl->cmd_tail, ctrl->cmd_head); - amdvi_dump_cmds(softc); + /* Dump the last command. */ + amdvi_dump_cmds(softc, 1); } static void -amdvi_dump_cmds(struct amdvi_softc *softc) +amdvi_dump_cmds(struct amdvi_softc *softc, int count) { struct amdvi_ctrl *ctrl; struct amdvi_cmd *cmd; int off, i; ctrl = softc->ctrl; - device_printf(softc->dev, "Dump all the commands:\n"); + device_printf(softc->dev, "Dump last %d command(s):\n", count); /* * If h/w is stuck in completion, it is the previous command, * start dumping from previous command onward. 
*/ off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd), softc->cmd_max); - for (i = 0; off != ctrl->cmd_tail && - i < softc->cmd_max; i++) { + for (i = 0; off != ctrl->cmd_tail && i < count; i++) { cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off); printf(" [CMD%d, off:0x%x] opcode= 0x%x 0x%x" " 0x%x 0x%lx\n", i, off, cmd->opcode, @@ -949,16 +948,16 @@ amdvi_add_sysctl(struct amdvi_softc *softc) SYSCTL_ADD_U16(ctx, child, OID_AUTO, "end_dev_rid", CTLFLAG_RD, &softc->end_dev_rid, 0, "End of device under this IOMMU"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head", - CTLTYPE_UINT | CTLFLAG_RD, softc, 0, + CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 0, amdvi_handle_sysctl, "IU", "Command head"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail", - CTLTYPE_UINT | CTLFLAG_RD, softc, 1, + CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 1, amdvi_handle_sysctl, "IU", "Command tail"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head", - CTLTYPE_UINT | CTLFLAG_RD, softc, 2, + CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 2, amdvi_handle_sysctl, "IU", "Command head"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail", - CTLTYPE_UINT | CTLFLAG_RD, softc, 3, + CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 3, amdvi_handle_sysctl, "IU", "Command tail"); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h index 6ee6c36632..2db6914f08 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h +++ b/usr/src/uts/i86pc/io/vmm/amd/amdvi_priv.h @@ -357,33 +357,15 @@ struct amdvi_domain { }; /* - * I/O Virtualization Hardware Definition Block (IVHD) type 0x10 (legacy) - * uses ACPI_IVRS_HARDWARE define in contrib/dev/acpica/include/actbl2.h - * New IVHD types 0x11 and 0x40 as defined in AMD IOMMU spec[48882] are missing in - * ACPI code. These new types add extra field EFR(Extended Feature Register). - * XXX : Use definition from ACPI when it is available. 
- */ -typedef struct acpi_ivrs_hardware_efr_sup -{ - ACPI_IVRS_HEADER Header; - UINT16 CapabilityOffset; /* Offset for IOMMU control fields */ - UINT64 BaseAddress; /* IOMMU control registers */ - UINT16 PciSegmentGroup; - UINT16 Info; /* MSI number and unit ID */ - UINT32 Attr; /* IOMMU Feature */ - UINT64 ExtFR; /* IOMMU Extended Feature */ - UINT64 Reserved; /* v1 feature or v2 attribute */ -} __attribute__ ((__packed__)) ACPI_IVRS_HARDWARE_EFRSUP; -CTASSERT(sizeof(ACPI_IVRS_HARDWARE_EFRSUP) == 40); - -/* * Different type of IVHD. * XXX: Use AcpiIvrsType once new IVHD types are available. */ enum IvrsType { - IVRS_TYPE_HARDWARE_LEGACY = 0x10, /* Legacy without EFRi support. */ - IVRS_TYPE_HARDWARE_EFR = 0x11, /* With EFR support. */ + IVRS_TYPE_HARDWARE_LEGACY = ACPI_IVRS_TYPE_HARDWARE1, + /* Legacy without EFRi support. */ + IVRS_TYPE_HARDWARE_EFR = ACPI_IVRS_TYPE_HARDWARE2, + /* With EFR support. */ IVRS_TYPE_HARDWARE_MIXED = 0x40, /* Mixed with EFR support. */ }; diff --git a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c index 370c20fb01..b754058c07 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c +++ b/usr/src/uts/i86pc/io/vmm/amd/ivrs_drv.c @@ -54,7 +54,7 @@ int ivhd_count; /* Number of IVHD header. */ * Cached IVHD header list. * Single entry for each IVHD, filtered the legacy one. */ -ACPI_IVRS_HARDWARE *ivhd_hdrs[10]; +ACPI_IVRS_HARDWARE1 *ivhd_hdrs[10]; extern int amdvi_ptp_level; /* Page table levels. 
*/ @@ -161,7 +161,7 @@ ivrs_hdr_find_iter(ACPI_IVRS_HEADER * ivrs_hdr, void *args) return (1); } -static ACPI_IVRS_HARDWARE * +static ACPI_IVRS_HARDWARE1 * ivhd_find_by_index(int idx) { struct find_ivrs_hdr_args fi; @@ -171,7 +171,7 @@ ivhd_find_by_index(int idx) ivrs_hdr_iterate_tbl(ivrs_hdr_find_iter, &fi); - return ((ACPI_IVRS_HARDWARE *)fi.ptr); + return ((ACPI_IVRS_HARDWARE1 *)fi.ptr); } static void @@ -195,7 +195,7 @@ ivhd_dev_add_entry(struct amdvi_softc *softc, uint32_t start_id, * Record device attributes as suggested by BIOS. */ static int -ivhd_dev_parse(ACPI_IVRS_HARDWARE* ivhd, struct amdvi_softc *softc) +ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc) { ACPI_IVRS_DE_HEADER *de; uint8_t *p, *end; @@ -209,12 +209,12 @@ ivhd_dev_parse(ACPI_IVRS_HARDWARE* ivhd, struct amdvi_softc *softc) switch (ivhd->Header.Type) { case IVRS_TYPE_HARDWARE_LEGACY: - p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE); + p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE1); break; case IVRS_TYPE_HARDWARE_EFR: case IVRS_TYPE_HARDWARE_MIXED: - p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE_EFRSUP); + p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE2); break; default: @@ -327,7 +327,7 @@ static void ivhd_identify(driver_t *driver, device_t parent) { ACPI_TABLE_IVRS *ivrs; - ACPI_IVRS_HARDWARE *ivhd; + ACPI_IVRS_HARDWARE1 *ivhd; ACPI_STATUS status; int i, count = 0; uint32_t ivrs_ivinfo; @@ -408,7 +408,7 @@ ivhd_identify(driver_t *driver, device_t parent) static int ivhd_probe(device_t dev) { - ACPI_IVRS_HARDWARE *ivhd; + ACPI_IVRS_HARDWARE1 *ivhd; int unit; if (acpi_get_handle(dev) != NULL) @@ -582,7 +582,7 @@ ivhd_print_ext_feature(device_t dev, uint64_t ext_feature) } static int -ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE * ivhd) +ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd) { device_t dev; int max_ptp_level; @@ -612,8 +612,8 @@ ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE * ivhd) static int 
ivhd_attach(device_t dev) { - ACPI_IVRS_HARDWARE *ivhd; - ACPI_IVRS_HARDWARE_EFRSUP *ivhd_efr; + ACPI_IVRS_HARDWARE1 *ivhd; + ACPI_IVRS_HARDWARE2 *ivhd_efr; struct amdvi_softc *softc; int status, unit; @@ -637,7 +637,7 @@ ivhd_attach(device_t dev) * On legacy IVHD type(0x10), it is documented as feature * but in newer type it is attribute. */ - softc->ivhd_feature = ivhd->Reserved; + softc->ivhd_feature = ivhd->FeatureReporting; /* * PCI capability has more capabilities that are not part of IVRS. */ @@ -648,12 +648,11 @@ ivhd_attach(device_t dev) softc->event_msix = ivhd->Info & 0x1F; #endif switch (ivhd->Header.Type) { - case IVRS_TYPE_HARDWARE_EFR: - case IVRS_TYPE_HARDWARE_MIXED: - ivhd_efr = (ACPI_IVRS_HARDWARE_EFRSUP *)ivhd; - softc->ext_feature = ivhd_efr->ExtFR; - break; - + case IVRS_TYPE_HARDWARE_EFR: + case IVRS_TYPE_HARDWARE_MIXED: + ivhd_efr = (ACPI_IVRS_HARDWARE2 *)ivhd; + softc->ext_feature = ivhd_efr->EfrRegisterImage; + break; } softc->ctrl = (struct amdvi_ctrl *) PHYS_TO_DMAP(ivhd->BaseAddress); diff --git a/usr/src/uts/i86pc/io/vmm/amd/npt.c b/usr/src/uts/i86pc/io/vmm/amd/npt.c index e61464a964..862f6a0ecf 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/npt.c +++ b/usr/src/uts/i86pc/io/vmm/amd/npt.c @@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$"); #include "npt.h" SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, npt, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw_vmm, OID_AUTO, npt, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); static int npt_flags; SYSCTL_INT(_hw_vmm_npt, OID_AUTO, pmap_flags, CTLFLAG_RD, diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index c194e3d818..d1d7343d82 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -77,7 +77,8 @@ __FBSDID("$FreeBSD$"); #include "npt.h" SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); /* * SVM CPUID function 0x8000_000A, 
edx bit decoding. @@ -2296,6 +2297,11 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val) return (0); } + if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) { + /* Ignore. */ + return (0); + } + /* * XXX deal with CR3 and invalidate TLB entries tagged with the * vcpu's ASID. This needs to be treated differently depending on diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c index 4915537b0a..dcc4e3c330 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/ept.c +++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c @@ -83,7 +83,8 @@ __FBSDID("$FreeBSD$"); #define EPT_ENABLE_AD_BITS (1 << 6) SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); static int ept_enable_ad_bits; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c index d19f6bc262..bb7ee45048 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c @@ -135,6 +135,8 @@ vmcs_field_encoding(int ident) return (VMCS_GUEST_PDPTE2); case VM_REG_GUEST_PDPTE3: return (VMCS_GUEST_PDPTE3); + case VM_REG_GUEST_ENTRY_INST_LENGTH: + return (VMCS_ENTRY_INST_LENGTH); default: return (-1); } diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index ede3a54d66..b0e399c31a 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -162,7 +162,8 @@ static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); #ifdef __FreeBSD__ int vmxon_enabled[MAXCPU]; @@ -191,7 +192,11 @@ SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, /* * Optional capabilities */ -SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW, NULL, NULL); +#ifdef __FreeBSD__ +static 
SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, + CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); +#endif static int cap_halt_exit; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, halt_exit, CTLFLAG_RD, &cap_halt_exit, 0, @@ -213,6 +218,10 @@ static int cap_invpcid; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid, 0, "Guests are allowed to use INVPCID"); +static int tpr_shadowing; +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD, + &tpr_shadowing, 0, "TPR shadowing support"); + static int virtual_interrupt_delivery; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD, &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support"); @@ -232,10 +241,10 @@ static u_int vpid_alloc_failed; SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD, &vpid_alloc_failed, 0, NULL); -static int guest_l1d_flush; +int guest_l1d_flush; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush, CTLFLAG_RD, &guest_l1d_flush, 0, NULL); -static int guest_l1d_flush_sw; +int guest_l1d_flush_sw; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD, &guest_l1d_flush_sw, 0, NULL); @@ -699,7 +708,7 @@ vmx_restore(void) static int vmx_init(int ipinum) { - int error, use_tpr_shadow; + int error; #ifdef __FreeBSD__ uint64_t basic, fixed0, fixed1, feature_control; #else @@ -829,6 +838,24 @@ vmx_init(int ipinum) &tmp) == 0); /* + * Check support for TPR shadow. + */ + error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, + MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, + &tmp); + if (error == 0) { + tpr_shadowing = 1; + TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing", + &tpr_shadowing); + } + + if (tpr_shadowing) { + procbased_ctls |= PROCBASED_USE_TPR_SHADOW; + procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING; + procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING; + } + + /* * Check support for virtual interrupt delivery. 
*/ procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES | @@ -836,13 +863,9 @@ vmx_init(int ipinum) PROCBASED2_APIC_REGISTER_VIRTUALIZATION | PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); - use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, - MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, - &tmp) == 0); - error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, procbased2_vid_bits, 0, &tmp); - if (error == 0 && use_tpr_shadow) { + if (error == 0 && tpr_shadowing) { virtual_interrupt_delivery = 1; TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid", &virtual_interrupt_delivery); @@ -854,13 +877,6 @@ vmx_init(int ipinum) procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE; /* - * No need to emulate accesses to %CR8 if virtual - * interrupt delivery is enabled. - */ - procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING; - procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING; - - /* * Check for Posted Interrupts only if Virtual Interrupt * Delivery is enabled. */ @@ -1181,14 +1197,17 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); - if (virtual_interrupt_delivery) { - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + if (tpr_shadowing) { #ifdef __FreeBSD__ error += vmwrite(VMCS_VIRTUAL_APIC, vtophys(&vmx->apic_page[i])); #else error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa); #endif + } + + if (virtual_interrupt_delivery) { + error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); error += vmwrite(VMCS_EOI_EXIT0, 0); error += vmwrite(VMCS_EOI_EXIT1, 0); error += vmwrite(VMCS_EOI_EXIT2, 0); @@ -1209,6 +1228,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->cap[i].set = 0; vmx->cap[i].proc_ctls = procbased_ctls; vmx->cap[i].proc_ctls2 = procbased_ctls2; + vmx->cap[i].exc_bitmap = exc_bitmap; vmx->state[i].nextrip = ~0; vmx->state[i].lastcpu = NOCPU; @@ -2942,6 +2962,18 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) return (1); } + 
/* + * If the hypervisor has requested user exits for + * debug exceptions, bounce them out to userland. + */ + if (intr_type == VMCS_INTR_T_SWEXCEPTION && intr_vec == IDT_BP && + (vmx->cap[vcpu].set & (1 << VM_CAP_BPT_EXIT))) { + vmexit->exitcode = VM_EXITCODE_BPT; + vmexit->u.bpt.inst_length = vmexit->inst_length; + vmexit->inst_length = 0; + break; + } + if (intr_vec == IDT_PF) { error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d", @@ -3040,6 +3072,12 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit); vmexit->exitcode = VM_EXITCODE_MWAIT; break; + case EXIT_REASON_TPR: + vlapic = vm_lapic(vmx->vm, vcpu); + vlapic_sync_tpr(vlapic); + vmexit->inst_length = 0; + handled = HANDLED; + break; case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: @@ -3410,6 +3448,16 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, ldt_sel = sldt(); #endif + /* + * If TPR Shadowing is enabled, the TPR Threshold + * must be updated right before entering the guest. + */ + if (tpr_shadowing && !virtual_interrupt_delivery) { + if ((vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0) { + vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic)); + } + } + vmx_run_trace(vmx, vcpu); vmx_dr_enter_guest(vmxctx); rc = vmx_enter_guest(vmxctx, vmx, launched); @@ -3772,6 +3820,9 @@ vmx_getcap(void *arg, int vcpu, int type, int *retval) if (cap_invpcid) ret = 0; break; + case VM_CAP_BPT_EXIT: + ret = 0; + break; default: break; } @@ -3843,11 +3894,25 @@ vmx_setcap(void *arg, int vcpu, int type, int val) reg = VMCS_SEC_PROC_BASED_CTLS; } break; + case VM_CAP_BPT_EXIT: + retval = 0; + + /* Don't change the bitmap if we are tracing all exceptions. 
*/ + if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { + pptr = &vmx->cap[vcpu].exc_bitmap; + baseval = *pptr; + flag = (1 << IDT_BP); + reg = VMCS_EXCEPTION_BITMAP; + } + break; default: break; } - if (retval == 0) { + if (retval) + return (retval); + + if (pptr != NULL) { if (val) { baseval |= flag; } else { @@ -3857,26 +3922,23 @@ vmx_setcap(void *arg, int vcpu, int type, int val) error = vmwrite(reg, baseval); VMCLEAR(vmcs); - if (error) { - retval = error; - } else { - /* - * Update optional stored flags, and record - * setting - */ - if (pptr != NULL) { - *pptr = baseval; - } + if (error) + return (error); - if (val) { - vmx->cap[vcpu].set |= (1 << type); - } else { - vmx->cap[vcpu].set &= ~(1 << type); - } - } + /* + * Update optional stored flags, and record + * setting + */ + *pptr = baseval; } - return (retval); + if (val) { + vmx->cap[vcpu].set |= (1 << type); + } else { + vmx->cap[vcpu].set &= ~(1 << type); + } + + return (0); } struct vlapic_vtx { @@ -4060,7 +4122,30 @@ vmx_set_tmr(struct vlapic *vlapic, const uint32_t *masks) } static void -vmx_enable_x2apic_mode(struct vlapic *vlapic) +vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) +{ + struct vmx *vmx; + struct vmcs *vmcs; + uint32_t proc_ctls; + int vcpuid; + + vcpuid = vlapic->vcpuid; + vmx = ((struct vlapic_vtx *)vlapic)->vmx; + vmcs = &vmx->vmcs[vcpuid]; + + proc_ctls = vmx->cap[vcpuid].proc_ctls; + proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; + proc_ctls |= PROCBASED_CR8_LOAD_EXITING; + proc_ctls |= PROCBASED_CR8_STORE_EXITING; + vmx->cap[vcpuid].proc_ctls = proc_ctls; + + VMPTRLD(vmcs); + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); + VMCLEAR(vmcs); +} + +static void +vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) { struct vmx *vmx; struct vmcs *vmcs; @@ -4224,12 +4309,16 @@ vmx_vlapic_init(void *arg, int vcpuid) vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid]; vlapic_vtx->vmx = vmx; + if (tpr_shadowing) { + vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts; + } + if 
(virtual_interrupt_delivery) { vlapic->ops.set_intr_ready = vmx_set_intr_ready; vlapic->ops.pending_intr = vmx_pending_intr; vlapic->ops.intr_accepted = vmx_intr_accepted; vlapic->ops.set_tmr = vmx_set_tmr; - vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode; + vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid; } if (posted_interrupts) diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h index 2d16799bdd..3c88efba48 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h @@ -94,6 +94,7 @@ struct vmxcap { int set; uint32_t proc_ctls; uint32_t proc_ctls2; + uint32_t exc_bitmap; }; struct vmxstate { diff --git a/usr/src/uts/i86pc/io/vmm/io/iommu.c b/usr/src/uts/i86pc/io/vmm/io/iommu.c index b949573fe2..918a9ec3e4 100644 --- a/usr/src/uts/i86pc/io/vmm/io/iommu.c +++ b/usr/src/uts/i86pc/io/vmm/io/iommu.c @@ -52,7 +52,8 @@ __FBSDID("$FreeBSD$"); #include "iommu.h" SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, iommu, CTLFLAG_RW, 0, "bhyve iommu parameters"); +SYSCTL_NODE(_hw_vmm, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "bhyve iommu parameters"); static int iommu_avail; SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail, @@ -215,7 +216,7 @@ iommu_init(void) if (vmm_is_intel()) ops = &iommu_ops_intel; - else if (vmm_is_amd()) + else if (vmm_is_svm()) ops = &iommu_ops_amd; else ops = NULL; diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.c b/usr/src/uts/i86pc/io/vmm/io/vlapic.c index 687e0e6a8e..f16068e9c7 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.c +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.c @@ -579,6 +579,12 @@ vlapic_update_ppr(struct vlapic *vlapic) VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); } +void +vlapic_sync_tpr(struct vlapic *vlapic) +{ + vlapic_update_ppr(vlapic); +} + static VMM_STAT(VLAPIC_GRATUITOUS_EOI, "EOI without any in-service interrupt"); static void @@ -1110,6 +1116,8 @@ vlapic_pending_intr(struct vlapic 
*vlapic, int *vecptr) int idx, i, bitpos, vector; uint32_t *irrptr, val; + vlapic_update_ppr(vlapic); + if (vlapic->ops.pending_intr) return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); @@ -1167,7 +1175,6 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) panic("isrvec_stk_top overflow %d", stk_top); vlapic->isrvec_stk[stk_top] = vector; - vlapic_update_ppr(vlapic); } void diff --git a/usr/src/uts/i86pc/io/vmm/io/vlapic.h b/usr/src/uts/i86pc/io/vmm/io/vlapic.h index e1a52551a9..47ca3cd0b8 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vlapic.h +++ b/usr/src/uts/i86pc/io/vmm/io/vlapic.h @@ -78,6 +78,8 @@ void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum); void vlapic_fire_cmci(struct vlapic *vlapic); int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); +void vlapic_sync_tpr(struct vlapic *vlapic); + uint64_t vlapic_get_apicbase(struct vlapic *vlapic); int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s); diff --git a/usr/src/uts/i86pc/io/vmm/io/vrtc.c b/usr/src/uts/i86pc/io/vmm/io/vrtc.c index f12d22fc26..a3635fc9f0 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vrtc.c +++ b/usr/src/uts/i86pc/io/vmm/io/vrtc.c @@ -110,7 +110,8 @@ static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval); static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc"); SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); static int rtc_flag_broken_time = 1; SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN, diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 2238536121..38f9a5ad71 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -164,7 +164,7 @@ struct mem_map { int prot; int flags; }; -#define VM_MAX_MEMMAPS 4 +#define VM_MAX_MEMMAPS 8 /* * Initialization: @@ -243,7 +243,8 @@ static 
MALLOC_DEFINE(M_VM, "vm", "vm"); /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); -SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); +SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, + NULL); /* * Halt the guest if all vcpus are executing a HLT instruction with @@ -410,7 +411,7 @@ vmm_init(void) if (vmm_is_intel()) ops = &vmm_ops_intel; - else if (vmm_is_amd()) + else if (vmm_is_svm()) ops = &vmm_ops_amd; else return (ENXIO); @@ -1677,52 +1678,90 @@ static int vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) { #ifdef __FreeBSD__ - int i, done; + int error, i; struct vcpu *vcpu; + struct thread *td; - done = 0; + error = 0; + vcpu = &vm->vcpu[vcpuid]; + td = curthread; #else int i; struct vcpu *vcpu; -#endif + vcpu = &vm->vcpu[vcpuid]; +#endif CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); +#ifdef __FreeBSD__ /* * Wait until all 'active_cpus' have suspended themselves. + * + * Since a VM may be suspended at any time including when one or + * more vcpus are doing a rendezvous we need to call the rendezvous + * handler while we are waiting to prevent a deadlock. 
*/ vcpu_lock(vcpu); + while (error == 0) { + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { + VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); + break; + } + + if (vm->rendezvous_func == NULL) { + VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); + vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); + msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); + vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); + if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { + vcpu_unlock(vcpu); + error = thread_check_susp(td, false); + vcpu_lock(vcpu); + } + } else { + VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); + vcpu_unlock(vcpu); + error = vm_handle_rendezvous(vm, vcpuid); + vcpu_lock(vcpu); + } + } + vcpu_unlock(vcpu); +#else + vcpu_lock(vcpu); while (1) { + int rc; + if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); break; } - VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING); -#ifdef __FreeBSD__ - msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); -#else + rc = cv_reltimedwait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m, hz, + TR_CLOCK_TICK); + vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); + /* - * To prevent vm_handle_suspend from becoming stuck in the - * kernel if the bhyve process driving its vCPUs is killed, - * offer a bail-out, even though not all the vCPUs have reached - * the suspended state. + * If the userspace process driving the instance is killed, any + * vCPUs yet to be marked suspended (because they are not + * VM_RUN-ing in the kernel presently) will never reach that + * state. + * + * To avoid vm_handle_suspend() getting stuck in the kernel + * waiting for those vCPUs, offer a bail-out even though it + * means returning without all vCPUs in a suspended state. 
*/ - if (cv_reltimedwait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m, - hz, TR_CLOCK_TICK) <= 0) { + if (rc <= 0) { if ((curproc->p_flag & SEXITING) != 0) { - vcpu_require_state_locked(vm, vcpuid, - VCPU_FROZEN); break; } } -#endif - vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN); } vcpu_unlock(vcpu); +#endif + /* * Wakeup the other sleeping vcpus and return to userspace. */ @@ -1775,7 +1814,7 @@ vm_suspend(struct vm *vm, enum vm_suspend_how how) if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); - if (atomic_cmpset_int((uint_t *)&vm->suspend, 0, how) == 0) { + if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_host.h b/usr/src/uts/i86pc/io/vmm/vmm_host.h index e0ea1ec927..1b3e84184a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_host.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_host.h @@ -91,7 +91,7 @@ vmm_get_host_gdtrbase(void) { #ifdef __FreeBSD__ - return ((uint64_t)&gdt[NGDT * curcpu]); + return ((uint64_t)*PCPU_PTR(gdt)); #else desctbr_t gdtr; diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 4a4fb07eba..0d32fe0b9a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -63,9 +63,14 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> +#include <err.h> #include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <strings.h> #include <vmmapi.h> #define KASSERT(exp,msg) assert((exp)) +#define panic(...) 
errx(4, __VA_ARGS__) #endif /* _KERNEL */ #include <machine/vmm_instruction_emul.h> @@ -92,6 +97,7 @@ enum { VIE_OP_TYPE_TWOB_GRP15, VIE_OP_TYPE_ADD, VIE_OP_TYPE_TEST, + VIE_OP_TYPE_BEXTR, VIE_OP_TYPE_LAST }; @@ -102,11 +108,17 @@ enum { #define VIE_OP_F_NO_MODRM (1 << 3) #define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4) -#ifdef _KERNEL +static const struct vie_op three_byte_opcodes_0f38[256] = { + [0xF7] = { + .op_byte = 0xF7, + .op_type = VIE_OP_TYPE_BEXTR, + }, +}; + static const struct vie_op two_byte_opcodes[256] = { [0xAE] = { - .op_byte = 0xAE, - .op_type = VIE_OP_TYPE_TWOB_GRP15, + .op_byte = 0xAE, + .op_type = VIE_OP_TYPE_TWOB_GRP15, }, [0xB6] = { .op_byte = 0xB6, @@ -248,7 +260,6 @@ static const struct vie_op one_byte_opcodes[256] = { .op_type = VIE_OP_TYPE_PUSH, } }; -#endif /* struct vie.mod */ #define VIE_MOD_INDIRECT 0 @@ -1325,6 +1336,83 @@ emulate_test(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, } static int +emulate_bextr(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + struct vm_guest_paging *paging, mem_region_read_t memread, + mem_region_write_t memwrite, void *arg) +{ + uint64_t src1, src2, dst, rflags; + unsigned start, len; + int error, size; + + size = vie->opsize; + error = EINVAL; + + /* + * VEX.LZ.0F38.W0 F7 /r BEXTR r32a, r/m32, r32b + * VEX.LZ.0F38.W1 F7 /r BEXTR r64a, r/m64, r64b + * + * Destination operand is ModRM:reg. Source operands are ModRM:r/m and + * Vex.vvvv. + * + * Operand size is always 32-bit if not in 64-bit mode (W1 is ignored). + */ + if (size != 4 && paging->cpu_mode != CPU_MODE_64BIT) + size = 4; + + /* + * Extracts contiguous bits from the first /source/ operand (second + * operand) using an index and length specified in the second /source/ + * operand (third operand). 
+ */ + error = memread(vm, vcpuid, gpa, &src1, size, arg); + if (error) + return (error); + error = vie_read_register(vm, vcpuid, gpr_map[vie->vex_reg], &src2); + if (error) + return (error); + error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags); + if (error) + return (error); + + start = (src2 & 0xff); + len = (src2 & 0xff00) >> 8; + + /* If no bits are extracted, the destination register is cleared. */ + dst = 0; + + /* If START exceeds the operand size, no bits are extracted. */ + if (start > size * 8) + goto done; + /* Length is bounded by both the destination size and start offset. */ + if (start + len > size * 8) + len = (size * 8) - start; + if (len == 0) + goto done; + + if (start > 0) + src1 = (src1 >> start); + if (len < 64) + src1 = src1 & ((1ull << len) - 1); + dst = src1; + +done: + error = vie_update_register(vm, vcpuid, gpr_map[vie->reg], dst, size); + if (error) + return (error); + + /* + * AMD: OF, CF cleared; SF/AF/PF undefined; ZF set by result. + * Intel: ZF is set by result; AF/SF/PF undefined; all others cleared. 
+ */ + rflags &= ~RFLAGS_STATUS_BITS; + if (dst == 0) + rflags |= PSL_Z; + error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, + 8); + return (error); +} + +static int emulate_add(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, mem_region_read_t memread, mem_region_write_t memwrite, void *arg) { @@ -1753,6 +1841,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = emulate_test(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; + case VIE_OP_TYPE_BEXTR: + error = emulate_bextr(vm, vcpuid, gpa, vie, paging, + memread, memwrite, memarg); + break; default: error = EINVAL; break; @@ -1926,7 +2018,6 @@ vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, return (0); } -#ifdef _KERNEL void vie_init(struct vie *vie, const char *inst_bytes, int inst_length) { @@ -1945,6 +2036,7 @@ vie_init(struct vie *vie, const char *inst_bytes, int inst_length) } } +#ifdef _KERNEL static int pf_error_code(int usermode, int prot, int rsvd, uint64_t pte) { @@ -2227,6 +2319,7 @@ vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, vie->num_valid = inst_length; return (0); } +#endif /* _KERNEL */ static int vie_peek(struct vie *vie, uint8_t *x) @@ -2318,6 +2411,81 @@ decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d) } /* + * § 2.3.5, "The VEX Prefix", SDM Vol 2. + */ + if ((cpu_mode == CPU_MODE_64BIT || cpu_mode == CPU_MODE_COMPATIBILITY) + && x == 0xC4) { + const struct vie_op *optab; + + /* 3-byte VEX prefix. */ + vie->vex_present = 1; + + vie_advance(vie); + if (vie_peek(vie, &x)) + return (-1); + + /* + * 2nd byte: [R', X', B', mmmmm[4:0]]. Bits are inverted + * relative to REX encoding. + */ + vie->rex_r = x & 0x80 ? 0 : 1; + vie->rex_x = x & 0x40 ? 0 : 1; + vie->rex_b = x & 0x20 ? 0 : 1; + + switch (x & 0x1F) { + case 0x2: + /* 0F 38. */ + optab = three_byte_opcodes_0f38; + break; + case 0x1: + /* 0F class - nothing handled here yet. 
*/ + /* FALLTHROUGH */ + case 0x3: + /* 0F 3A class - nothing handled here yet. */ + /* FALLTHROUGH */ + default: + /* Reserved (#UD). */ + return (-1); + } + + vie_advance(vie); + if (vie_peek(vie, &x)) + return (-1); + + /* 3rd byte: [W, vvvv[6:3], L, pp[1:0]]. */ + vie->rex_w = x & 0x80 ? 1 : 0; + + vie->vex_reg = ((~(unsigned)x & 0x78u) >> 3); + vie->vex_l = !!(x & 0x4); + vie->vex_pp = (x & 0x3); + + /* PP: 1=66 2=F3 3=F2 prefixes. */ + switch (vie->vex_pp) { + case 0x1: + vie->opsize_override = 1; + break; + case 0x2: + vie->repz_present = 1; + break; + case 0x3: + vie->repnz_present = 1; + break; + } + + vie_advance(vie); + + /* Opcode, sans literal prefix prefix. */ + if (vie_peek(vie, &x)) + return (-1); + + vie->op = optab[x]; + if (vie->op.op_type == VIE_OP_TYPE_NONE) + return (-1); + + vie_advance(vie); + } + + /* * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1 */ if (cpu_mode == CPU_MODE_64BIT) { @@ -2369,6 +2537,10 @@ decode_opcode(struct vie *vie) if (vie_peek(vie, &x)) return (-1); + /* Already did this via VEX prefix. */ + if (vie->op.op_type != VIE_OP_TYPE_NONE) + return (0); + vie->op = one_byte_opcodes[x]; if (vie->op.op_type == VIE_OP_TYPE_NONE) @@ -2649,6 +2821,7 @@ decode_moffset(struct vie *vie) return (0); } +#ifdef _KERNEL /* * Verify that the 'guest linear address' provided as collateral of the nested * page table fault matches with our instruction decoding. 
@@ -2740,10 +2913,15 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie, return (0); } +#endif /* _KERNEL */ int +#ifdef _KERNEL vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) +#else +vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie) +#endif { if (decode_prefixes(vie, cpu_mode, cs_d)) @@ -2767,13 +2945,14 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, if (decode_moffset(vie)) return (-1); +#ifdef _KERNEL if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) { if (verify_gla(vm, cpuid, gla, vie, cpu_mode)) return (-1); } +#endif vie->decoded = 1; /* success */ return (0); } -#endif /* _KERNEL */ diff --git a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c index 4773cdf621..70ff33b726 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c @@ -33,6 +33,7 @@ #include <sys/kernel.h> #include <sys/hma.h> #include <sys/x86_archext.h> +#include <x86/apicreg.h> #include <sys/vmm.h> #include <sys/vmm_instruction_emul.h> @@ -431,6 +432,8 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, case VM_SET_INTINFO: case VM_GET_INTINFO: case VM_RESTART_INSTRUCTION: + case VM_SET_KERNEMU_DEV: + case VM_GET_KERNEMU_DEV: /* * Copy in the ID of the vCPU chosen for this operation. 
* Since a nefarious caller could update their struct between @@ -967,6 +970,62 @@ vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md, break; } + case VM_SET_KERNEMU_DEV: + case VM_GET_KERNEMU_DEV: { + struct vm_readwrite_kernemu_device kemu; + size_t size = 0; + mem_region_write_t mwrite = NULL; + mem_region_read_t mread = NULL; + uint64_t ignored = 0; + + if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) { + error = EFAULT; + break; + } + + if (kemu.access_width > 3) { + error = EINVAL; + break; + } + size = (1 << kemu.access_width); + ASSERT(size >= 1 && size <= 8); + + if (kemu.gpa >= DEFAULT_APIC_BASE && + kemu.gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { + mread = lapic_mmio_read; + mwrite = lapic_mmio_write; + } else if (kemu.gpa >= VIOAPIC_BASE && + kemu.gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { + mread = vioapic_mmio_read; + mwrite = vioapic_mmio_write; + } else if (kemu.gpa >= VHPET_BASE && + kemu.gpa < VHPET_BASE + VHPET_SIZE) { + mread = vhpet_mmio_read; + mwrite = vhpet_mmio_write; + } else { + error = EINVAL; + break; + } + + if (cmd == VM_SET_KERNEMU_DEV) { + VERIFY(mwrite != NULL); + error = mwrite(sc->vmm_vm, vcpu, kemu.gpa, kemu.value, + size, &ignored); + } else { + VERIFY(mread != NULL); + error = mread(sc->vmm_vm, vcpu, kemu.gpa, &kemu.value, + size, &ignored); + } + + if (error == 0) { + if (ddi_copyout(&kemu, datap, sizeof (kemu), md)) { + error = EFAULT; + break; + } + } + break; + } + case VM_GET_CAPABILITY: { struct vm_capability vmcap; @@ -1912,7 +1971,7 @@ vmm_is_supported(intptr_t arg) if (vmm_is_intel()) { r = vmx_x86_supported(&msg); - } else if (vmm_is_amd()) { + } else if (vmm_is_svm()) { /* * HMA already ensured that the features necessary for SVM * operation were present and online during vmm_attach(). 
diff --git a/usr/src/uts/i86pc/io/vmm/vmm_stat.c b/usr/src/uts/i86pc/io/vmm/vmm_stat.c index 2cbcce9590..a6af75e40a 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_stat.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_stat.c @@ -67,7 +67,7 @@ vmm_stat_register(void *arg) if (vst->scope == VMM_STAT_SCOPE_INTEL && !vmm_is_intel()) return; - if (vst->scope == VMM_STAT_SCOPE_AMD && !vmm_is_amd()) + if (vst->scope == VMM_STAT_SCOPE_AMD && !vmm_is_svm()) return; if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) { diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.c b/usr/src/uts/i86pc/io/vmm/vmm_util.c index b8acff9bbc..d6ed67f4b3 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_util.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_util.c @@ -58,9 +58,10 @@ vmm_is_intel(void) } bool -vmm_is_amd(void) +vmm_is_svm(void) { - return (strcmp(cpu_vendor, "AuthenticAMD") == 0); + return (strcmp(cpu_vendor, "AuthenticAMD") == 0 || + strcmp(cpu_vendor, "HygonGenuine") == 0); } bool diff --git a/usr/src/uts/i86pc/io/vmm/vmm_util.h b/usr/src/uts/i86pc/io/vmm/vmm_util.h index 8c65e7e3a6..ff93ce5733 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_util.h +++ b/usr/src/uts/i86pc/io/vmm/vmm_util.h @@ -34,7 +34,7 @@ struct trapframe; bool vmm_is_intel(void); -bool vmm_is_amd(void); +bool vmm_is_svm(void); bool vmm_supports_1G_pages(void); void dump_trapframe(struct trapframe *tf); diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c index 6213173587..248014ae24 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.c +++ b/usr/src/uts/i86pc/io/vmm/x86.c @@ -65,7 +65,10 @@ __FBSDID("$FreeBSD$"); #include "x86.h" SYSCTL_DECL(_hw_vmm); -SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL); +#ifdef __FreeBSD__ +static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + NULL); +#endif #define CPUID_VM_HIGH 0x40000000 @@ -145,7 +148,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, break; case CPUID_8000_0008: cpuid_count(func, param, regs); - if (vmm_is_amd()) 
{ + if (vmm_is_svm()) { /* * As on Intel (0000_0007:0, EDX), mask out * unsupported or unsafe AMD extended features @@ -259,7 +262,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, case CPUID_8000_001D: /* AMD Cache topology, like 0000_0004 for Intel. */ - if (!vmm_is_amd()) + if (!vmm_is_svm()) goto default_leaf; /* @@ -301,8 +304,11 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, break; case CPUID_8000_001E: - /* AMD Family 16h+ additional identifiers */ - if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16) + /* + * AMD Family 16h+ and Hygon Family 18h additional + * identifiers. + */ + if (!vmm_is_svm() || CPUID_TO_FAMILY(cpu_id) < 0x16) goto default_leaf; vm_get_topology(vm, &sockets, &cores, &threads, @@ -581,6 +587,18 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint64_t *rax, uint64_t *rbx, } break; + case CPUID_0000_0015: + /* + * Don't report CPU TSC/Crystal ratio and clock + * values since guests may use these to derive the + * local APIC frequency.. 
+ */ + regs[0] = 0; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + break; + case 0x40000000: regs[0] = CPUID_VM_HIGH; bcopy(bhyve_id, &regs[1], 4); diff --git a/usr/src/uts/i86pc/io/vmm/x86.h b/usr/src/uts/i86pc/io/vmm/x86.h index cb8e12fcd2..7c8fccf78f 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.h +++ b/usr/src/uts/i86pc/io/vmm/x86.h @@ -41,6 +41,7 @@ #define CPUID_0000_000A (0xA) #define CPUID_0000_000B (0xB) #define CPUID_0000_000D (0xD) +#define CPUID_0000_0015 (0x15) #define CPUID_8000_0000 (0x80000000) #define CPUID_8000_0001 (0x80000001) #define CPUID_8000_0002 (0x80000002) diff --git a/usr/src/uts/i86pc/sys/vmm.h b/usr/src/uts/i86pc/sys/vmm.h index 0bbc219b7f..f4a2fe4e3a 100644 --- a/usr/src/uts/i86pc/sys/vmm.h +++ b/usr/src/uts/i86pc/sys/vmm.h @@ -108,6 +108,7 @@ enum vm_reg_name { VM_REG_GUEST_DR2, VM_REG_GUEST_DR3, VM_REG_GUEST_DR6, + VM_REG_GUEST_ENTRY_INST_LENGTH, VM_REG_LAST }; @@ -480,6 +481,7 @@ enum vm_cap_type { VM_CAP_PAUSE_EXIT, VM_CAP_UNRESTRICTED_GUEST, VM_CAP_ENABLE_INVPCID, + VM_CAP_BPT_EXIT, VM_CAP_MAX }; @@ -538,6 +540,8 @@ struct vie_op { uint8_t op_type; /* type of operation (e.g. 
MOV) */ uint16_t op_flags; }; +_Static_assert(sizeof(struct vie_op) == 4, "ABI"); +_Static_assert(_Alignof(struct vie_op) == 2, "ABI"); #define VIE_INST_SIZE 15 struct vie { @@ -562,13 +566,22 @@ struct vie { rm:4; uint8_t ss:2, /* SIB byte */ - index:4, - base:4; + vex_present:1, /* VEX prefixed */ + vex_l:1, /* L bit */ + index:4, /* SIB byte */ + base:4; /* SIB byte */ uint8_t disp_bytes; uint8_t imm_bytes; uint8_t scale; + + uint8_t vex_reg:4, /* vvvv: first source register specifier */ + vex_pp:2, /* pp */ + _sparebits:2; + + uint8_t _sparebytes[2]; + int base_register; /* VM_REG_GUEST_xyz */ int index_register; /* VM_REG_GUEST_xyz */ int segment_register; /* VM_REG_GUEST_xyz */ @@ -578,8 +591,14 @@ struct vie { uint8_t decoded; /* set to 1 if successfully decoded */ + uint8_t _sparebyte; + struct vie_op op; /* opcode description */ }; +_Static_assert(sizeof(struct vie) == 64, "ABI"); +_Static_assert(__offsetof(struct vie, disp_bytes) == 22, "ABI"); +_Static_assert(__offsetof(struct vie, scale) == 24, "ABI"); +_Static_assert(__offsetof(struct vie, base_register) == 28, "ABI"); enum vm_exitcode { VM_EXITCODE_INOUT, @@ -605,6 +624,7 @@ enum vm_exitcode { VM_EXITCODE_REQIDLE, VM_EXITCODE_DEBUG, VM_EXITCODE_VMINSN, + VM_EXITCODE_BPT, #ifndef __FreeBSD__ VM_EXITCODE_HT, #endif @@ -695,6 +715,9 @@ struct vm_exit { uint64_t exitinfo2; } svm; struct { + int inst_length; + } bpt; + struct { uint32_t code; /* ecx value */ uint64_t wval; } msr; diff --git a/usr/src/uts/i86pc/sys/vmm_dev.h b/usr/src/uts/i86pc/sys/vmm_dev.h index dd87dcb0a6..48e2c5f306 100644 --- a/usr/src/uts/i86pc/sys/vmm_dev.h +++ b/usr/src/uts/i86pc/sys/vmm_dev.h @@ -64,6 +64,13 @@ struct vm_memseg { char name[SPECNAMELEN + 1]; }; +struct vm_memseg_fbsd12 { + int segid; + size_t len; + char name[64]; +}; +_Static_assert(sizeof(struct vm_memseg_fbsd12) == 80, "COMPAT_FREEBSD12 ABI"); + struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ @@ -303,6 +310,15 @@ struct vm_cpu_topology { 
uint16_t maxcpus; }; +struct vm_readwrite_kernemu_device { + int vcpuid; + unsigned access_width : 3; + unsigned _unused : 29; + uint64_t gpa; + uint64_t value; +}; +_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI"); + enum { /* general routines */ IOCNUM_ABIVERS = 0, @@ -330,6 +346,8 @@ enum { IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, IOCNUM_SET_REGISTER_SET = 24, IOCNUM_GET_REGISTER_SET = 25, + IOCNUM_GET_KERNEMU_DEV = 26, + IOCNUM_SET_KERNEMU_DEV = 27, /* interrupt injection */ IOCNUM_GET_INTINFO = 28, @@ -397,8 +415,12 @@ enum { _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ _IO('v', IOCNUM_REINIT) +#define VM_ALLOC_MEMSEG_FBSD12 \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_fbsd12) #define VM_ALLOC_MEMSEG \ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) +#define VM_GET_MEMSEG_FBSD12 \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_fbsd12) #define VM_GET_MEMSEG \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) #define VM_MMAP_MEMSEG \ @@ -417,6 +439,12 @@ enum { _IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set) #define VM_GET_REGISTER_SET \ _IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set) +#define VM_SET_KERNEMU_DEV \ + _IOW('v', IOCNUM_SET_KERNEMU_DEV, \ + struct vm_readwrite_kernemu_device) +#define VM_GET_KERNEMU_DEV \ + _IOWR('v', IOCNUM_GET_KERNEMU_DEV, \ + struct vm_readwrite_kernemu_device) #define VM_INJECT_EXCEPTION \ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) #define VM_LAPIC_IRQ \ diff --git a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h b/usr/src/uts/i86pc/sys/vmm_instruction_emul.h index f10f407164..d4007c37e3 100644 --- a/usr/src/uts/i86pc/sys/vmm_instruction_emul.h +++ b/usr/src/uts/i86pc/sys/vmm_instruction_emul.h @@ -115,6 +115,7 @@ int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, */ int vm_gla2gpa_nofault(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, int prot, uint64_t *gpa, int *is_fault); +#endif /* _KERNEL */ void 
vie_init(struct vie *vie, const char *inst_bytes, int inst_length); @@ -129,9 +130,17 @@ void vie_init(struct vie *vie, const char *inst_bytes, int inst_length); * To skip the 'gla' verification for this or any other reason pass * in VIE_INVALID_GLA instead. */ +#ifdef _KERNEL #define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */ int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla, enum vm_cpu_mode cpu_mode, int csd, struct vie *vie); +#else /* !_KERNEL */ +/* + * Permit instruction decoding logic to be compiled outside of the kernel for + * rapid iteration and validation. No GLA validation is performed, obviously. + */ +int vmm_decode_instruction(enum vm_cpu_mode cpu_mode, int csd, + struct vie *vie); #endif /* _KERNEL */ #endif /* _VMM_INSTRUCTION_EMUL_H_ */ |