diff options
| author | John Levon <john.levon@joyent.com> | 2019-02-21 16:10:35 +0000 |
|---|---|---|
| committer | John Levon <john.levon@joyent.com> | 2019-02-21 16:10:35 +0000 |
| commit | a204e9a1d3ee69f2131c57285b16ac4a77170c57 (patch) | |
| tree | e550536a137feebce5a507268e61a2d8d00d8a71 /usr/src | |
| parent | d024ad7e44b1e11d9250ea9be53265a22d7b368d (diff) | |
| parent | 50c68d602321e451c36464d5c7d903c892e2f775 (diff) | |
| download | illumos-joyent-a204e9a1d3ee69f2131c57285b16ac4a77170c57.tar.gz | |
Merge branch 'master' into uefi
Diffstat (limited to 'usr/src')
56 files changed, 1593 insertions, 461 deletions
diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index 7afc47111b..ccf89b4613 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -52,6 +52,13 @@ __FBSDID("$FreeBSD$"); #include <sys/time.h> #include <sys/cpuset.h> +#ifdef __FreeBSD__ +#include <amd64/vmm/intel/vmcs.h> +#else +#include <intel/vmcs.h> +#endif + +#include <machine/atomic.h> #include <machine/segments.h> #ifndef WITHOUT_CAPSICUM @@ -65,7 +72,6 @@ __FBSDID("$FreeBSD$"); #include <libgen.h> #include <unistd.h> #include <assert.h> -#include <errno.h> #include <pthread.h> #include <pthread_np.h> #include <sysexits.h> @@ -109,6 +115,73 @@ __FBSDID("$FreeBSD$"); #define MB (1024UL * 1024) #define GB (1024UL * MB) +static const char * const vmx_exit_reason_desc[] = { + [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", + [EXIT_REASON_EXT_INTR] = "External interrupt", + [EXIT_REASON_TRIPLE_FAULT] = "Triple fault", + [EXIT_REASON_INIT] = "INIT signal", + [EXIT_REASON_SIPI] = "Start-up IPI (SIPI)", + [EXIT_REASON_IO_SMI] = "I/O system-management interrupt (SMI)", + [EXIT_REASON_SMI] = "Other SMI", + [EXIT_REASON_INTR_WINDOW] = "Interrupt window", + [EXIT_REASON_NMI_WINDOW] = "NMI window", + [EXIT_REASON_TASK_SWITCH] = "Task switch", + [EXIT_REASON_CPUID] = "CPUID", + [EXIT_REASON_GETSEC] = "GETSEC", + [EXIT_REASON_HLT] = "HLT", + [EXIT_REASON_INVD] = "INVD", + [EXIT_REASON_INVLPG] = "INVLPG", + [EXIT_REASON_RDPMC] = "RDPMC", + [EXIT_REASON_RDTSC] = "RDTSC", + [EXIT_REASON_RSM] = "RSM", + [EXIT_REASON_VMCALL] = "VMCALL", + [EXIT_REASON_VMCLEAR] = "VMCLEAR", + [EXIT_REASON_VMLAUNCH] = "VMLAUNCH", + [EXIT_REASON_VMPTRLD] = "VMPTRLD", + [EXIT_REASON_VMPTRST] = "VMPTRST", + [EXIT_REASON_VMREAD] = "VMREAD", + [EXIT_REASON_VMRESUME] = "VMRESUME", + [EXIT_REASON_VMWRITE] = "VMWRITE", + [EXIT_REASON_VMXOFF] = "VMXOFF", + [EXIT_REASON_VMXON] = "VMXON", + [EXIT_REASON_CR_ACCESS] = "Control-register accesses", + [EXIT_REASON_DR_ACCESS] = "MOV DR", + 
[EXIT_REASON_INOUT] = "I/O instruction", + [EXIT_REASON_RDMSR] = "RDMSR", + [EXIT_REASON_WRMSR] = "WRMSR", + [EXIT_REASON_INVAL_VMCS] = + "VM-entry failure due to invalid guest state", + [EXIT_REASON_INVAL_MSR] = "VM-entry failure due to MSR loading", + [EXIT_REASON_MWAIT] = "MWAIT", + [EXIT_REASON_MTF] = "Monitor trap flag", + [EXIT_REASON_MONITOR] = "MONITOR", + [EXIT_REASON_PAUSE] = "PAUSE", + [EXIT_REASON_MCE_DURING_ENTRY] = + "VM-entry failure due to machine-check event", + [EXIT_REASON_TPR] = "TPR below threshold", + [EXIT_REASON_APIC_ACCESS] = "APIC access", + [EXIT_REASON_VIRTUALIZED_EOI] = "Virtualized EOI", + [EXIT_REASON_GDTR_IDTR] = "Access to GDTR or IDTR", + [EXIT_REASON_LDTR_TR] = "Access to LDTR or TR", + [EXIT_REASON_EPT_FAULT] = "EPT violation", + [EXIT_REASON_EPT_MISCONFIG] = "EPT misconfiguration", + [EXIT_REASON_INVEPT] = "INVEPT", + [EXIT_REASON_RDTSCP] = "RDTSCP", + [EXIT_REASON_VMX_PREEMPT] = "VMX-preemption timer expired", + [EXIT_REASON_INVVPID] = "INVVPID", + [EXIT_REASON_WBINVD] = "WBINVD", + [EXIT_REASON_XSETBV] = "XSETBV", + [EXIT_REASON_APIC_WRITE] = "APIC write", + [EXIT_REASON_RDRAND] = "RDRAND", + [EXIT_REASON_INVPCID] = "INVPCID", + [EXIT_REASON_VMFUNC] = "VMFUNC", + [EXIT_REASON_ENCLS] = "ENCLS", + [EXIT_REASON_RDSEED] = "RDSEED", + [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", + [EXIT_REASON_XSAVES] = "XSAVES", + [EXIT_REASON_XRSTORS] = "XRSTORS" +}; + typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); @@ -594,14 +667,22 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) #define DEBUG_EPT_MISCONFIG #ifdef DEBUG_EPT_MISCONFIG -#define EXIT_REASON_EPT_MISCONFIG 49 #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 -#define VMCS_IDENT(x) ((x) | 0x80000000) static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; static int ept_misconfig_ptenum; #endif +static const char * +vmexit_vmx_desc(uint32_t 
exit_reason) +{ + + if (exit_reason >= nitems(vmx_exit_reason_desc) || + vmx_exit_reason_desc[exit_reason] == NULL) + return ("Unknown"); + return (vmx_exit_reason_desc[exit_reason]); +} + static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -611,7 +692,8 @@ vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status); - fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); + fprintf(stderr, "\texit_reason\t%u (%s)\n", vmexit->u.vmx.exit_reason, + vmexit_vmx_desc(vmexit->u.vmx.exit_reason)); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); @@ -962,15 +1044,13 @@ do_open(const char *vmname) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW); - if (cap_rights_limit(vm_get_device_fd(ctx), &rights) == -1 && - errno != ENOSYS) + if (caph_rights_limit(vm_get_device_fd(ctx), &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); vm_get_ioctls(&ncmds); cmds = vm_get_ioctls(NULL); if (cmds == NULL) errx(EX_OSERR, "out of memory"); - if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1 && - errno != ENOSYS) + if (caph_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); free((cap_ioctl_t *)cmds); #endif diff --git a/usr/src/cmd/bhyve/block_if.c b/usr/src/cmd/bhyve/block_if.c index 8ac15dbd33..fcb4149b62 100644 --- a/usr/src/cmd/bhyve/block_if.c +++ b/usr/src/cmd/bhyve/block_if.c @@ -51,6 +51,9 @@ __FBSDID("$FreeBSD$"); #endif #include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <fcntl.h> #include <stdio.h> @@ -459,8 +462,10 @@ blockif_open(const char *optstr, const char *ident) char tname[MAXCOMLEN + 1]; 
#ifdef __FreeBSD__ char name[MAXPATHLEN]; -#endif char *nopt, *xopts, *cp; +#else + char *nopt, *xopts, *cp = NULL; +#endif struct blockif_ctxt *bc; struct stat sbuf; #ifdef __FreeBSD__ @@ -538,7 +543,7 @@ blockif_open(const char *optstr, const char *ident) if (ro) cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); - if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -628,7 +633,7 @@ blockif_open(const char *optstr, const char *ident) #endif #ifndef WITHOUT_CAPSICUM - if (cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1 && errno != ENOSYS) + if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -700,6 +705,13 @@ blockif_open(const char *optstr, const char *ident) err: if (fd >= 0) close(fd); +#ifdef __FreeBSD__ + free(cp); + free(xopts); + free(nopt); +#else + free(nopt); +#endif return (NULL); } diff --git a/usr/src/cmd/bhyve/consport.c b/usr/src/cmd/bhyve/consport.c index f630cec1f3..cda2df2414 100644 --- a/usr/src/cmd/bhyve/consport.c +++ b/usr/src/cmd/bhyve/consport.c @@ -37,6 +37,9 @@ __FBSDID("$FreeBSD$"); #endif #include <sys/select.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <stdio.h> @@ -142,11 +145,9 @@ console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); - if (cap_rights_limit(STDIN_FILENO, &rights) == -1 && - errno != ENOSYS) + if (caph_rights_limit(STDIN_FILENO, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1 && - errno != ENOSYS) + if (caph_ioctls_limit(STDIN_FILENO, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif ttyopen(); diff --git a/usr/src/cmd/bhyve/dbgport.c 
b/usr/src/cmd/bhyve/dbgport.c index 6b3d26336f..88a616b50d 100644 --- a/usr/src/cmd/bhyve/dbgport.c +++ b/usr/src/cmd/bhyve/dbgport.c @@ -40,6 +40,9 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp.h> #include <sys/uio.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <stdio.h> #include <stdlib.h> @@ -169,7 +172,7 @@ init_dbgport(int sport) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(listen_fd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(listen_fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/gdb.c b/usr/src/cmd/bhyve/gdb.c index 8f464816f0..69bcf53c31 100644 --- a/usr/src/cmd/bhyve/gdb.c +++ b/usr/src/cmd/bhyve/gdb.c @@ -1269,9 +1269,9 @@ limit_gdb_socket(int s) cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE, CAP_SETSOCKOPT, CAP_IOCTL); - if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(s, ioctls, nitems(ioctls)) == -1 && errno != ENOSYS) + if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); } #endif diff --git a/usr/src/cmd/bhyve/iov.c b/usr/src/cmd/bhyve/iov.c index c564bd8ae5..54ea22aa94 100644 --- a/usr/src/cmd/bhyve/iov.c +++ b/usr/src/cmd/bhyve/iov.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * Copyright (c) 2018 Alexander Motin <mav@FreeBSD.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -39,12 +40,12 @@ __FBSDID("$FreeBSD$"); #include "iov.h" void -seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, size_t *niov2, +seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, int *niov2, size_t seek) { size_t remainder = 0; size_t left = seek; - size_t i, j; + int i, j; for (i = 0; i < niov1; i++) { size_t toseek = MIN(left, iov1[i].iov_len); @@ -69,9 +70,10 @@ seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, size_t *niov2, } size_t -count_iov(struct iovec *iov, size_t niov) +count_iov(const struct iovec *iov, int niov) { - size_t i, total = 0; + size_t total = 0; + int i; for (i = 0; i < niov; i++) total += iov[i].iov_len; @@ -79,35 +81,36 @@ count_iov(struct iovec *iov, size_t niov) return (total); } -size_t -truncate_iov(struct iovec *iov, size_t niov, size_t length) +void +truncate_iov(struct iovec *iov, int *niov, size_t length) { - size_t i, done = 0; + size_t done = 0; + int i; - for (i = 0; i < niov; i++) { + for (i = 0; i < *niov; i++) { size_t toseek = MIN(length - done, iov[i].iov_len); done += toseek; - if (toseek < iov[i].iov_len) { + if (toseek <= iov[i].iov_len) { iov[i].iov_len = toseek; - return (i + 1); + *niov = i + 1; + return; } } - - return (niov); } ssize_t -iov_to_buf(struct iovec *iov, size_t niov, void **buf) +iov_to_buf(const struct iovec *iov, int niov, void **buf) { - size_t i, ptr = 0, total = 0; + size_t ptr, total; + int i; - for (i = 0; i < niov; i++) { - total += iov[i].iov_len; - *buf = realloc(*buf, total); - if (*buf == NULL) - return (-1); + total = count_iov(iov, niov); + *buf = realloc(*buf, total); + if (*buf == NULL) + return (-1); + for (i = 0, ptr = 0; i < niov; i++) { memcpy(*buf + ptr, iov[i].iov_base, iov[i].iov_len); ptr += iov[i].iov_len; } @@ -116,12 +119,12 @@ iov_to_buf(struct iovec *iov, size_t niov, void **buf) } ssize_t -buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, 
+buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, size_t seek) { struct iovec *diov; - size_t ndiov, i; - uintptr_t off = 0; + int ndiov, i; + size_t off = 0, len; if (seek > 0) { diov = malloc(sizeof(struct iovec) * niov); @@ -131,11 +134,15 @@ buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, ndiov = niov; } - for (i = 0; i < ndiov; i++) { - memcpy(diov[i].iov_base, buf + off, diov[i].iov_len); - off += diov[i].iov_len; + for (i = 0; i < ndiov && off < buflen; i++) { + len = MIN(diov[i].iov_len, buflen - off); + memcpy(diov[i].iov_base, buf + off, len); + off += len; } + if (seek > 0) + free(diov); + return ((ssize_t)off); } diff --git a/usr/src/cmd/bhyve/iov.h b/usr/src/cmd/bhyve/iov.h index 87fa4c1dcf..e3b5916edb 100644 --- a/usr/src/cmd/bhyve/iov.h +++ b/usr/src/cmd/bhyve/iov.h @@ -2,6 +2,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Jakub Klama <jceel@FreeBSD.org>. + * Copyright (c) 2018 Alexander Motin <mav@FreeBSD.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,12 +33,12 @@ #ifndef _IOV_H_ #define _IOV_H_ -void seek_iov(struct iovec *iov1, size_t niov1, struct iovec *iov2, - size_t *niov2, size_t seek); -size_t truncate_iov(struct iovec *iov, size_t niov, size_t length); -size_t count_iov(struct iovec *iov, size_t niov); -ssize_t iov_to_buf(struct iovec *iov, size_t niov, void **buf); -ssize_t buf_to_iov(void *buf, size_t buflen, struct iovec *iov, size_t niov, +void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, + int *niov2, size_t seek); +void truncate_iov(struct iovec *iov, int *niov, size_t length); +size_t count_iov(const struct iovec *iov, int niov); +ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); +ssize_t buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, size_t seek); #endif /* _IOV_H_ */ diff --git a/usr/src/cmd/bhyve/mevent.c b/usr/src/cmd/bhyve/mevent.c index 4ad33a9f01..a258fd3047 100644 --- a/usr/src/cmd/bhyve/mevent.c +++ b/usr/src/cmd/bhyve/mevent.c @@ -41,6 +41,9 @@ __FBSDID("$FreeBSD$"); #include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <stdlib.h> @@ -599,7 +602,7 @@ mevent_dispatch(void) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_KQUEUE); - if (cap_rights_limit(mfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(mfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -616,9 +619,9 @@ mevent_dispatch(void) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(mevent_pipefd[0], &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(mevent_pipefd[0], &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_rights_limit(mevent_pipefd[1], &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(mevent_pipefd[1], &rights) == -1) errx(EX_OSERR, "Unable to apply 
rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_ahci.c b/usr/src/cmd/bhyve/pci_ahci.c index 39b441d876..1e3feffcc2 100644 --- a/usr/src/cmd/bhyve/pci_ahci.c +++ b/usr/src/cmd/bhyve/pci_ahci.c @@ -105,7 +105,7 @@ enum sata_fis_type { * ATA commands */ #define ATA_SF_ENAB_SATA_SF 0x10 -#define ATA_SATA_SF_AN 0x05 +#define ATA_SATA_SF_AN 0x05 #define ATA_SF_DIS_SATA_SF 0x90 /* @@ -119,6 +119,8 @@ static FILE *dbg; #endif #define WPRINTF(format, arg...) printf(format, ##arg) +#define AHCI_PORT_IDENT 20 + 1 + struct ahci_ioreq { struct blockif_req io_req; struct ahci_port *io_pr; @@ -136,7 +138,7 @@ struct ahci_port { struct pci_ahci_softc *pr_sc; uint8_t *cmd_lst; uint8_t *rfis; - char ident[20 + 1]; + char ident[AHCI_PORT_IDENT]; int port; int atapi; int reset; @@ -2385,7 +2387,8 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); - sprintf(sc->port[p].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", + snprintf(sc->port[p].ident, AHCI_PORT_IDENT, + "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); diff --git a/usr/src/cmd/bhyve/pci_e82545.c b/usr/src/cmd/bhyve/pci_e82545.c index 3f5a6ef0c5..e211b5cf9c 100644 --- a/usr/src/cmd/bhyve/pci_e82545.c +++ b/usr/src/cmd/bhyve/pci_e82545.c @@ -46,6 +46,9 @@ __FBSDID("$FreeBSD$"); #include <sys/filio.h> #endif +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -2265,7 +2268,7 @@ e82545_open_tap(struct e82545_softc *sc, char *opts) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(sc->esc_tapfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(sc->esc_tapfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_fbuf.c b/usr/src/cmd/bhyve/pci_fbuf.c index 
5a04c41e54..8d24dde9da 100644 --- a/usr/src/cmd/bhyve/pci_fbuf.c +++ b/usr/src/cmd/bhyve/pci_fbuf.c @@ -121,8 +121,9 @@ static void pci_fbuf_usage(char *opt) { - fprintf(stderr, "Invalid fbuf emulation \"%s\"\r\n", opt); - fprintf(stderr, "fbuf: {wait,}{vga=on|io|off,}rfb=<ip>:port\r\n"); + fprintf(stderr, "Invalid fbuf emulation option \"%s\"\r\n", opt); + fprintf(stderr, "fbuf: {wait,}{vga=on|io|off,}rfb=<ip>:port" + "{,w=width}{,h=height}\r\n"); } static void @@ -254,13 +255,33 @@ pci_fbuf_parse_opts(struct pci_fbuf_softc *sc, char *opts) xopts, config)); if (!strcmp(xopts, "tcp") || !strcmp(xopts, "rfb")) { - /* parse host-ip:port */ - tmpstr = strsep(&config, ":"); - if (!config) - sc->rfb_port = atoi(tmpstr); - else { - sc->rfb_port = atoi(config); + /* + * IPv4 -- host-ip:port + * IPv6 -- [host-ip%zone]:port + * XXX for now port is mandatory. + */ + tmpstr = strsep(&config, "]"); + if (config) { + if (tmpstr[0] == '[') + tmpstr++; sc->rfb_host = tmpstr; + if (config[0] == ':') + config++; + else { + pci_fbuf_usage(xopts); + ret = -1; + goto done; + } + sc->rfb_port = atoi(config); + } else { + config = tmpstr; + tmpstr = strsep(&config, ":"); + if (!config) + sc->rfb_port = atoi(tmpstr); + else { + sc->rfb_port = atoi(config); + sc->rfb_host = tmpstr; + } } #ifndef __FreeBSD__ } else if (!strcmp(xopts, "unix")) { @@ -276,7 +297,7 @@ pci_fbuf_parse_opts(struct pci_fbuf_softc *sc, char *opts) sc->vga_enabled = 1; sc->vga_full = 1; } else { - pci_fbuf_usage(opts); + pci_fbuf_usage(xopts); ret = -1; goto done; } diff --git a/usr/src/cmd/bhyve/pci_nvme.c b/usr/src/cmd/bhyve/pci_nvme.c index a274b19b0b..387611c888 100644 --- a/usr/src/cmd/bhyve/pci_nvme.c +++ b/usr/src/cmd/bhyve/pci_nvme.c @@ -93,6 +93,16 @@ static int nvme_debug = 0; /* helpers */ +/* Convert a zero-based value into a one-based value */ +#define ONE_BASED(zero) ((zero) + 1) +/* Convert a one-based value into a zero-based value */ +#define ZERO_BASED(one) ((one) - 1) + +/* Encode number of SQ's 
and CQ's for Set/Get Features */ +#define NVME_FEATURE_NUM_QUEUES(sc) \ + (ZERO_BASED((sc)->num_squeues) & 0xffff) | \ + (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16; + #define NVME_DOORBELL_OFFSET offsetof(struct nvme_registers, doorbell) enum nvme_controller_register_offsets { @@ -192,8 +202,8 @@ struct pci_nvme_softc { struct pci_nvme_blockstore nvstore; - uint16_t max_qentries; /* max entries per queue */ - uint32_t max_queues; + uint16_t max_qentries; /* max entries per queue */ + uint32_t max_queues; /* max number of IO SQ's or CQ's */ uint32_t num_cqueues; uint32_t num_squeues; @@ -203,7 +213,10 @@ struct pci_nvme_softc { uint32_t ioslots; sem_t iosemlock; - /* status and guest memory mapped queues */ + /* + * Memory mapped Submission and Completion queues + * Each array includes both Admin and IO queues + */ struct nvme_completion_queue *compl_queues; struct nvme_submission_queue *submit_queues; @@ -251,11 +264,13 @@ static void pci_nvme_io_partial(struct blockif_req *br, int err); (NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT)) static __inline void -cpywithpad(char *dst, int dst_size, const char *src, char pad) +cpywithpad(char *dst, size_t dst_size, const char *src, char pad) { - int len = strnlen(src, dst_size); + size_t len; + + len = strnlen(src, dst_size); + memset(dst, pad, dst_size); memcpy(dst, src, len); - memset(dst + len, pad, dst_size - len); } static __inline void @@ -355,7 +370,7 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) { DPRINTF(("%s\r\n", __func__)); - sc->regs.cap_lo = (sc->max_qentries & NVME_CAP_LO_REG_MQES_MASK) | + sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) | (1 << NVME_CAP_LO_REG_CQR_SHIFT) | (60 << NVME_CAP_LO_REG_TO_SHIFT); @@ -368,7 +383,7 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) sc->num_cqueues = sc->num_squeues = sc->max_queues; if (sc->submit_queues != NULL) { - for (int i = 0; i <= sc->max_queues; i++) { + for (int i = 0; i < sc->num_squeues + 1; i++) { /* * The Admin 
Submission Queue is at index 0. * It must not be changed at reset otherwise the @@ -378,26 +393,31 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc) sc->submit_queues[i].qbase = NULL; sc->submit_queues[i].size = 0; sc->submit_queues[i].cqid = 0; - - sc->compl_queues[i].qbase = NULL; - sc->compl_queues[i].size = 0; } sc->submit_queues[i].tail = 0; sc->submit_queues[i].head = 0; sc->submit_queues[i].busy = 0; - - sc->compl_queues[i].tail = 0; - sc->compl_queues[i].head = 0; } } else - sc->submit_queues = calloc(sc->max_queues + 1, + sc->submit_queues = calloc(sc->num_squeues + 1, sizeof(struct nvme_submission_queue)); - if (sc->compl_queues == NULL) { - sc->compl_queues = calloc(sc->max_queues + 1, + if (sc->compl_queues != NULL) { + for (int i = 0; i < sc->num_cqueues + 1; i++) { + /* See Admin Submission Queue note above */ + if (i != 0) { + sc->compl_queues[i].qbase = NULL; + sc->compl_queues[i].size = 0; + } + + sc->compl_queues[i].tail = 0; + sc->compl_queues[i].head = 0; + } + } else { + sc->compl_queues = calloc(sc->num_cqueues + 1, sizeof(struct nvme_completion_queue)); - for (int i = 0; i <= sc->num_cqueues; i++) + for (int i = 0; i < sc->num_cqueues + 1; i++) pthread_mutex_init(&sc->compl_queues[i].mtx, NULL); } } @@ -441,7 +461,7 @@ nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; DPRINTF(("%s DELETE_IO_SQ %u\r\n", __func__, qid)); - if (qid == 0 || qid > sc->num_cqueues) { + if (qid == 0 || qid > sc->num_squeues) { WPRINTF(("%s NOT PERMITTED queue id %u / num_squeues %u\r\n", __func__, qid, sc->num_squeues)); pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC, @@ -462,7 +482,7 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; struct nvme_submission_queue *nsq; - if (qid > sc->num_squeues) { + if ((qid == 0) || (qid > sc->num_squeues)) { WPRINTF(("%s queue index %u > num_squeues %u\r\n", __func__, qid, 
sc->num_squeues)); pci_nvme_status_tc(&compl->status, @@ -472,7 +492,7 @@ nvme_opc_create_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command, } nsq = &sc->submit_queues[qid]; - nsq->size = ((command->cdw10 >> 16) & 0xffff) + 1; + nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, sizeof(struct nvme_command) * (size_t)nsq->size); @@ -527,7 +547,7 @@ nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, uint16_t qid = command->cdw10 & 0xffff; struct nvme_completion_queue *ncq; - if (qid > sc->num_cqueues) { + if ((qid == 0) || (qid > sc->num_cqueues)) { WPRINTF(("%s queue index %u > num_cqueues %u\r\n", __func__, qid, sc->num_cqueues)); pci_nvme_status_tc(&compl->status, @@ -539,7 +559,7 @@ nvme_opc_create_io_cq(struct pci_nvme_softc* sc, struct nvme_command* command, ncq = &sc->compl_queues[qid]; ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1; ncq->intr_vec = (command->cdw11 >> 16) & 0xffff; - ncq->size = ((command->cdw10 >> 16) & 0xffff) + 1; + ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff); ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1, @@ -652,6 +672,45 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command, } static int +nvme_set_feature_queues(struct pci_nvme_softc* sc, struct nvme_command* command, + struct nvme_completion* compl) +{ + uint16_t nqr; /* Number of Queues Requested */ + + nqr = command->cdw11 & 0xFFFF; + if (nqr == 0xffff) { + WPRINTF(("%s: Illegal NSQR value %#x\n", __func__, nqr)); + pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (-1); + } + + sc->num_squeues = ONE_BASED(nqr); + if (sc->num_squeues > sc->max_queues) { + DPRINTF(("NSQR=%u is greater than max %u\n", sc->num_squeues, + sc->max_queues)); + sc->num_squeues = sc->max_queues; + } + + nqr = (command->cdw11 >> 16) & 0xFFFF; + if (nqr == 0xffff) { + WPRINTF(("%s: Illegal NCQR value %#x\n", __func__, nqr)); + 
pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD); + return (-1); + } + + sc->num_cqueues = ONE_BASED(nqr); + if (sc->num_cqueues > sc->max_queues) { + DPRINTF(("NCQR=%u is greater than max %u\n", sc->num_cqueues, + sc->max_queues)); + sc->num_cqueues = sc->max_queues; + } + + compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); + + return (0); +} + +static int nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, struct nvme_completion* compl) { @@ -681,19 +740,7 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, DPRINTF((" volatile write cache 0x%x\r\n", command->cdw11)); break; case NVME_FEAT_NUMBER_OF_QUEUES: - sc->num_squeues = command->cdw11 & 0xFFFF; - sc->num_cqueues = (command->cdw11 >> 16) & 0xFFFF; - DPRINTF((" number of queues (submit %u, completion %u)\r\n", - sc->num_squeues, sc->num_cqueues)); - - if (sc->num_squeues == 0 || sc->num_squeues > sc->max_queues) - sc->num_squeues = sc->max_queues; - if (sc->num_cqueues == 0 || sc->num_cqueues > sc->max_queues) - sc->num_cqueues = sc->max_queues; - - compl->cdw0 = (sc->num_squeues & 0xFFFF) | - ((sc->num_cqueues & 0xFFFF) << 16); - + nvme_set_feature_queues(sc, command, compl); break; case NVME_FEAT_INTERRUPT_COALESCING: DPRINTF((" interrupt coalescing 0x%x\r\n", command->cdw11)); @@ -709,7 +756,7 @@ nvme_opc_set_features(struct pci_nvme_softc* sc, struct nvme_command* command, DPRINTF((" interrupt vector configuration 0x%x\r\n", command->cdw11)); - for (uint32_t i = 0; i <= sc->num_cqueues; i++) { + for (uint32_t i = 0; i < sc->num_cqueues + 1; i++) { if (sc->compl_queues[i].intr_vec == iv) { if (command->cdw11 & (1 << 16)) sc->compl_queues[i].intr_en |= @@ -791,16 +838,7 @@ nvme_opc_get_features(struct pci_nvme_softc* sc, struct nvme_command* command, DPRINTF((" volatile write cache\r\n")); break; case NVME_FEAT_NUMBER_OF_QUEUES: - compl->cdw0 = 0; - if (sc->num_squeues == 0) - compl->cdw0 |= sc->max_queues & 0xFFFF; - else - compl->cdw0 |= 
sc->num_squeues & 0xFFFF; - - if (sc->num_cqueues == 0) - compl->cdw0 |= (sc->max_queues & 0xFFFF) << 16; - else - compl->cdw0 |= (sc->num_cqueues & 0xFFFF) << 16; + compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc); DPRINTF((" number of queues (submit %u, completion %u)\r\n", compl->cdw0 & 0xFFFF, @@ -954,6 +992,7 @@ pci_nvme_handle_admin_cmd(struct pci_nvme_softc* sc, uint64_t value) cq = &sc->compl_queues[0]; cp = &(cq->qbase)[cq->tail]; + cp->cdw0 = compl.cdw0; cp->sqid = 0; cp->sqhd = sqhead; cp->cid = cmd->cid; @@ -1819,7 +1858,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* allocate size of nvme registers + doorbell space for all queues */ pci_membar_sz = sizeof(struct nvme_registers) + - 2*sizeof(uint32_t)*(sc->max_queues); + 2*sizeof(uint32_t)*(sc->max_queues + 1); DPRINTF(("nvme membar size: %u\r\n", pci_membar_sz)); @@ -1829,7 +1868,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) goto done; } - error = pci_emul_add_msixcap(pi, sc->max_queues, NVME_MSIX_BAR); + error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR); if (error) { WPRINTF(("%s pci add msixcap failed\r\n", __func__)); goto done; diff --git a/usr/src/cmd/bhyve/pci_passthru.c b/usr/src/cmd/bhyve/pci_passthru.c index 2ed490c71a..3782914cd5 100644 --- a/usr/src/cmd/bhyve/pci_passthru.c +++ b/usr/src/cmd/bhyve/pci_passthru.c @@ -47,6 +47,9 @@ __FBSDID("$FreeBSD$"); #include <machine/iodev.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/usr/src/cmd/bhyve/pci_virtio_block.c b/usr/src/cmd/bhyve/pci_virtio_block.c index 4040ed8305..b0c3b06187 100644 --- a/usr/src/cmd/bhyve/pci_virtio_block.c +++ b/usr/src/cmd/bhyve/pci_virtio_block.c @@ -79,7 +79,7 @@ __FBSDID("$FreeBSD$"); #define VTBLK_S_IOERR 1 #define VTBLK_S_UNSUPP 2 -#define VTBLK_BLK_ID_BYTES 20 +#define VTBLK_BLK_ID_BYTES 20 + 1 /* Capability bits */ #define VTBLK_F_SEG_MAX (1 << 2) /* Maximum 
request segments */ @@ -391,7 +391,8 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); - sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", + snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, + "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); /* setup virtio block config space */ diff --git a/usr/src/cmd/bhyve/pci_virtio_console.c b/usr/src/cmd/bhyve/pci_virtio_console.c index e1448780f1..90437662df 100644 --- a/usr/src/cmd/bhyve/pci_virtio_console.c +++ b/usr/src/cmd/bhyve/pci_virtio_console.c @@ -47,6 +47,9 @@ __FBSDID("$FreeBSD$"); #include <sys/socket.h> #include <sys/un.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -347,7 +350,7 @@ pci_vtcon_sock_add(struct pci_vtcon_softc *sc, const char *name, #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(s, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git a/usr/src/cmd/bhyve/pci_virtio_net.c b/usr/src/cmd/bhyve/pci_virtio_net.c index f5eadf4a2c..74efbcaee1 100644 --- a/usr/src/cmd/bhyve/pci_virtio_net.c +++ b/usr/src/cmd/bhyve/pci_virtio_net.c @@ -61,6 +61,9 @@ __FBSDID("$FreeBSD$"); #include <net/netmap_user.h> #endif +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -881,7 +884,7 @@ pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(sc->vsc_tapfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(sc->vsc_tapfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif diff --git 
a/usr/src/cmd/bhyve/pci_virtio_rnd.c b/usr/src/cmd/bhyve/pci_virtio_rnd.c index 44bc55e003..5f470c03a6 100644 --- a/usr/src/cmd/bhyve/pci_virtio_rnd.c +++ b/usr/src/cmd/bhyve/pci_virtio_rnd.c @@ -43,6 +43,9 @@ __FBSDID("$FreeBSD$"); #include <sys/linker_set.h> #include <sys/uio.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <fcntl.h> @@ -158,7 +161,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_READ); - if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -168,6 +171,7 @@ pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) len = read(fd, &v, sizeof(v)); if (len <= 0) { WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len)); + close(fd); return (1); } diff --git a/usr/src/cmd/bhyve/pci_virtio_scsi.c b/usr/src/cmd/bhyve/pci_virtio_scsi.c index aa906bb854..238f07398b 100644 --- a/usr/src/cmd/bhyve/pci_virtio_scsi.c +++ b/usr/src/cmd/bhyve/pci_virtio_scsi.c @@ -105,7 +105,6 @@ struct pci_vtscsi_config { struct pci_vtscsi_queue { struct pci_vtscsi_softc * vsq_sc; struct vqueue_info * vsq_vq; - int vsq_ctl_fd; pthread_mutex_t vsq_mtx; pthread_mutex_t vsq_qmtx; pthread_cond_t vsq_cv; @@ -389,7 +388,7 @@ pci_vtscsi_tmf_handle(struct pci_vtscsi_softc *sc, ctl_scsi_zero_io(io); io->io_hdr.io_type = CTL_IO_TASK; - io->io_hdr.nexus.targ_port = tmf->lun[1]; + io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(tmf->lun); io->taskio.tag_type = CTL_TAG_SIMPLE; io->taskio.tag_num = (uint32_t)tmf->id; @@ -462,7 +461,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, struct pci_vtscsi_req_cmd_wr *cmd_wr; struct iovec data_iov_in[VTSCSI_MAXSEG], data_iov_out[VTSCSI_MAXSEG]; union ctl_io *io; - size_t data_niov_in, data_niov_out; + int 
data_niov_in, data_niov_out; void *ext_data_ptr = NULL; uint32_t ext_data_len = 0, ext_sg_entries = 0; int err; @@ -472,15 +471,15 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, seek_iov(iov_out, niov_out, data_iov_out, &data_niov_out, VTSCSI_OUT_HEADER_LEN(sc)); - truncate_iov(iov_in, niov_in, VTSCSI_IN_HEADER_LEN(sc)); - truncate_iov(iov_out, niov_out, VTSCSI_OUT_HEADER_LEN(sc)); + truncate_iov(iov_in, &niov_in, VTSCSI_IN_HEADER_LEN(sc)); + truncate_iov(iov_out, &niov_out, VTSCSI_OUT_HEADER_LEN(sc)); iov_to_buf(iov_in, niov_in, (void **)&cmd_rd); cmd_wr = malloc(VTSCSI_OUT_HEADER_LEN(sc)); io = ctl_scsi_alloc_io(sc->vss_iid); ctl_scsi_zero_io(io); - io->io_hdr.nexus.targ_port = cmd_rd->lun[1]; + io->io_hdr.nexus.initid = sc->vss_iid; io->io_hdr.nexus.targ_lun = pci_vtscsi_get_lun(cmd_rd->lun); io->io_hdr.io_type = CTL_IO_SCSI; @@ -499,7 +498,21 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, io->scsiio.sense_len = sc->vss_config.sense_size; io->scsiio.tag_num = (uint32_t)cmd_rd->id; - io->scsiio.tag_type = CTL_TAG_SIMPLE; + switch (cmd_rd->task_attr) { + case VIRTIO_SCSI_S_ORDERED: + io->scsiio.tag_type = CTL_TAG_ORDERED; + break; + case VIRTIO_SCSI_S_HEAD: + io->scsiio.tag_type = CTL_TAG_HEAD_OF_QUEUE; + break; + case VIRTIO_SCSI_S_ACA: + io->scsiio.tag_type = CTL_TAG_ACA; + break; + case VIRTIO_SCSI_S_SIMPLE: + default: + io->scsiio.tag_type = CTL_TAG_SIMPLE; + break; + } io->scsiio.ext_sg_entries = ext_sg_entries; io->scsiio.ext_data_ptr = ext_data_ptr; io->scsiio.ext_data_len = ext_data_len; @@ -515,7 +528,7 @@ pci_vtscsi_request_handle(struct pci_vtscsi_queue *q, struct iovec *iov_in, sbuf_delete(sb); } - err = ioctl(q->vsq_ctl_fd, CTL_IO, io); + err = ioctl(sc->vss_ctl_fd, CTL_IO, io); if (err != 0) { WPRINTF(("CTL_IO: err=%d (%s)\n", errno, strerror(errno))); cmd_wr->response = VIRTIO_SCSI_S_FAILURE; @@ -552,7 +565,8 @@ pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) n = 
vq_getchain(vq, &idx, iov, VTSCSI_MAXSEG, NULL); bufsize = iov_to_buf(iov, n, &buf); iolen = pci_vtscsi_control_handle(sc, buf, bufsize); - buf_to_iov(buf + bufsize - iolen, iolen, iov, n, iolen); + buf_to_iov(buf + bufsize - iolen, iolen, iov, n, + bufsize - iolen); /* * Release this chain and handle more @@ -560,6 +574,7 @@ pci_vtscsi_controlq_notify(void *vsc, struct vqueue_info *vq) vq_relchain(vq, idx, iolen); } vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ + free(buf); } static void @@ -623,14 +638,8 @@ pci_vtscsi_init_queue(struct pci_vtscsi_softc *sc, int i; queue->vsq_sc = sc; - queue->vsq_ctl_fd = open("/dev/cam/ctl", O_RDWR); queue->vsq_vq = &sc->vss_vq[num + 2]; - if (queue->vsq_ctl_fd < 0) { - WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno))); - return (-1); - } - pthread_mutex_init(&queue->vsq_mtx, NULL); pthread_mutex_init(&queue->vsq_qmtx, NULL); pthread_cond_init(&queue->vsq_cv, NULL); @@ -656,26 +665,36 @@ static int pci_vtscsi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct pci_vtscsi_softc *sc; - char *optname = NULL; - char *opt; - int i; + char *opt, *optname; + const char *devname; + int i, optidx = 0; sc = calloc(1, sizeof(struct pci_vtscsi_softc)); - sc->vss_ctl_fd = open("/dev/cam/ctl", O_RDWR); + devname = "/dev/cam/ctl"; + while ((opt = strsep(&opts, ",")) != NULL) { + optname = strsep(&opt, "="); + if (opt == NULL && optidx == 0) { + if (optname[0] != 0) + devname = optname; + } else if (strcmp(optname, "dev") == 0 && opt != NULL) { + devname = opt; + } else if (strcmp(optname, "iid") == 0 && opt != NULL) { + sc->vss_iid = strtoul(opt, NULL, 10); + } else { + fprintf(stderr, "Invalid option %s\n", optname); + free(sc); + return (1); + } + optidx++; + } + sc->vss_ctl_fd = open(devname, O_RDWR); if (sc->vss_ctl_fd < 0) { - WPRINTF(("cannot open /dev/cam/ctl: %s\n", strerror(errno))); + WPRINTF(("cannot open %s: %s\n", devname, strerror(errno))); + free(sc); return (1); } - while ((opt = 
strsep(&opts, ",")) != NULL) { - if ((optname = strsep(&opt, "=")) != NULL) { - if (strcmp(optname, "iid") == 0) { - sc->vss_iid = strtoul(opt, NULL, 10); - } - } - } - vi_softc_linkup(&sc->vss_vs, &vtscsi_vi_consts, sc, pi, sc->vss_vq); sc->vss_vs.vs_mtx = &sc->vss_mtx; diff --git a/usr/src/cmd/bhyve/pci_xhci.c b/usr/src/cmd/bhyve/pci_xhci.c index be87453bf1..988e6933cc 100644 --- a/usr/src/cmd/bhyve/pci_xhci.c +++ b/usr/src/cmd/bhyve/pci_xhci.c @@ -2640,7 +2640,11 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) struct pci_xhci_dev_emu *dev; struct usb_devemu *ue; void *devsc; +#ifdef __FreeBSD__ char *uopt, *xopts, *config; +#else + char *uopt = NULL, *xopts, *config; +#endif int usb3_port, usb2_port, i; usb3_port = sc->usb3_port_start - 1; @@ -2717,6 +2721,10 @@ pci_xhci_parse_opts(struct pci_xhci_softc *sc, char *opts) sc->ndevices++; } +#ifdef __FreeBSD__ + if (uopt != NULL) + free(uopt); +#endif portsfinal: sc->portregs = calloc(XHCI_MAX_DEVS, sizeof(struct pci_xhci_portregs)); @@ -2746,6 +2754,7 @@ done: free(devices); } } + free(uopt); return (sc->ndevices); } diff --git a/usr/src/cmd/bhyve/ps2kbd.c b/usr/src/cmd/bhyve/ps2kbd.c index ae82957ffa..5453a26949 100644 --- a/usr/src/cmd/bhyve/ps2kbd.c +++ b/usr/src/cmd/bhyve/ps2kbd.c @@ -76,6 +76,107 @@ struct ps2kbd_softc { uint8_t curcmd; /* current command for next byte */ }; +#define SCANCODE_E0_PREFIX 1 +struct extended_translation { + uint32_t keysym; + uint8_t scancode; + int flags; +}; + +/* + * FIXME: Pause/break and Print Screen/SysRq require special handling. 
+ */ +static const struct extended_translation extended_translations[] = { + {0xff08, 0x66}, /* Back space */ + {0xff09, 0x0d}, /* Tab */ + {0xff0d, 0x5a}, /* Return */ + {0xff1b, 0x76}, /* Escape */ + {0xff50, 0x6c, SCANCODE_E0_PREFIX}, /* Home */ + {0xff51, 0x6b, SCANCODE_E0_PREFIX}, /* Left arrow */ + {0xff52, 0x75, SCANCODE_E0_PREFIX}, /* Up arrow */ + {0xff53, 0x74, SCANCODE_E0_PREFIX}, /* Right arrow */ + {0xff54, 0x72, SCANCODE_E0_PREFIX}, /* Down arrow */ + {0xff55, 0x7d, SCANCODE_E0_PREFIX}, /* PgUp */ + {0xff56, 0x7a, SCANCODE_E0_PREFIX}, /* PgDown */ + {0xff57, 0x69, SCANCODE_E0_PREFIX}, /* End */ + {0xff63, 0x70, SCANCODE_E0_PREFIX}, /* Ins */ + {0xff8d, 0x5a, SCANCODE_E0_PREFIX}, /* Keypad Enter */ + {0xffe1, 0x12}, /* Left shift */ + {0xffe2, 0x59}, /* Right shift */ + {0xffe3, 0x14}, /* Left control */ + {0xffe4, 0x14, SCANCODE_E0_PREFIX}, /* Right control */ + /* {0xffe7, XXX}, Left meta */ + /* {0xffe8, XXX}, Right meta */ + {0xffe9, 0x11}, /* Left alt */ + {0xfe03, 0x11, SCANCODE_E0_PREFIX}, /* AltGr */ + {0xffea, 0x11, SCANCODE_E0_PREFIX}, /* Right alt */ + {0xffeb, 0x1f, SCANCODE_E0_PREFIX}, /* Left Windows */ + {0xffec, 0x27, SCANCODE_E0_PREFIX}, /* Right Windows */ + {0xffbe, 0x05}, /* F1 */ + {0xffbf, 0x06}, /* F2 */ + {0xffc0, 0x04}, /* F3 */ + {0xffc1, 0x0c}, /* F4 */ + {0xffc2, 0x03}, /* F5 */ + {0xffc3, 0x0b}, /* F6 */ + {0xffc4, 0x83}, /* F7 */ + {0xffc5, 0x0a}, /* F8 */ + {0xffc6, 0x01}, /* F9 */ + {0xffc7, 0x09}, /* F10 */ + {0xffc8, 0x78}, /* F11 */ + {0xffc9, 0x07}, /* F12 */ + {0xffff, 0x71, SCANCODE_E0_PREFIX}, /* Del */ + {0xff14, 0x7e}, /* ScrollLock */ + /* NumLock and Keypads*/ + {0xff7f, 0x77}, /* NumLock */ + {0xffaf, 0x4a, SCANCODE_E0_PREFIX}, /* Keypad slash */ + {0xffaa, 0x7c}, /* Keypad asterisk */ + {0xffad, 0x7b}, /* Keypad minus */ + {0xffab, 0x79}, /* Keypad plus */ + {0xffb7, 0x6c}, /* Keypad 7 */ + {0xff95, 0x6c}, /* Keypad home */ + {0xffb8, 0x75}, /* Keypad 8 */ + {0xff97, 0x75}, /* Keypad up arrow */ + {0xffb9, 
0x7d}, /* Keypad 9 */ + {0xff9a, 0x7d}, /* Keypad PgUp */ + {0xffb4, 0x6b}, /* Keypad 4 */ + {0xff96, 0x6b}, /* Keypad left arrow */ + {0xffb5, 0x73}, /* Keypad 5 */ + {0xff9d, 0x73}, /* Keypad empty */ + {0xffb6, 0x74}, /* Keypad 6 */ + {0xff98, 0x74}, /* Keypad right arrow */ + {0xffb1, 0x69}, /* Keypad 1 */ + {0xff9c, 0x69}, /* Keypad end */ + {0xffb2, 0x72}, /* Keypad 2 */ + {0xff99, 0x72}, /* Keypad down arrow */ + {0xffb3, 0x7a}, /* Keypad 3 */ + {0xff9b, 0x7a}, /* Keypad PgDown */ + {0xffb0, 0x70}, /* Keypad 0 */ + {0xff9e, 0x70}, /* Keypad ins */ + {0xffae, 0x71}, /* Keypad . */ + {0xff9f, 0x71}, /* Keypad del */ + {0, 0, 0} /* Terminator */ +}; + +/* ASCII to type 2 scancode lookup table */ +static const uint8_t ascii_translations[128] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x29, 0x16, 0x52, 0x26, 0x25, 0x2e, 0x3d, 0x52, + 0x46, 0x45, 0x3e, 0x55, 0x41, 0x4e, 0x49, 0x4a, + 0x45, 0x16, 0x1e, 0x26, 0x25, 0x2e, 0x36, 0x3d, + 0x3e, 0x46, 0x4c, 0x4c, 0x41, 0x55, 0x49, 0x4a, + 0x1e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, + 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, + 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, + 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x36, 0x4e, + 0x0e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, + 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, + 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, + 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x0e, 0x00, +}; + static void fifo_init(struct ps2kbd_softc *sc) { @@ -212,236 +313,38 @@ static void ps2kbd_keysym_queue(struct ps2kbd_softc *sc, int down, uint32_t keysym) { - /* ASCII to type 2 scancode lookup table */ - const uint8_t translation[128] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, - 0x29, 0x16, 0x52, 0x26, 0x25, 0x2e, 0x3d, 0x52, - 0x46, 0x45, 0x3e, 0x55, 0x41, 0x4e, 0x49, 0x4a, - 0x45, 0x16, 0x1e, 0x26, 0x25, 0x2e, 0x36, 0x3d, - 0x3e, 0x46, 0x4c, 0x4c, 0x41, 0x55, 0x49, 0x4a, - 0x1e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, - 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, - 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, - 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x36, 0x4e, - 0x0e, 0x1c, 0x32, 0x21, 0x23, 0x24, 0x2b, 0x34, - 0x33, 0x43, 0x3b, 0x42, 0x4b, 0x3a, 0x31, 0x44, - 0x4d, 0x15, 0x2d, 0x1b, 0x2c, 0x3c, 0x2a, 0x1d, - 0x22, 0x35, 0x1a, 0x54, 0x5d, 0x5b, 0x0e, 0x00, - }; - assert(pthread_mutex_isowned_np(&sc->mtx)); + int e0_prefix, found; + uint8_t code; + const struct extended_translation *trans; + + found = 0; + if (keysym < 0x80) { + code = ascii_translations[keysym]; + e0_prefix = 0; + found = 1; + } else { + for (trans = &(extended_translations[0]); trans->keysym != 0; + trans++) { + if (keysym == trans->keysym) { + code = trans->scancode; + e0_prefix = trans->flags & SCANCODE_E0_PREFIX; + found = 1; + break; + } + } + } - switch (keysym) { - case 0x0 ... 
0x7f: - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, translation[keysym]); - break; - case 0xff08: /* Back space */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x66); - break; - case 0xff09: /* Tab */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0d); - break; - case 0xff0d: /* Return */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x5a); - break; - case 0xff1b: /* Escape */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x76); - break; - case 0xff50: /* Home */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x6c); - break; - case 0xff51: /* Left arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x6b); - break; - case 0xff52: /* Up arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x75); - break; - case 0xff53: /* Right arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x74); - break; - case 0xff54: /* Down arrow */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x72); - break; - case 0xff55: /* PgUp */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x7d); - break; - case 0xff56: /* PgDwn */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x7a); - break; - case 0xff57: /* End */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x69); - break; - case 0xff63: /* Ins */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x70); - break; - case 0xff8d: /* Keypad Enter */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x5a); - break; - case 0xffe1: /* Left shift */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x12); - break; - case 0xffe2: /* Right shift */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x59); - break; - case 0xffe3: /* Left control */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x14); - break; - case 0xffe4: /* Right control */ - fifo_put(sc, 0xe0); - if 
(!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x14); - break; - case 0xffe7: /* Left meta */ - /* XXX */ - break; - case 0xffe8: /* Right meta */ - /* XXX */ - break; - case 0xffe9: /* Left alt */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x11); - break; - case 0xfe03: /* AltGr */ - case 0xffea: /* Right alt */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x11); - break; - case 0xffeb: /* Left Windows */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x1f); - break; - case 0xffec: /* Right Windows */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x27); - break; - case 0xffbe: /* F1 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x05); - break; - case 0xffbf: /* F2 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x06); - break; - case 0xffc0: /* F3 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x04); - break; - case 0xffc1: /* F4 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0C); - break; - case 0xffc2: /* F5 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x03); - break; - case 0xffc3: /* F6 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0B); - break; - case 0xffc4: /* F7 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x83); - break; - case 0xffc5: /* F8 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x0A); - break; - case 0xffc6: /* F9 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x01); - break; - case 0xffc7: /* F10 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x09); - break; - case 0xffc8: /* F11 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x78); - break; - case 0xffc9: /* F12 */ - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x07); - break; - case 0xffff: /* Del */ - fifo_put(sc, 0xe0); - if (!down) - fifo_put(sc, 0xf0); - fifo_put(sc, 0x71); - break; - default: - fprintf(stderr, "Unhandled ps2 keyboard keysym 0x%x\n", - keysym); - break; + if (!found) { + fprintf(stderr, "Unhandled ps2 keyboard 
keysym 0x%x\n", keysym); + return; } + + if (e0_prefix) + fifo_put(sc, 0xe0); + if (!down) + fifo_put(sc, 0xf0); + fifo_put(sc, code); } static void diff --git a/usr/src/cmd/bhyve/rfb.c b/usr/src/cmd/bhyve/rfb.c index f761646fc7..39ea1611f9 100644 --- a/usr/src/cmd/bhyve/rfb.c +++ b/usr/src/cmd/bhyve/rfb.c @@ -43,8 +43,12 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> #include <machine/specialreg.h> #include <netinet/in.h> +#include <netdb.h> #include <assert.h> +#ifndef WITHOUT_CAPSICUM +#include <capsicum_helpers.h> +#endif #include <err.h> #include <errno.h> #include <pthread.h> @@ -971,8 +975,11 @@ sse42_supported(void) int rfb_init(char *hostname, int port, int wait, char *password) { + int e; + char servname[6]; struct rfb_softc *rc; - struct sockaddr_in sin; + struct addrinfo *ai; + struct addrinfo hints; int on = 1; #ifndef WITHOUT_CAPSICUM cap_rights_t rights; @@ -989,37 +996,49 @@ rfb_init(char *hostname, int port, int wait, char *password) rc->password = password; - rc->sfd = socket(AF_INET, SOCK_STREAM, 0); + snprintf(servname, sizeof(servname), "%d", port ? port : 5900); + + if (!hostname || strlen(hostname) == 0) +#if defined(INET) + hostname = "127.0.0.1"; +#elif defined(INET6) + hostname = "[::1]"; +#endif + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV | AI_PASSIVE; + + if ((e = getaddrinfo(hostname, servname, &hints, &ai)) != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); + return(-1); + } + + rc->sfd = socket(ai->ai_family, ai->ai_socktype, 0); if (rc->sfd < 0) { perror("socket"); + freeaddrinfo(ai); return (-1); } setsockopt(rc->sfd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); -#ifdef __FreeBSD__ - sin.sin_len = sizeof(sin); -#endif - sin.sin_family = AF_INET; - sin.sin_port = port ? 
htons(port) : htons(5900); - if (hostname && strlen(hostname) > 0) - inet_pton(AF_INET, hostname, &(sin.sin_addr)); - else - sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - - if (bind(rc->sfd, (struct sockaddr *)&sin, sizeof(sin)) < 0) { + if (bind(rc->sfd, ai->ai_addr, ai->ai_addrlen) < 0) { perror("bind"); + freeaddrinfo(ai); return (-1); } if (listen(rc->sfd, 1) < 0) { perror("listen"); + freeaddrinfo(ai); return (-1); } #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE); - if (cap_rights_limit(rc->sfd, &rights) == -1 && errno != ENOSYS) + if (caph_rights_limit(rc->sfd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif @@ -1041,6 +1060,7 @@ rfb_init(char *hostname, int port, int wait, char *password) pthread_mutex_unlock(&rc->mtx); } + freeaddrinfo(ai); return (0); } diff --git a/usr/src/cmd/bhyve/uart_emul.c b/usr/src/cmd/bhyve/uart_emul.c index ac912991f0..1027d0b0f6 100644 --- a/usr/src/cmd/bhyve/uart_emul.c +++ b/usr/src/cmd/bhyve/uart_emul.c @@ -938,14 +938,12 @@ uart_set_backend(struct uart_softc *sc, const char *opts) #ifndef WITHOUT_CAPSICUM cap_rights_init(&rights, CAP_EVENT, CAP_IOCTL, CAP_READ, CAP_WRITE); - if (cap_rights_limit(sc->tty.fd, &rights) == -1 && - errno != ENOSYS) + if (caph_rights_limit(sc->tty.fd, &rights) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); - if (cap_ioctls_limit(sc->tty.fd, cmds, nitems(cmds)) == -1 && - errno != ENOSYS) + if (caph_ioctls_limit(sc->tty.fd, cmds, nitems(cmds)) == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); if (!uart_stdio) { - if (caph_limit_stdin() == -1 && errno != ENOSYS) + if (caph_limit_stdin() == -1) errx(EX_OSERR, "Unable to apply rights for sandbox"); } diff --git a/usr/src/compat/freebsd/amd64/machine/cpufunc.h b/usr/src/compat/freebsd/amd64/machine/cpufunc.h index 005a76b305..0b7bcdaa59 100644 --- a/usr/src/compat/freebsd/amd64/machine/cpufunc.h +++ b/usr/src/compat/freebsd/amd64/machine/cpufunc.h @@ 
-288,5 +288,24 @@ load_dr7(uint64_t dr7) __asm __volatile("movq %0,%%dr7" : : "r" (dr7)); } +#ifdef _KERNEL +/* + * Including the native sys/segments.h in userspace seriously conflicts with + * the FreeBSD compat/contrib headers. + */ +#include <sys/segments.h> + +static __inline void +lldt(u_short sel) +{ + wr_ldtr(sel); +} + +static __inline u_short +sldt() +{ + return (rd_ldtr()); +} +#endif /* _KERNEL */ #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_CPUFUNC_H_ */ diff --git a/usr/src/compat/freebsd/amd64/machine/param.h b/usr/src/compat/freebsd/amd64/machine/param.h index eaca5ab8d7..b152f4d526 100644 --- a/usr/src/compat/freebsd/amd64/machine/param.h +++ b/usr/src/compat/freebsd/amd64/machine/param.h @@ -36,4 +36,6 @@ /* Size of the level 4 page-map level-4 table units */ #define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t))) +#define CACHE_LINE_SIZE 64 + #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_PARAM_H_ */ diff --git a/usr/src/compat/freebsd/amd64/machine/specialreg.h b/usr/src/compat/freebsd/amd64/machine/specialreg.h index 59fc064a4c..e1e6543701 100644 --- a/usr/src/compat/freebsd/amd64/machine/specialreg.h +++ b/usr/src/compat/freebsd/amd64/machine/specialreg.h @@ -36,9 +36,25 @@ #undef CR4_PCE #undef CR4_VMXE #undef CR4_SMEP +#undef CR4_SMAP #undef CR4_FSGSBASE #undef CR4_PCIDE #endif /* _SYS_CONTROLREGS_H */ +#ifdef _SYS_X86_ARCHEXT_H +/* Our IA32 speculation-related defines conflict with BSD header */ +#undef IA32_ARCH_CAP_RDCL_NO +#undef IA32_ARCH_CAP_IBRS_ALL +#undef IA32_ARCH_CAP_RSBA +#undef IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY +#undef IA32_ARCH_CAP_SSB_NO +#undef IA32_SPEC_CTRL_IBRS +#undef IA32_SPEC_CTRL_STIBP +#undef IA32_SPEC_CTRL_SSBD +#undef IA32_FLUSH_CMD_L1D +#undef MSR_IA32_SPEC_CTRL +#undef MSR_IA32_PRED_CMD +#endif /* _SYS_X86_ARCHEXT_H */ + #include <x86/specialreg.h> #endif /* _COMPAT_FREEBSD_AMD64_MACHINE_SPECIALREG_H_ */ diff --git a/usr/src/pkg/manifests/system-test-zfstest.mf b/usr/src/pkg/manifests/system-test-zfstest.mf index 
88f6f6bdc6..19f249965a 100644 --- a/usr/src/pkg/manifests/system-test-zfstest.mf +++ b/usr/src/pkg/manifests/system-test-zfstest.mf @@ -111,6 +111,7 @@ dir path=opt/zfs-tests/tests/functional/history dir path=opt/zfs-tests/tests/functional/holes dir path=opt/zfs-tests/tests/functional/inheritance dir path=opt/zfs-tests/tests/functional/inuse +dir path=opt/zfs-tests/tests/functional/large_dnode dir path=opt/zfs-tests/tests/functional/large_files dir path=opt/zfs-tests/tests/functional/largest_pool dir path=opt/zfs-tests/tests/functional/libzfs @@ -2202,6 +2203,26 @@ file path=opt/zfs-tests/tests/functional/inuse/inuse_007_pos mode=0555 file path=opt/zfs-tests/tests/functional/inuse/inuse_008_pos mode=0555 file path=opt/zfs-tests/tests/functional/inuse/inuse_009_pos mode=0555 file path=opt/zfs-tests/tests/functional/inuse/setup mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/cleanup mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_001_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_002_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_003_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_004_neg \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_005_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_006_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_007_neg \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_008_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/large_dnode_009_pos \ + mode=0555 +file path=opt/zfs-tests/tests/functional/large_dnode/setup mode=0555 file path=opt/zfs-tests/tests/functional/large_files/cleanup mode=0555 file path=opt/zfs-tests/tests/functional/large_files/large_files_001_pos \ mode=0555 diff --git a/usr/src/test/zfs-tests/runfiles/delphix.run 
b/usr/src/test/zfs-tests/runfiles/delphix.run index 57ed0506e3..1f7ce39c8c 100644 --- a/usr/src/test/zfs-tests/runfiles/delphix.run +++ b/usr/src/test/zfs-tests/runfiles/delphix.run @@ -432,6 +432,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos', 'inuse_009_pos'] post = +[/opt/zfs-tests/tests/functional/large_dnode] +tests = ['large_dnode_001_pos', 'large_dnode_003_pos', + 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_007_neg'] + [/opt/zfs-tests/tests/functional/large_files] tests = ['large_files_001_pos'] diff --git a/usr/src/test/zfs-tests/runfiles/omnios.run b/usr/src/test/zfs-tests/runfiles/omnios.run index ca3181af77..a8bad95433 100644 --- a/usr/src/test/zfs-tests/runfiles/omnios.run +++ b/usr/src/test/zfs-tests/runfiles/omnios.run @@ -400,6 +400,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos', 'inuse_009_pos'] post = +[/opt/zfs-tests/tests/functional/large_dnode] +tests = ['large_dnode_001_pos', 'large_dnode_003_pos', + 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_007_neg'] + [/opt/zfs-tests/tests/functional/large_files] tests = ['large_files_001_pos'] diff --git a/usr/src/test/zfs-tests/runfiles/openindiana.run b/usr/src/test/zfs-tests/runfiles/openindiana.run index 43a2a641c8..fc3131dc76 100644 --- a/usr/src/test/zfs-tests/runfiles/openindiana.run +++ b/usr/src/test/zfs-tests/runfiles/openindiana.run @@ -400,6 +400,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos', 'inuse_009_pos'] post = +[/opt/zfs-tests/tests/functional/large_dnode] +tests = ['large_dnode_001_pos', 'large_dnode_003_pos', + 'large_dnode_004_neg', 'large_dnode_005_pos', 'large_dnode_007_neg'] + [/opt/zfs-tests/tests/functional/large_files] tests = ['large_files_001_pos'] diff --git a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index f65d065d7c..76b7a1582b 100644 --- 
a/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/usr/src/test/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -70,4 +70,5 @@ typeset -a properties=( "feature@skein" "feature@edonr" "feature@device_removal" + "feature@large_dnode" ) diff --git a/usr/src/test/zfs-tests/tests/functional/delegate/delegate_common.kshlib b/usr/src/test/zfs-tests/tests/functional/delegate/delegate_common.kshlib index 771ada74a0..71d7a45c2f 100644 --- a/usr/src/test/zfs-tests/tests/functional/delegate/delegate_common.kshlib +++ b/usr/src/test/zfs-tests/tests/functional/delegate/delegate_common.kshlib @@ -248,6 +248,10 @@ function check_fs_perm verify_fs_canmount $user $perm $fs ret=$? ;; + dnodesize) + verify_fs_dnodesize $user $perm $fs + ret=$? + ;; recordsize) verify_fs_recordsize $user $perm $fs ret=$? @@ -1098,6 +1102,21 @@ function verify_fs_recordsize return 0 } +function verify_fs_dnodesize +{ + typeset user=$1 + typeset perm=$2 + typeset fs=$3 + value="2k" + + user_run $user zfs set dnodesize=$value $fs + if [[ $value != $(get_prop dnodesize $fs) ]]; then + return 1 + fi + + return 0 +} + function verify_fs_quota { typeset user=$1 diff --git a/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh b/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh index 98e58f9097..030d7a9179 100644 --- a/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_010_pos.ksh @@ -59,6 +59,7 @@ set -A perms create true false \ allow true true \ quota true false \ reservation true true \ + dnodesize true false \ recordsize true false \ mountpoint true false \ checksum true true \ diff --git a/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh b/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh index 31e51c819c..97561821f3 100644 --- 
a/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh +++ b/usr/src/test/zfs-tests/tests/functional/delegate/zfs_allow_012_neg.ksh @@ -58,7 +58,7 @@ log_onexit cleanup set -A perms create snapshot mount send allow quota reservation \ recordsize mountpoint checksum compression canmount atime \ devices exec volsize setuid readonly snapdir userprop \ - aclmode aclinherit rollback clone rename promote \ + aclmode aclinherit rollback clone rename promote dnodesize \ zoned xattr receive destroy sharenfs share log_must zpool set delegation=off $TESTPOOL diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/Makefile b/usr/src/test/zfs-tests/tests/functional/large_dnode/Makefile new file mode 100644 index 0000000000..fa523a8279 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/Makefile @@ -0,0 +1,21 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2013, 2016 by Delphix. All rights reserved. +# + +include $(SRC)/Makefile.master + +ROOTOPTPKG = $(ROOT)/opt/zfs-tests +TARGETDIR = $(ROOTOPTPKG)/tests/functional/large_dnode + +include $(SRC)/test/zfs-tests/Makefile.com diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/cleanup.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/cleanup.ksh new file mode 100755 index 0000000000..61caf39100 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/cleanup.ksh @@ -0,0 +1,25 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_001_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_001_pos.ksh new file mode 100755 index 0000000000..c07f4e8d74 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_001_pos.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. 
$STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that the dnode sizes of newly created files are consistent +# with the dnodesize dataset property. +# +# STRATEGY: +# 1. Create a file system +# 2. Set dnodesize to a legal literal value +# 3. Create a file +# 4. Repeat 2-3 for all legal literal values of dnodesize values +# 5. Unmount the file system +# 6. Use zdb to check expected dnode sizes +# + +TEST_FS=$TESTPOOL/large_dnode + +verify_runnable "both" + +function cleanup +{ + datasetexists $TEST_FS && log_must zfs destroy $TEST_FS +} + +log_onexit cleanup +log_assert "dnode sizes are consistent with dnodesize dataset property" + +log_must zfs create $TEST_FS + +set -A dnsizes "512" "1k" "2k" "4k" "8k" "16k" +set -A inodes + +for ((i=0; i < ${#dnsizes[*]}; i++)) ; do + size=${dnsizes[$i]} + if [[ $size == "512" ]] ; then + size="legacy" + fi + file=/$TEST_FS/file.$size + log_must zfs set dnsize=$size $TEST_FS + touch $file + inodes[$i]=$(ls -li $file | awk '{print $1}') +done + +log_must zfs umount $TEST_FS + +for ((i=0; i < ${#dnsizes[*]}; i++)) ; do + dnsize=$(zdb -dddd $TEST_FS ${inodes[$i]} | + awk '/ZFS plain file/ {print $6}' | tr K k) + if [[ "$dnsize" != "${dnsizes[$i]}" ]]; then + log_fail "dnode size is $dnsize (expected ${dnsizes[$i]})" + fi +done + +log_pass "dnode sizes are consistent with dnodesize dataset property" diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_002_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_002_pos.ksh new file mode 100755 index 0000000000..1dd8d888c3 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_002_pos.ksh @@ -0,0 +1,78 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that extended attributes can use extra bonus space of a large +# dnode without kicking in a spill block. +# +# STRATEGY: +# 1. Create a file system with xattr=sa +# 2. Set dnodesize to a legal literal value +# 3. Create a file +# 4. Store an xattr that fits within the dnode size +# 5. Repeat 2-4 for all legal literal values of dnodesize +# 6. Unmount the file system +# 7. 
Use zdb to check for missing SPILL_BLKPTR flag +# + +TEST_FS=$TESTPOOL/large_dnode + +verify_runnable "both" + +function cleanup +{ + datasetexists $TEST_FS && log_must zfs destroy $TEST_FS +} + +log_onexit cleanup +log_assert "extended attributes use extra bonus space of a large dnode" + +log_must zfs create -o xattr=sa $TEST_FS + +# Store dnode size minus 512 in an xattr +set -A xattr_sizes "512" "1536" "3584" "7680" "15872" +set -A prop_values "1k" "2k" "4k" "8k" "16k" +set -A inodes + +for ((i=0; i < ${#prop_values[*]}; i++)) ; do + prop_val=${prop_values[$i]} + file=/$TEST_FS/file.$prop_val + log_must zfs set dnsize=$prop_val $TEST_FS + touch $file + xattr_size=${xattr_sizes[$i]} + xattr_name=user.foo + xattr_val=$(dd if=/dev/urandom bs=1 count=$xattr_size | + openssl enc -a -A) + log_must setfattr -n $xattr_name -v 0s$xattr_val $file + inodes[$i]=$(ls -li $file | awk '{print $1}') +done + +log_must zfs umount $TEST_FS + +for ((i=0; i < ${#inodes[*]}; i++)) ; do + log_mustnot eval "zdb -dddd $TEST_FS ${inodes[$i]} | grep SPILL_BLKPTR" +done + +log_pass "extended attributes use extra bonus space of a large dnode" diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_003_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_003_pos.ksh new file mode 100755 index 0000000000..a938c7de7c --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_003_pos.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +function cleanup +{ + if datasetexists $LDNPOOL ; then + log_must zpool destroy -f $LDNPOOL + fi +} + +log_onexit cleanup + +log_assert "feature correctly switches between enabled and active" + +LDNPOOL=ldnpool +LDNFS=$LDNPOOL/large_dnode +log_must mkfile 64M $TESTDIR/$LDNPOOL +log_must zpool create $LDNPOOL $TESTDIR/$LDNPOOL + + +state=$(zpool list -Ho feature@large_dnode $LDNPOOL) +if [[ "$state" != "enabled" ]]; then + log_fail "large_dnode has state $state (expected enabled)" +fi + +log_must zfs create -o dnodesize=1k $LDNFS +log_must touch /$LDNFS/foo +log_must zfs unmount $LDNFS + +state=$(zpool list -Ho feature@large_dnode $LDNPOOL) +if [[ "$state" != "active" ]]; then + log_fail "large_dnode has state $state (expected active)" +fi + +log_must zfs destroy $LDNFS + +state=$(zpool list -Ho feature@large_dnode $LDNPOOL) +if [[ "$state" != "enabled" ]]; then + log_fail "large_dnode has state $state (expected enabled)" +fi + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_004_neg.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_004_neg.ksh new file mode 100755 index 0000000000..1006ae6af5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_004_neg.ksh @@ -0,0 +1,68 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2016 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +TEST_FS=$TESTPOOL/large_dnode +TEST_SNAP=$TESTPOOL/large_dnode@ldnsnap +TEST_STREAM=$TESTDIR/ldnsnap + +function cleanup +{ + if datasetexists $TEST_FS ; then + log_must zfs destroy -r $TEST_FS + fi + + if datasetexists $LGCYPOOL ; then + log_must zpool destroy -f $LGCYPOOL + fi + + rm -f $TEST_STREAM +} + +log_onexit cleanup +log_assert "zfs send stream with large dnodes not accepted by legacy pool" + +log_must zfs create -o dnodesize=1k $TEST_FS +log_must touch /$TEST_FS/foo +log_must zfs umount $TEST_FS +log_must zfs snap $TEST_SNAP +log_must eval "zfs send $TEST_SNAP > $TEST_STREAM" + +LGCYPOOL=ldnpool +LGCYFS=$LGCYPOOL/legacy +log_must mkfile 64M $TESTDIR/$LGCYPOOL +log_must zpool create -d $LGCYPOOL $TESTDIR/$LGCYPOOL +log_mustnot eval "zfs recv $LGCYFS < $TEST_STREAM" + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_005_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_005_pos.ksh new file mode 100755 index 0000000000..13f1288e4e --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_005_pos.ksh @@ -0,0 +1,75 @@ +#!/bin/ksh -p +# +# CDDL 
HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +TEST_SEND_FS=$TESTPOOL/send_large_dnode +TEST_RECV_FS=$TESTPOOL/recv_large_dnode +TEST_SNAP=$TEST_SEND_FS@ldnsnap +TEST_SNAPINCR=$TEST_SEND_FS@ldnsnap_incr +TEST_STREAM=$TESTDIR/ldnsnap +TEST_STREAMINCR=$TESTDIR/ldnsnap_incr +TEST_FILE=foo +TEST_FILEINCR=bar + +function cleanup +{ + if datasetexists $TEST_SEND_FS ; then + log_must zfs destroy -r $TEST_SEND_FS + fi + + if datasetexists $TEST_RECV_FS ; then + log_must zfs destroy -r $TEST_RECV_FS + fi + + rm -f $TEST_STREAM + rm -f $TEST_STREAMINCR +} + +log_onexit cleanup + +log_assert "zfs send stream with large dnodes accepted by new pool" + +log_must zfs create -o dnodesize=1k $TEST_SEND_FS +log_must touch /$TEST_SEND_FS/$TEST_FILE +log_must zfs snap $TEST_SNAP +log_must zfs send $TEST_SNAP > $TEST_STREAM +log_must rm -f /$TEST_SEND_FS/$TEST_FILE +log_must touch /$TEST_SEND_FS/$TEST_FILEINCR +log_must zfs snap $TEST_SNAPINCR +log_must zfs send -i $TEST_SNAP $TEST_SNAPINCR > $TEST_STREAMINCR + +log_must eval "zfs recv $TEST_RECV_FS < $TEST_STREAM" +inode=$(ls -li /$TEST_RECV_FS/$TEST_FILE | awk '{print $1}') +dnsize=$(zdb -dddd $TEST_RECV_FS $inode | awk 
'/ZFS plain file/ {print $6}') +if [[ "$dnsize" != "1K" ]]; then + log_fail "dnode size is $dnsize (expected 1K)" +fi + +log_must eval "zfs recv -F $TEST_RECV_FS < $TEST_STREAMINCR" +log_must diff -r /$TEST_SEND_FS /$TEST_RECV_FS +log_must zfs umount $TEST_SEND_FS +log_must zfs umount $TEST_RECV_FS + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_006_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_006_pos.ksh new file mode 100755 index 0000000000..68fc5e3040 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_006_pos.ksh @@ -0,0 +1,67 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2016 by Delphix. All rights reserved. +# + +. 
$STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Run xattrtest on a dataset with large dnodes and xattr=sa +# to stress xattr usage of the extra bonus space and verify +# contents +# + +TEST_FS=$TESTPOOL/large_dnode + +verify_runnable "both" + +function cleanup +{ + datasetexists $TEST_FS && log_must zfs destroy $TEST_FS +} + +log_onexit cleanup +log_assert "xattrtest runs cleanly on dataset with large dnodes" + +log_must zfs create $TEST_FS + +set -A xattr_sizes "512" "1536" "3584" "7680" "15872" +set -A prop_values "1k" "2k" "4k" "8k" "16k" + +for ((i=0; i < ${#prop_values[*]}; i++)) ; do + prop_val=${prop_values[$i]} + dir=/$TEST_FS/$prop_val + xattr_size=${xattr_sizes[$i]} + log_must zfs set dnsize=$prop_val $TEST_FS + log_must mkdir $dir + log_must xattrtest -R -y -s $xattr_size -f 1024 -p $dir +done + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_007_neg.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_007_neg.ksh new file mode 100755 index 0000000000..fb4747839f --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_007_neg.ksh @@ -0,0 +1,65 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013, 2016 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Verify that the dnodesize dataset property won't accept a value +# other than "legacy" if the large_dnode feature is not enabled. +# + +verify_runnable "both" + +function cleanup +{ + if datasetexists $LGCYPOOL ; then + log_must zpool destroy -f $LGCYPOOL + fi +} + +log_onexit cleanup + +log_assert "values other than dnodesize=legacy rejected by legacy pool" + +set -A prop_vals "auto" "1k" "2k" "4k" "8k" "16k" + +LGCYPOOL=lgcypool +LGCYFS=$LGCYPOOL/legacy +log_must mkfile 64M $TESTDIR/$LGCYPOOL +log_must zpool create -d $LGCYPOOL $TESTDIR/$LGCYPOOL +log_must zfs create $LGCYFS + +for val in ${prop_vals[@]} ; do + log_mustnot zfs set dnodesize=$val $LGCYFS +done + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_008_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_008_pos.ksh new file mode 100755 index 0000000000..eac292cbe0 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_008_pos.ksh @@ -0,0 +1,77 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Run many xattrtests on a dataset with large dnodes and xattr=sa to +# stress concurrent allocation of large dnodes. +# + +TEST_FS=$TESTPOOL/large_dnode + +verify_runnable "both" + +function cleanup +{ + datasetexists $TEST_FS && log_must zfs destroy $TEST_FS +} + +function verify_dnode_packing +{ + zdb -dd $TEST_FS | grep -A 3 'Dnode slots' | awk ' + /Total used:/ {total_used=$NF} + /Max used:/ {max_used=$NF} + /Percent empty:/ {print total_used, max_used, int($NF)} + ' | while read total_used max_used pct_empty + do + log_note "total_used $total_used max_used $max_used pct_empty $pct_empty" + if [ $pct_empty -gt 5 ]; then + log_fail "Holes in dnode array: pct empty $pct_empty > 5" + fi + done +} + +log_onexit cleanup +log_assert "xattrtest runs concurrently on dataset with large dnodes" + +log_must zfs create $TEST_FS +log_must zfs set dnsize=auto $TEST_FS +log_must zfs set xattr=sa $TEST_FS + +for ((i=0; i < 100; i++)); do + dir="/$TEST_FS/dir.$i" + log_must mkdir "$dir" + log_must eval "xattrtest -R -r -y -x 1 -f 1024 -k -p $dir >/dev/null 2>&1 &" +done + +log_must wait + +verify_dnode_packing + +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_009_pos.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_009_pos.ksh new file mode 100755 index 0000000000..fa746c52e5 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/large_dnode_009_pos.ksh @@ -0,0 +1,71 
@@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Use is subject to license terms. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Run many xattrtests on a dataset with large dnodes and xattr=sa to +# stress concurrent allocation of large dnodes. 
+# + +TEST_FS=$TESTPOOL/large_dnode + +verify_runnable "both" + +function cleanup +{ + datasetexists $TEST_FS && log_must zfs destroy $TEST_FS +} + +log_onexit cleanup +log_assert "xattrtest runs concurrently on dataset with large dnodes" + +log_must zfs create $TEST_FS +log_must zfs set dnsize=auto $TEST_FS +log_must zfs set xattr=sa $TEST_FS + +for ((i=0; i < 100; i++)); do + dir="/$TEST_FS/dir.$i" + log_must mkdir "$dir" + + do_unlink="" + if [ $((RANDOM % 2)) -eq 0 ]; then + do_unlink="-k -f 1024" + else + do_unlink="-f $((RANDOM % 1024))" + fi + log_must eval "xattrtest -R -r -y -x 1 $do_unlink -p $dir >/dev/null 2>&1 &" +done + +log_must wait + +log_must zpool export $TESTPOOL +log_must zpool import $TESTPOOL +log_must ls -lR "/$TEST_FS/" >/dev/null 2>&1 +log_must zdb -d $TESTPOOL +log_pass diff --git a/usr/src/test/zfs-tests/tests/functional/large_dnode/setup.ksh b/usr/src/test/zfs-tests/tests/functional/large_dnode/setup.ksh new file mode 100755 index 0000000000..d9b1a6ee85 --- /dev/null +++ b/usr/src/test/zfs-tests/tests/functional/large_dnode/setup.ksh @@ -0,0 +1,27 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. 
$STF_SUITE/include/libtest.shlib + +DISK=${DISKS%% *} + +default_setup $DISK diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib b/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib index 8f1f30ed26..5d2ba60f18 100644 --- a/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib +++ b/usr/src/test/zfs-tests/tests/functional/rsend/rsend.kshlib @@ -208,8 +208,8 @@ function cmp_ds_prop for item in "type" "origin" "volblocksize" "aclinherit" "aclmode" \ "atime" "canmount" "checksum" "compression" "copies" "devices" \ - "exec" "quota" "readonly" "recordsize" "reservation" "setuid" \ - "sharenfs" "snapdir" "version" "volsize" "xattr" "zoned" \ + "dnodesize" "exec" "quota" "readonly" "recordsize" "reservation" \ + "setuid" "sharenfs" "snapdir" "version" "volsize" "xattr" "zoned" \ "mountpoint"; do zfs get -H -o property,value,source $item $dtst1 >> \ diff --git a/usr/src/test/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh b/usr/src/test/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh index af6f49dc25..d257ed8e4c 100644 --- a/usr/src/test/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh +++ b/usr/src/test/zfs-tests/tests/functional/rsend/rsend_012_pos.ksh @@ -142,6 +142,7 @@ for fs in "$POOL" "$POOL/pclone" "$POOL/$FS" "$POOL/$FS/fs1" \ rand_set_prop $fs exec "on" "off" rand_set_prop $fs quota "512M" "1024M" rand_set_prop $fs recordsize "512" "2K" "8K" "32K" "128K" + rand_set_prop $fs dnodesize "legacy" "auto" "1k" "2k" "4k" "8k" "16k" rand_set_prop $fs setuid "on" "off" rand_set_prop $fs snapdir "hidden" "visible" rand_set_prop $fs xattr "on" "off" diff --git a/usr/src/uts/common/inet/ipf/ip_state.c b/usr/src/uts/common/inet/ipf/ip_state.c index c1fe642d00..184f8775b6 100644 --- a/usr/src/uts/common/inet/ipf/ip_state.c +++ b/usr/src/uts/common/inet/ipf/ip_state.c @@ -1729,7 +1729,7 @@ ipstate_t *is; } else if (flags == TH_SYN) { is->is_s0[source] = ntohl(tcp->th_seq) + 1; if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) - (void) 
fr_tcpoptions(fin, tcp, tdata); + (void) fr_tcpoptions(fin, tcp, fdata); if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) fr_checknewisn(fin, is); @@ -1840,6 +1840,7 @@ int flags; * the receiver also does window scaling) */ if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { + fdata->td_winflags &= ~TCP_WSCALE_FIRST; fdata->td_maxwin = win; } @@ -1902,7 +1903,7 @@ int flags; #endif /* XXX what about big packets */ #define MAXACKWINDOW 66000 - (-ackskew <= (MAXACKWINDOW << fdata->td_winscale)) && + (-ackskew <= (MAXACKWINDOW)) && ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { inseq = 1; /* diff --git a/usr/src/uts/i86pc/io/vmm/README.sync b/usr/src/uts/i86pc/io/vmm/README.sync index e9a2479b13..676fdd3a9d 100644 --- a/usr/src/uts/i86pc/io/vmm/README.sync +++ b/usr/src/uts/i86pc/io/vmm/README.sync @@ -1,18 +1,13 @@ The bhyve kernel module and its associated userland consumers have been updated to the latest upstream FreeBSD sources as of: -commit f81459bd8363602ed5e436f10288320419e80ccf -Author: andrew <andrew@FreeBSD.org> -Date: Thu Sep 27 11:16:19 2018 +0000 - Handle a guest executing a vm instruction by trapping and raising an - undefined instruction exception. Previously we would exit the guest, - however an unprivileged user could execute these. +commit 6b1bb0edb4792cc3d4e6b71c4a80e99438081d5d +Author: imp <imp@FreeBSD.org> +Date: Tue Feb 12 19:05:09 2019 +0000 - Found with: syzkaller - Reviewed by: araujo, tychon (previous version) - Approved by: re (kib) - MFC after: 1 week - Differential Revision: https://reviews.freebsd.org/D17192 + Revert r343077 until the license issues surrounding it can be resolved. 
-Which corresponds to SVN revision: 338957 + Approved by: core@ + +Which corresponds to SVN revision: 344057 diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 9c22fc2532..e921383d22 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -1965,6 +1965,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm *vm; uint64_t vmcb_pa; int handled; + uint16_t ldt_sel; svm_sc = arg; vm = svm_sc->vm; @@ -2049,6 +2050,15 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, break; } + /* + * #VMEXIT resumes the host with the guest LDTR, so + * save the current LDT selector so it can be restored + * after an exit. The userspace hypervisor probably + * doesn't use a LDT, but save and restore it to be + * safe. + */ + ldt_sel = sldt(); + svm_inj_interrupts(svm_sc, vcpu, vlapic); /* Activate the nested pmap on 'curcpu' */ @@ -2083,6 +2093,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap, */ restore_host_tss(); + /* Restore host LDTR. */ + lldt(ldt_sel); + /* #VMEXIT disables interrupts so re-enable them here. */ enable_gintr(); diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c index 0417983233..67c43100f1 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm_msr.c @@ -122,9 +122,8 @@ svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: case MSR_SYSCFG: - *result = 0; - break; case MSR_AMDK8_IPM: + case MSR_EXTFEATURES: *result = 0; break; default: @@ -163,6 +162,8 @@ svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) * Ignore writes to microcode update register. 
*/ break; + case MSR_EXTFEATURES: + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h index 28c5e6b15b..edde5c6dd5 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h @@ -419,6 +419,14 @@ VMPTRLD(struct vmcs *vmcs) #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 +#define EXIT_REASON_PM_LOG_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 /* * NMI unblocking due to IRET. diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index d33ec7e4db..a723be0d28 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -104,7 +104,7 @@ __FBSDID("$FreeBSD$"); PROCBASED_NMI_WINDOW_EXITING) #ifdef __FreeBSD__ -#define PROCBASED_CTLS_ONE_SETTING \ +#define PROCBASED_CTLS_ONE_SETTING \ (PROCBASED_SECONDARY_CONTROLS | \ PROCBASED_MWAIT_EXITING | \ PROCBASED_MONITOR_EXITING | \ @@ -471,7 +471,7 @@ vmx_allow_x2apic_msrs(struct vmx *vmx) for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_TMR0 + i); - + for (i = 0; i < 8; i++) error += guest_msr_ro(vmx, MSR_APIC_IRR0 + i); @@ -631,6 +631,7 @@ vmx_disable(void *arg __unused) static int vmx_cleanup(void) { + if (pirvec >= 0) lapic_ipi_free(pirvec); @@ -902,7 +903,8 @@ vmx_init(int ipinum) } #ifdef __FreeBSD__ - guest_l1d_flush = (cpu_ia32_arch_caps & IA32_ARCH_CAP_RDCL_NO) == 0; + guest_l1d_flush = (cpu_ia32_arch_caps & + IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); /* @@ -1231,7 +1233,7 @@ vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) { #ifdef __FreeBSD__ int handled, func; - + func = vmxctx->guest_rax; #else int handled; @@ -3229,6 +3231,10 @@ 
vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; +#ifdef __FreeBSD__ + struct region_descriptor gdtr, idtr; + uint16_t ldt_sel; +#endif vmx = arg; vm = vmx->vm; @@ -3358,17 +3364,56 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, * re-VMLAUNCH as opposed to VMRESUME. */ launched = (vmx->vmcs_state[vcpu] & VS_LAUNCHED) != 0; + /* + * Restoration of the GDT limit is taken care of by + * vmx_savectx(). Since the maximum practical index for the + * IDT is 255, restoring its limits from the post-VMX-exit + * default of 0xffff is not a concern. + * + * Only 64-bit hypervisor callers are allowed, which forgoes + * the need to restore any LDT descriptor. Toss an error to + * anyone attempting to break that rule. + */ + if (curproc->p_model != DATAMODEL_LP64) { + ht_release(); + enable_intr(); + bzero(vmexit, sizeof (*vmexit)); + vmexit->rip = rip; + vmexit->exitcode = VM_EXITCODE_VMX; + vmexit->u.vmx.status = VM_FAIL_INVALID; + handled = UNHANDLED; + break; + } +#else + /* + * VM exits restore the base address but not the + * limits of GDTR and IDTR. The VMCS only stores the + * base address, so VM exits set the limits to 0xffff. + * Save and restore the full GDTR and IDTR to restore + * the limits. + * + * The VMCS does not save the LDTR at all, and VM + * exits clear LDTR as if a NULL selector were loaded. + * The userspace hypervisor probably doesn't use a + * LDT, but save and restore it to be safe. 
+ */ + sgdt(&gdtr); + sidt(&idtr); + ldt_sel = sldt(); #endif + vmx_run_trace(vmx, vcpu); vmx_dr_enter_guest(vmxctx); rc = vmx_enter_guest(vmxctx, vmx, launched); vmx_dr_leave_guest(vmxctx); + #ifndef __FreeBSD__ vmx->vmcs_state[vcpu] |= VS_LAUNCHED; -#endif - -#ifndef __FreeBSD__ ht_release(); +#else + bare_lgdt(&gdtr); + lidt(&idtr); + lldt(ldt_sel); #endif /* Collect some information for VM exit processing */ @@ -3522,7 +3567,7 @@ vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) uint64_t gi; int error; - error = vmcs_getreg(&vmx->vmcs[vcpu], running, + error = vmcs_getreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; return (error); @@ -3566,8 +3611,8 @@ vmx_shadow_reg(int reg) switch (reg) { case VM_REG_GUEST_CR0: shreg = VMCS_CR0_SHADOW; - break; - case VM_REG_GUEST_CR4: + break; + case VM_REG_GUEST_CR4: shreg = VMCS_CR4_SHADOW; break; default: @@ -3638,7 +3683,7 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) if (shadow > 0) { /* * Store the unmodified value in the shadow - */ + */ error = vmcs_setreg(&vmx->vmcs[vcpu], running, VMCS_IDENT(shadow), val); } @@ -3821,7 +3866,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } } - return (retval); + return (retval); } struct vlapic_vtx { @@ -4174,7 +4219,7 @@ vmx_vlapic_init(void *arg, int vcpuid) struct vmx *vmx; struct vlapic *vlapic; struct vlapic_vtx *vlapic_vtx; - + vmx = arg; vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 1a2f493dd1..d276944800 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -89,6 +89,7 @@ enum { VIE_OP_TYPE_GROUP1, VIE_OP_TYPE_STOS, VIE_OP_TYPE_BITTEST, + VIE_OP_TYPE_TWOB_GRP15, VIE_OP_TYPE_LAST }; @@ -101,6 +102,10 @@ enum { #ifdef _KERNEL static const struct vie_op 
two_byte_opcodes[256] = { + [0xAE] = { + .op_byte = 0xAE, + .op_type = VIE_OP_TYPE_TWOB_GRP15, + }, [0xB6] = { .op_byte = 0xB6, .op_type = VIE_OP_TYPE_MOVZX, @@ -1458,6 +1463,37 @@ emulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, return (0); } +static int +emulate_twob_group15(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, + mem_region_read_t memread, mem_region_write_t memwrite, void *memarg) +{ + int error; + uint64_t buf; + + switch (vie->reg & 7) { + case 0x7: /* CLFLUSH, CLFLUSHOPT, and SFENCE */ + if (vie->mod == 0x3) { + /* + * SFENCE. Ignore it, VM exit provides enough + * barriers on its own. + */ + error = 0; + } else { + /* + * CLFLUSH, CLFLUSHOPT. Only check for access + * rights. + */ + error = memread(vm, vcpuid, gpa, &buf, 1, memarg); + } + break; + default: + error = EINVAL; + break; + } + + return (error); +} + int vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, struct vm_guest_paging *paging, mem_region_read_t memread, @@ -1518,6 +1554,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = emulate_bittest(vm, vcpuid, gpa, vie, memread, memwrite, memarg); break; + case VIE_OP_TYPE_TWOB_GRP15: + error = emulate_twob_group15(vm, vcpuid, gpa, vie, + memread, memwrite, memarg); + break; default: error = EINVAL; break; diff --git a/usr/src/uts/i86pc/io/vmm/x86.c b/usr/src/uts/i86pc/io/vmm/x86.c index 5a6d7f9dd7..b02142e7e5 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.c +++ b/usr/src/uts/i86pc/io/vmm/x86.c @@ -141,17 +141,30 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); if (vmm_is_amd()) { /* - * XXX this might appear silly because AMD - * cpus don't have threads. - * - * However this matches the logical cpus as - * advertised by leaf 0x1 and will work even - * if threads is set incorrectly on an AMD host. + * As on Intel (0000_0007:0, EDX), mask out + * unsupported or unsafe AMD extended features + * (8000_0008 EBX). 
*/ + regs[1] &= (AMDFEID_CLZERO | AMDFEID_IRPERF | + AMDFEID_XSAVEERPTR); + vm_get_topology(vm, &sockets, &cores, &threads, &maxcpus); - logical_cpus = threads * cores; - regs[2] = logical_cpus - 1; + /* + * Here, width is ApicIdCoreIdSize, present on + * at least Family 15h and newer. It + * represents the "number of bits in the + * initial apicid that indicate thread id + * within a package." + * + * Our topo_probe_amd() uses it for + * pkg_id_shift and other OSes may rely on it. + */ + width = MIN(0xF, log2(threads * cores)); + if (width < 0x4) + width = 0; + logical_cpus = MIN(0xFF, threads * cores - 1); + regs[2] = (width << AMDID_COREID_SIZE_SHIFT) | logical_cpus; } break; @@ -159,9 +172,9 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, cpuid_count(*eax, *ecx, regs); /* - * Hide SVM and Topology Extension features from guest. + * Hide SVM from guest. */ - regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY); + regs[2] &= ~AMDID2_SVM; /* * Don't advertise extended performance counter MSRs @@ -226,6 +239,68 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, #endif /* __FreeBSD__ */ break; + case CPUID_8000_001D: + /* AMD Cache topology, like 0000_0004 for Intel. */ + if (!vmm_is_amd()) + goto default_leaf; + + /* + * Similar to Intel, generate a ficticious cache + * topology for the guest with L3 shared by the + * package, and L1 and L2 local to a core. + */ + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + switch (*ecx) { + case 0: + logical_cpus = threads; + level = 1; + func = 1; /* data cache */ + break; + case 1: + logical_cpus = threads; + level = 2; + func = 3; /* unified cache */ + break; + case 2: + logical_cpus = threads * cores; + level = 3; + func = 3; /* unified cache */ + break; + default: + logical_cpus = 0; + level = 0; + func = 0; + break; + } + + logical_cpus = MIN(0xfff, logical_cpus - 1); + regs[0] = (logical_cpus << 14) | (1 << 8) | + (level << 5) | func; + regs[1] = (func > 0) ? 
(CACHE_LINE_SIZE - 1) : 0; + regs[2] = 0; + regs[3] = 0; + break; + + case CPUID_8000_001E: + /* AMD Family 16h+ additional identifiers */ + if (!vmm_is_amd() || CPUID_TO_FAMILY(cpu_id) < 0x16) + goto default_leaf; + + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + regs[0] = vcpu_id; + threads = MIN(0xFF, threads - 1); + regs[1] = (threads << 8) | + (vcpu_id >> log2(threads + 1)); + /* + * XXX Bhyve topology cannot yet represent >1 node per + * processor. + */ + regs[2] = 0; + regs[3] = 0; + break; + case CPUID_0000_0001: do_cpuid(1, regs); @@ -366,7 +441,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512PF | CPUID_STDEXT_AVX512ER | - CPUID_STDEXT_AVX512CD); + CPUID_STDEXT_AVX512CD | CPUID_STDEXT_SHA); regs[2] = 0; regs[3] = 0; @@ -398,35 +473,42 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, case CPUID_0000_000B: /* - * Processor topology enumeration + * Intel processor topology enumeration */ - vm_get_topology(vm, &sockets, &cores, &threads, - &maxcpus); - if (*ecx == 0) { - logical_cpus = threads; - width = log2(logical_cpus); - level = CPUID_TYPE_SMT; - x2apic_id = vcpu_id; - } + if (vmm_is_intel()) { + vm_get_topology(vm, &sockets, &cores, &threads, + &maxcpus); + if (*ecx == 0) { + logical_cpus = threads; + width = log2(logical_cpus); + level = CPUID_TYPE_SMT; + x2apic_id = vcpu_id; + } - if (*ecx == 1) { - logical_cpus = threads * cores; - width = log2(logical_cpus); - level = CPUID_TYPE_CORE; - x2apic_id = vcpu_id; - } + if (*ecx == 1) { + logical_cpus = threads * cores; + width = log2(logical_cpus); + level = CPUID_TYPE_CORE; + x2apic_id = vcpu_id; + } - if (!cpuid_leaf_b || *ecx >= 2) { - width = 0; - logical_cpus = 0; - level = 0; - x2apic_id = 0; - } + if (!cpuid_leaf_b || *ecx >= 2) { + width = 0; + logical_cpus = 0; + level = 0; + x2apic_id = 0; + } - regs[0] = width & 0x1f; - regs[1] = logical_cpus & 0xffff; - regs[2] = (level << 8) | (*ecx & 0xff); - regs[3] = x2apic_id; + regs[0] = width 
& 0x1f; + regs[1] = logical_cpus & 0xffff; + regs[2] = (level << 8) | (*ecx & 0xff); + regs[3] = x2apic_id; + } else { + regs[0] = 0; + regs[1] = 0; + regs[2] = 0; + regs[3] = 0; + } break; case CPUID_0000_000D: @@ -488,6 +570,7 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id, break; default: +default_leaf: /* * The leaf value has already been clamped so * simply pass this through, keeping count of diff --git a/usr/src/uts/i86pc/io/vmm/x86.h b/usr/src/uts/i86pc/io/vmm/x86.h index 3a8e043852..0d70c04fd8 100644 --- a/usr/src/uts/i86pc/io/vmm/x86.h +++ b/usr/src/uts/i86pc/io/vmm/x86.h @@ -49,6 +49,8 @@ #define CPUID_8000_0006 (0x80000006) #define CPUID_8000_0007 (0x80000007) #define CPUID_8000_0008 (0x80000008) +#define CPUID_8000_001D (0x8000001D) +#define CPUID_8000_001E (0x8000001E) /* * CPUID instruction Fn0000_0001: |
