summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jerry Jelinek <jerry.jelinek@joyent.com> 2020-09-03 12:02:23 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com> 2020-09-03 12:02:23 +0000
commit 3ad7abf4f2e336baf268f2e63be234adf72c81eb (patch)
tree 4d48604395ad47cfacf35e5652c47bca662cf787
parent e0f664ec13fc70811953d2a807296a60b253b5a2 (diff)
parent 096bb5cb663d8fa04eda9a4aceb4d82a9cbae42c (diff)
download illumos-joyent-3ad7abf4f2e336baf268f2e63be234adf72c81eb.tar.gz
[illumos-gate merge]
commit 096bb5cb663d8fa04eda9a4aceb4d82a9cbae42c
    13072 clarify VMCB interface in bhyve
commit 2ad530425ac9cd3f429e64463a85f6f58703061c
    12976 system panics with error in IP module
-rw-r--r-- usr/src/cmd/bhyvectl/bhyvectl.c 18
-rw-r--r-- usr/src/uts/common/inet/ip/ipclassifier.c 9
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp.c 40
-rw-r--r-- usr/src/uts/common/inet/tcp/tcp_output.c 20
-rw-r--r-- usr/src/uts/common/sys/socket_proto.h 10
-rw-r--r-- usr/src/uts/i86pc/io/vmm/amd/svm.c 312
-rw-r--r-- usr/src/uts/i86pc/io/vmm/amd/vmcb.c 405
-rw-r--r-- usr/src/uts/i86pc/io/vmm/amd/vmcb.h 29
8 files changed, 388 insertions, 455 deletions
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c
index e0041ede30..7f8847b184 100644
--- a/usr/src/cmd/bhyvectl/bhyvectl.c
+++ b/usr/src/cmd/bhyvectl/bhyvectl.c
@@ -580,6 +580,7 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
}
#endif /* __FreeBSD__ */
+#ifdef __FreeBSD__
static int
vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
uint64_t *ret_val)
@@ -595,6 +596,23 @@ vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
return (vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val));
}
+#else /* __FreeBSD__ */
+/* Arbitrary VMCB read/write is not allowed */
+static int
+vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
+ uint64_t *ret_val)
+{
+ *ret_val = 0;
+ return (0);
+}
+
+static int
+vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
+ uint64_t val)
+{
+ return (EINVAL);
+}
+#endif /* __FreeBSD__ */
enum {
VMNAME = 1000, /* avoid collision with return values from getopt */
diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c
index 4f3ec2d817..69af77db9a 100644
--- a/usr/src/uts/common/inet/ip/ipclassifier.c
+++ b/usr/src/uts/common/inet/ip/ipclassifier.c
@@ -22,6 +22,7 @@
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Joyent, Inc.
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 Joyent, Inc.
*/
/*
@@ -2772,7 +2773,11 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie)
return (NULL);
}
- mutex_exit(&connp->conn_lock);
+ /*
+ * Continue to hold conn_lock because we don't want to race with an
+ * in-progress close, which will have set-to-NULL (and destroyed
+ * upper_handle, aka sonode (and vnode)) BEFORE setting CONN_CLOSING.
+ */
if (connp->conn_upper_handle != NULL) {
vn = (*connp->conn_upcalls->su_get_vnode)
@@ -2784,6 +2789,8 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie)
flags |= MIB2_SOCKINFO_STREAM;
}
+ mutex_exit(&connp->conn_lock);
+
if (vn == NULL || VOP_GETATTR(vn, &attr, 0, CRED(), NULL) != 0) {
if (vn != NULL)
VN_RELE(vn);
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index 554fe8b78f..88d558fd10 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -21,10 +21,10 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2019 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2020 Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -1018,10 +1018,23 @@ finish:
/* If we have an upper handle (socket), release it */
if (IPCL_IS_NONSTR(connp)) {
- ASSERT(connp->conn_upper_handle != NULL);
- (*connp->conn_upcalls->su_closed)(connp->conn_upper_handle);
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ /*
+ * Set these to NULL first because closed() will free upper
+ * structures. Acquire conn_lock because an external caller
+ * like conn_get_socket_info() will upcall if these are
+ * non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
+ upcalls->su_closed(handle);
}
}
@@ -1435,13 +1448,26 @@ tcp_free(tcp_t *tcp)
* nothing to do other than clearing the field.
*/
if (connp->conn_upper_handle != NULL) {
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ /*
+ * Set these to NULL first because closed() will free upper
+ * structures. Acquire conn_lock because an external caller
+ * like conn_get_socket_info() will upcall if these are
+ * non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
+ connp->conn_upper_handle = NULL;
+ connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
if (IPCL_IS_NONSTR(connp)) {
- (*connp->conn_upcalls->su_closed)(
- connp->conn_upper_handle);
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ upcalls->su_closed(handle);
tcp->tcp_detached = B_TRUE;
}
- connp->conn_upper_handle = NULL;
- connp->conn_upcalls = NULL;
}
}
diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c
index 7a0472f3dd..086668f435 100644
--- a/usr/src/uts/common/inet/tcp/tcp_output.c
+++ b/usr/src/uts/common/inet/tcp/tcp_output.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
- * Copyright 2019 Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
*/
/* This file contains all TCP output processing functions. */
@@ -1677,11 +1677,23 @@ finish:
/* non-STREAM socket, release the upper handle */
if (IPCL_IS_NONSTR(connp)) {
- ASSERT(connp->conn_upper_handle != NULL);
- (*connp->conn_upcalls->su_closed)
- (connp->conn_upper_handle);
+ sock_upcalls_t *upcalls = connp->conn_upcalls;
+ sock_upper_handle_t handle = connp->conn_upper_handle;
+
+ ASSERT(upcalls != NULL);
+ ASSERT(upcalls->su_closed != NULL);
+ ASSERT(handle != NULL);
+ /*
+ * Set these to NULL first because closed() will free
+ * upper structures. Acquire conn_lock because an
+ * external caller like conn_get_socket_info() will
+ * upcall if these are non-NULL.
+ */
+ mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
+ mutex_exit(&connp->conn_lock);
+ upcalls->su_closed(handle);
}
}
diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h
index 4e1a4a0f35..825d0501c7 100644
--- a/usr/src/uts/common/sys/socket_proto.h
+++ b/usr/src/uts/common/sys/socket_proto.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
+ * Copyright 2020 Joyent, Inc.
*/
#ifndef _SYS_SOCKET_PROTO_H_
@@ -202,7 +203,16 @@ struct sock_upcalls_s {
void (*su_signal_oob)(sock_upper_handle_t, ssize_t);
void (*su_zcopy_notify)(sock_upper_handle_t);
void (*su_set_error)(sock_upper_handle_t, int);
+ /*
+ * NOTE: This function frees upper handle items. Caller cannot
+ * rely on them after this upcall.
+ */
void (*su_closed)(sock_upper_handle_t);
+ /*
+ * NOTE: This function MUST be implemented without using lower-level
+ * downcalls or accesses. This allows callers to ensure su_closed()
+ * upcalls can happen independently or concurrently.
+ */
vnode_t *(*su_get_vnode)(sock_upper_handle_t);
};
diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c
index 1046a54126..431a3d6d8e 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/svm.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c
@@ -672,22 +672,19 @@ svm_cpl(struct vmcb_state *state)
static enum vm_cpu_mode
svm_vcpu_mode(struct vmcb *vmcb)
{
- struct vmcb_segment seg;
struct vmcb_state *state;
- int error;
state = &vmcb->state;
if (state->efer & EFER_LMA) {
- error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
- KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__,
- error));
+ struct vmcb_segment *seg;
/*
* Section 4.8.1 for APM2, check if Code Segment has
* Long attribute set in descriptor.
*/
- if (seg.attrib & VMCB_CS_ATTRIB_L)
+ seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
+ if (seg->attrib & VMCB_CS_ATTRIB_L)
return (CPU_MODE_64BIT);
else
return (CPU_MODE_COMPATIBILITY);
@@ -848,10 +845,9 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
struct vmcb *vmcb;
struct vie *vie;
struct vm_guest_paging paging;
- struct vmcb_segment seg;
+ struct vmcb_segment *seg;
char *inst_bytes = NULL;
uint8_t inst_len = 0;
- int error;
vmcb = svm_get_vmcb(svm_sc, vcpu);
ctrl = &vmcb->ctrl;
@@ -861,22 +857,21 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit,
vmexit->u.mmio_emul.gla = VIE_INVALID_GLA;
svm_paging_info(vmcb, &paging);
- error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg);
- KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error));
-
switch (paging.cpu_mode) {
case CPU_MODE_REAL:
- vmexit->u.mmio_emul.cs_base = seg.base;
+ seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
+ vmexit->u.mmio_emul.cs_base = seg->base;
vmexit->u.mmio_emul.cs_d = 0;
break;
case CPU_MODE_PROTECTED:
case CPU_MODE_COMPATIBILITY:
- vmexit->u.mmio_emul.cs_base = seg.base;
+ seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS);
+ vmexit->u.mmio_emul.cs_base = seg->base;
/*
* Section 4.8.1 of APM2, Default Operand Size or D bit.
*/
- vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ?
+ vmexit->u.mmio_emul.cs_d = (seg->attrib & VMCB_CS_ATTRIB_D) ?
1 : 0;
break;
default:
@@ -1050,32 +1045,6 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR);
}
-static int
-svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val)
-{
- struct vmcb_ctrl *ctrl;
- int oldval, newval;
-
- ctrl = svm_get_vmcb_ctrl(sc, vcpu);
- oldval = ctrl->intr_shadow;
- newval = val ? 1 : 0;
- if (newval != oldval) {
- ctrl->intr_shadow = newval;
- VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval);
- }
- return (0);
-}
-
-static int
-svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val)
-{
- struct vmcb_ctrl *ctrl;
-
- ctrl = svm_get_vmcb_ctrl(sc, vcpu);
- *val = ctrl->intr_shadow;
- return (0);
-}
-
/*
* Once an NMI is injected it blocks delivery of further NMIs until the handler
* executes an IRET. The IRET intercept is enabled when an NMI is injected to
@@ -1103,7 +1072,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu)
static void
clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
- int error;
+ struct vmcb_ctrl *ctrl;
KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked"));
VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared");
@@ -1124,8 +1093,8 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu)
* Set 'intr_shadow' to prevent an NMI from being injected on the
* immediate VMRUN.
*/
- error = svm_modify_intr_shadow(sc, vcpu, 1);
- KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error));
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ ctrl->intr_shadow = 1;
}
#define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL
@@ -2155,7 +2124,6 @@ svm_vmcleanup(void *arg)
static register_t *
swctx_regptr(struct svm_regctx *regctx, int reg)
{
-
switch (reg) {
case VM_REG_GUEST_RBX:
return (&regctx->sctx_rbx);
@@ -2201,56 +2169,135 @@ swctx_regptr(struct svm_regctx *regctx, int reg)
static int
svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
{
- struct svm_softc *svm_sc;
- register_t *reg;
-
- svm_sc = arg;
+ struct svm_softc *sc;
+ struct vmcb *vmcb;
+ register_t *regp;
+ uint64_t *fieldp;
+ struct vmcb_segment *seg;
- if (ident == VM_REG_GUEST_INTR_SHADOW) {
- return (svm_get_intr_shadow(svm_sc, vcpu, val));
- }
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
- if (vmcb_read(svm_sc, vcpu, ident, val) == 0) {
+ regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident);
+ if (regp != NULL) {
+ *val = *regp;
return (0);
}
- reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
+ switch (ident) {
+ case VM_REG_GUEST_INTR_SHADOW:
+ *val = (vmcb->ctrl.intr_shadow != 0) ? 1 : 0;
+ break;
- if (reg != NULL) {
- *val = *reg;
- return (0);
+ case VM_REG_GUEST_CR0:
+ case VM_REG_GUEST_CR2:
+ case VM_REG_GUEST_CR3:
+ case VM_REG_GUEST_CR4:
+ case VM_REG_GUEST_DR6:
+ case VM_REG_GUEST_DR7:
+ case VM_REG_GUEST_EFER:
+ case VM_REG_GUEST_RAX:
+ case VM_REG_GUEST_RFLAGS:
+ case VM_REG_GUEST_RIP:
+ case VM_REG_GUEST_RSP:
+ fieldp = vmcb_regptr(vmcb, ident, NULL);
+ *val = *fieldp;
+ break;
+
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_LDTR:
+ case VM_REG_GUEST_TR:
+ seg = vmcb_segptr(vmcb, ident);
+ *val = seg->selector;
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ /* GDTR and IDTR don't have segment selectors */
+ return (EINVAL);
+
+ default:
+ return (EINVAL);
}
- VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident);
- return (EINVAL);
+ return (0);
}
static int
svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
{
- struct svm_softc *svm_sc;
- register_t *reg;
-
- svm_sc = arg;
+ struct svm_softc *sc;
+ struct vmcb *vmcb;
+ register_t *regp;
+ uint64_t *fieldp;
+ uint32_t dirty;
+ struct vmcb_segment *seg;
- if (ident == VM_REG_GUEST_INTR_SHADOW) {
- return (svm_modify_intr_shadow(svm_sc, vcpu, val));
- }
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
- if (vmcb_write(svm_sc, vcpu, ident, val) == 0) {
+ regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident);
+ if (regp != NULL) {
+ *regp = val;
return (0);
}
- reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);
+ dirty = VMCB_CACHE_NONE;
+ switch (ident) {
+ case VM_REG_GUEST_INTR_SHADOW:
+ vmcb->ctrl.intr_shadow = (val != 0) ? 1 : 0;
+ break;
- if (reg != NULL) {
- *reg = val;
- return (0);
+ case VM_REG_GUEST_EFER:
+ fieldp = vmcb_regptr(vmcb, ident, &dirty);
+ /* EFER_SVM must always be set when the guest is executing */
+ *fieldp = val | EFER_SVM;
+ dirty |= VMCB_CACHE_CR;
+ break;
+
+ case VM_REG_GUEST_CR0:
+ case VM_REG_GUEST_CR2:
+ case VM_REG_GUEST_CR3:
+ case VM_REG_GUEST_CR4:
+ case VM_REG_GUEST_DR6:
+ case VM_REG_GUEST_DR7:
+ case VM_REG_GUEST_RAX:
+ case VM_REG_GUEST_RFLAGS:
+ case VM_REG_GUEST_RIP:
+ case VM_REG_GUEST_RSP:
+ fieldp = vmcb_regptr(vmcb, ident, &dirty);
+ *fieldp = val;
+ break;
+
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_LDTR:
+ case VM_REG_GUEST_TR:
+ dirty |= VMCB_CACHE_SEG;
+ seg = vmcb_segptr(vmcb, ident);
+ seg->selector = (uint16_t)val;
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ /* GDTR and IDTR don't have segment selectors */
+ return (EINVAL);
+
+ default:
+ return (EINVAL);
}
- if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) {
- /* Ignore. */
- return (0);
+ if (dirty != VMCB_CACHE_NONE) {
+ svm_set_dirty(sc, vcpu, dirty);
}
/*
@@ -2259,8 +2306,119 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
* whether 'running' is true/false.
*/
- VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident);
- return (EINVAL);
+ return (0);
+}
+
+static int
+svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+ struct vmcb *vmcb;
+ struct svm_softc *sc;
+ struct vmcb_segment *seg;
+
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
+
+ switch (reg) {
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_LDTR:
+ case VM_REG_GUEST_TR:
+ svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
+ seg = vmcb_segptr(vmcb, reg);
+ /*
+ * Map seg_desc access to VMCB attribute format.
+ *
+ * SVM uses the 'P' bit in the segment attributes to indicate a
+ * NULL segment so clear it if the segment is marked unusable.
+ */
+ seg->attrib = VMCB_ACCESS2ATTR(desc->access);
+ if (SEG_DESC_UNUSABLE(desc->access)) {
+ seg->attrib &= ~0x80;
+ }
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ svm_set_dirty(sc, vcpu, VMCB_CACHE_DT);
+ seg = vmcb_segptr(vmcb, reg);
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ ASSERT(seg != NULL);
+ seg->base = desc->base;
+ seg->limit = desc->limit;
+
+ return (0);
+}
+
+static int
+svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
+{
+ struct vmcb *vmcb;
+ struct svm_softc *sc;
+ struct vmcb_segment *seg;
+
+ sc = arg;
+ vmcb = svm_get_vmcb(sc, vcpu);
+
+ switch (reg) {
+ case VM_REG_GUEST_DS:
+ case VM_REG_GUEST_ES:
+ case VM_REG_GUEST_FS:
+ case VM_REG_GUEST_GS:
+ case VM_REG_GUEST_SS:
+ case VM_REG_GUEST_LDTR:
+ seg = vmcb_segptr(vmcb, reg);
+ desc->access = VMCB_ATTR2ACCESS(seg->attrib);
+ /*
+ * VT-x uses bit 16 to indicate a segment that has been loaded
+ * with a NULL selector (aka unusable). The 'desc->access'
+ * field is interpreted in the VT-x format by the
+ * processor-independent code.
+ *
+ * SVM uses the 'P' bit to convey the same information so
+ * convert it into the VT-x format. For more details refer to
+ * section "Segment State in the VMCB" in APMv2.
+ */
+ if ((desc->access & 0x80) == 0) {
+ /* Unusable segment */
+ desc->access |= 0x10000;
+ }
+ break;
+
+ case VM_REG_GUEST_CS:
+ case VM_REG_GUEST_TR:
+ seg = vmcb_segptr(vmcb, reg);
+ desc->access = VMCB_ATTR2ACCESS(seg->attrib);
+ break;
+
+ case VM_REG_GUEST_GDTR:
+ case VM_REG_GUEST_IDTR:
+ seg = vmcb_segptr(vmcb, reg);
+ /*
+ * Since there are no access bits associated with the GDTR or
+ * the IDTR, zero out the field to ensure it does not contain
+ * garbage which might confuse the consumer.
+ */
+ desc->access = 0;
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ ASSERT(seg != NULL);
+ desc->base = seg->base;
+ desc->limit = seg->limit;
+ return (0);
}
static int
@@ -2368,8 +2526,8 @@ struct vmm_ops vmm_ops_amd = {
.vmcleanup = svm_vmcleanup,
.vmgetreg = svm_getreg,
.vmsetreg = svm_setreg,
- .vmgetdesc = vmcb_getdesc,
- .vmsetdesc = vmcb_setdesc,
+ .vmgetdesc = svm_getdesc,
+ .vmsetdesc = svm_setdesc,
.vmgetcap = svm_getcap,
.vmsetcap = svm_setcap,
.vmspace_alloc = svm_npt_alloc,
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
index 5075b69867..b00f974c23 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c
@@ -26,429 +26,130 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * Copyright 2020 Oxide Computer Company
+ */
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
-#include <machine/segments.h>
-#include <machine/specialreg.h>
#include <machine/vmm.h>
-#include "vmm_ktr.h"
-
#include "vmcb.h"
#include "svm.h"
-#include "svm_softc.h"
-/*
- * The VMCB aka Virtual Machine Control Block is a 4KB aligned page
- * in memory that describes the virtual machine.
- *
- * The VMCB contains:
- * - instructions or events in the guest to intercept
- * - control bits that modify execution environment of the guest
- * - guest processor state (e.g. general purpose registers)
- */
-
-/*
- * Return VMCB segment area.
- */
-static struct vmcb_segment *
+struct vmcb_segment *
vmcb_segptr(struct vmcb *vmcb, int type)
{
- struct vmcb_state *state;
- struct vmcb_segment *seg;
-
- state = &vmcb->state;
+ struct vmcb_state *state = &vmcb->state;
switch (type) {
case VM_REG_GUEST_CS:
- seg = &state->cs;
- break;
-
+ return (&state->cs);
case VM_REG_GUEST_DS:
- seg = &state->ds;
- break;
-
+ return (&state->ds);
case VM_REG_GUEST_ES:
- seg = &state->es;
- break;
-
+ return (&state->es);
case VM_REG_GUEST_FS:
- seg = &state->fs;
- break;
-
+ return (&state->fs);
case VM_REG_GUEST_GS:
- seg = &state->gs;
- break;
-
+ return (&state->gs);
case VM_REG_GUEST_SS:
- seg = &state->ss;
- break;
-
+ return (&state->ss);
case VM_REG_GUEST_GDTR:
- seg = &state->gdt;
- break;
-
+ return (&state->gdt);
case VM_REG_GUEST_IDTR:
- seg = &state->idt;
- break;
-
+ return (&state->idt);
case VM_REG_GUEST_LDTR:
- seg = &state->ldt;
- break;
-
+ return (&state->ldt);
case VM_REG_GUEST_TR:
- seg = &state->tr;
- break;
-
+ return (&state->tr);
default:
- seg = NULL;
- break;
+ panic("unexpected seg %d", type);
}
-
- return (seg);
}
-static int
-vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident,
- uint64_t *val)
+uint64_t *
+vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp)
{
- struct vmcb *vmcb;
- int off, bytes;
- char *ptr;
-
- vmcb = svm_get_vmcb(softc, vcpu);
- off = VMCB_ACCESS_OFFSET(ident);
- bytes = VMCB_ACCESS_BYTES(ident);
-
- if ((off + bytes) >= sizeof (struct vmcb))
- return (EINVAL);
-
- ptr = (char *)vmcb;
-
- if (!write)
- *val = 0;
-
- switch (bytes) {
- case 8:
- case 4:
- case 2:
- if (write)
- memcpy(ptr + off, val, bytes);
- else
- memcpy(val, ptr + off, bytes);
- break;
- default:
- VCPU_CTR1(softc->vm, vcpu,
- "Invalid size %d for VMCB access: %d", bytes);
- return (EINVAL);
- }
-
- /* Invalidate all VMCB state cached by h/w. */
- if (write)
- svm_set_dirty(softc, vcpu, 0xffffffff);
-
- return (0);
-}
-
-/*
- * Read from segment selector, control and general purpose register of VMCB.
- */
-int
-vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval)
-{
- struct vmcb *vmcb;
struct vmcb_state *state;
- struct vmcb_segment *seg;
- int err;
+ uint64_t *res = NULL;
+ uint32_t dirty = VMCB_CACHE_NONE;
- vmcb = svm_get_vmcb(sc, vcpu);
state = &vmcb->state;
- err = 0;
-
- if (VMCB_ACCESS_OK(ident))
- return (vmcb_access(sc, vcpu, 0, ident, retval));
switch (ident) {
case VM_REG_GUEST_CR0:
- *retval = state->cr0;
+ res = &state->cr0;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_CR2:
- *retval = state->cr2;
+ res = &state->cr2;
+ dirty = VMCB_CACHE_CR2;
break;
case VM_REG_GUEST_CR3:
- *retval = state->cr3;
+ res = &state->cr3;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_CR4:
- *retval = state->cr4;
+ res = &state->cr4;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_DR6:
- *retval = state->dr6;
+ res = &state->dr6;
+ dirty = VMCB_CACHE_DR;
break;
case VM_REG_GUEST_DR7:
- *retval = state->dr7;
+ res = &state->dr7;
+ dirty = VMCB_CACHE_DR;
break;
case VM_REG_GUEST_EFER:
- *retval = state->efer;
+ res = &state->efer;
+ dirty = VMCB_CACHE_CR;
break;
case VM_REG_GUEST_RAX:
- *retval = state->rax;
+ res = &state->rax;
break;
case VM_REG_GUEST_RFLAGS:
- *retval = state->rflags;
+ res = &state->rflags;
break;
case VM_REG_GUEST_RIP:
- *retval = state->rip;
+ res = &state->rip;
break;
case VM_REG_GUEST_RSP:
- *retval = state->rsp;
- break;
-
- case VM_REG_GUEST_CS:
- case VM_REG_GUEST_DS:
- case VM_REG_GUEST_ES:
- case VM_REG_GUEST_FS:
- case VM_REG_GUEST_GS:
- case VM_REG_GUEST_SS:
- case VM_REG_GUEST_LDTR:
- case VM_REG_GUEST_TR:
- seg = vmcb_segptr(vmcb, ident);
- KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB",
- __func__, ident));
- *retval = seg->selector;
+ res = &state->rsp;
break;
- case VM_REG_GUEST_GDTR:
- case VM_REG_GUEST_IDTR:
- /* GDTR and IDTR don't have segment selectors */
- err = EINVAL;
- break;
default:
- err = EINVAL;
+ panic("unexpected register %d", ident);
break;
}
- return (err);
-}
-
-/*
- * Write to segment selector, control and general purpose register of VMCB.
- */
-int
-vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val)
-{
- struct vmcb *vmcb;
- struct vmcb_state *state;
- struct vmcb_segment *seg;
- int err, dirtyseg;
-
- vmcb = svm_get_vmcb(sc, vcpu);
- state = &vmcb->state;
- dirtyseg = 0;
- err = 0;
-
- if (VMCB_ACCESS_OK(ident))
- return (vmcb_access(sc, vcpu, 1, ident, &val));
-
- switch (ident) {
- case VM_REG_GUEST_CR0:
- state->cr0 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_CR2:
- state->cr2 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2);
- break;
-
- case VM_REG_GUEST_CR3:
- state->cr3 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_CR4:
- state->cr4 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_DR6:
- state->dr6 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_DR);
- break;
-
- case VM_REG_GUEST_DR7:
- state->dr7 = val;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_DR);
- break;
-
- case VM_REG_GUEST_EFER:
- /* EFER_SVM must always be set when the guest is executing */
- state->efer = val | EFER_SVM;
- svm_set_dirty(sc, vcpu, VMCB_CACHE_CR);
- break;
-
- case VM_REG_GUEST_RAX:
- state->rax = val;
- break;
-
- case VM_REG_GUEST_RFLAGS:
- state->rflags = val;
- break;
-
- case VM_REG_GUEST_RIP:
- state->rip = val;
- break;
-
- case VM_REG_GUEST_RSP:
- state->rsp = val;
- break;
-
- case VM_REG_GUEST_CS:
- case VM_REG_GUEST_DS:
- case VM_REG_GUEST_ES:
- case VM_REG_GUEST_SS:
- dirtyseg = 1; /* FALLTHROUGH */
- case VM_REG_GUEST_FS:
- case VM_REG_GUEST_GS:
- case VM_REG_GUEST_LDTR:
- case VM_REG_GUEST_TR:
- seg = vmcb_segptr(vmcb, ident);
- KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB",
- __func__, ident));
- seg->selector = val;
- if (dirtyseg)
- svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
- break;
-
- case VM_REG_GUEST_GDTR:
- case VM_REG_GUEST_IDTR:
- /* GDTR and IDTR don't have segment selectors */
- err = EINVAL;
- break;
- default:
- err = EINVAL;
- break;
- }
-
- return (err);
-}
-
-int
-vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2)
-{
- struct vmcb_segment *seg;
-
- seg = vmcb_segptr(vmcb, ident);
- if (seg != NULL) {
- bcopy(seg, seg2, sizeof(struct vmcb_segment));
- return (0);
- } else {
- return (EINVAL);
- }
-}
-
-int
-vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
-{
- struct vmcb *vmcb;
- struct svm_softc *sc;
- struct vmcb_segment *seg;
- uint16_t attrib;
-
- sc = arg;
- vmcb = svm_get_vmcb(sc, vcpu);
-
- seg = vmcb_segptr(vmcb, reg);
- KASSERT(seg != NULL, ("%s: invalid segment descriptor %d",
- __func__, reg));
-
- seg->base = desc->base;
- seg->limit = desc->limit;
- if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) {
- /*
- * Map seg_desc access to VMCB attribute format.
- *
- * SVM uses the 'P' bit in the segment attributes to indicate a
- * NULL segment so clear it if the segment is marked unusable.
- */
- attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF);
- if (SEG_DESC_UNUSABLE(desc->access)) {
- attrib &= ~0x80;
- }
- seg->attrib = attrib;
- }
-
- VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), "
- "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib);
-
- switch (reg) {
- case VM_REG_GUEST_CS:
- case VM_REG_GUEST_DS:
- case VM_REG_GUEST_ES:
- case VM_REG_GUEST_SS:
- svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG);
- break;
- case VM_REG_GUEST_GDTR:
- case VM_REG_GUEST_IDTR:
- svm_set_dirty(sc, vcpu, VMCB_CACHE_DT);
- break;
- default:
- break;
+ ASSERT(res != NULL);
+ if (dirtyp != NULL) {
+ *dirtyp |= dirty;
}
-
- return (0);
-}
-
-int
-vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
-{
- struct vmcb *vmcb;
- struct svm_softc *sc;
- struct vmcb_segment *seg;
-
- sc = arg;
- vmcb = svm_get_vmcb(sc, vcpu);
- seg = vmcb_segptr(vmcb, reg);
- KASSERT(seg != NULL, ("%s: invalid segment descriptor %d",
- __func__, reg));
-
- desc->base = seg->base;
- desc->limit = seg->limit;
- desc->access = 0;
-
- if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) {
- /* Map seg_desc access to VMCB attribute format */
- desc->access = ((seg->attrib & 0xF00) << 4) |
- (seg->attrib & 0xFF);
-
- /*
- * VT-x uses bit 16 to indicate a segment that has been loaded
- * with a NULL selector (aka unusable). The 'desc->access'
- * field is interpreted in the VT-x format by the
- * processor-independent code.
- *
- * SVM uses the 'P' bit to convey the same information so
- * convert it into the VT-x format. For more details refer to
- * section "Segment State in the VMCB" in APMv2.
- */
- if (reg != VM_REG_GUEST_CS && reg != VM_REG_GUEST_TR) {
- if ((desc->access & 0x80) == 0)
- desc->access |= 0x10000; /* Unusable segment */
- }
- }
-
- return (0);
+ return (res);
}
diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
index 88f65df66a..e3593db7f9 100644
--- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
+++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h
@@ -212,15 +212,6 @@ struct svm_softc;
#define VMCB_OFF_SYSENTER_EIP VMCB_OFF_STATE(0x238)
#define VMCB_OFF_GUEST_PAT VMCB_OFF_STATE(0x268)
-/*
- * Encode the VMCB offset and bytes that we want to read from VMCB.
- */
-#define VMCB_ACCESS(o, w) (0x80000000 | (((w) & 0xF) << 16) | \
- ((o) & 0xFFF))
-#define VMCB_ACCESS_OK(v) ((v) & 0x80000000 )
-#define VMCB_ACCESS_BYTES(v) (((v) >> 16) & 0xF)
-#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF)
-
#ifdef _KERNEL
/* VMCB save state area segment format */
struct vmcb_segment {
@@ -231,6 +222,10 @@ struct vmcb_segment {
};
CTASSERT(sizeof(struct vmcb_segment) == 16);
+/* Convert to/from vmcb segment access to generic (VMX) access */
+#define VMCB_ATTR2ACCESS(attr) ((((attr) & 0xf00) << 4) | ((attr) & 0xff))
+#define VMCB_ACCESS2ATTR(acc) ((((acc) & 0xf000) >> 4) | ((acc) & 0xff))
+
/* Code segment descriptor attribute in 12 bit format as saved by VMCB. */
#define VMCB_CS_ATTRIB_L BIT(9) /* Long mode. */
#define VMCB_CS_ATTRIB_D BIT(10) /* Operand size bit. */
@@ -360,6 +355,15 @@ struct vmcb_state {
CTASSERT(sizeof(struct vmcb_state) == 0xC00);
CTASSERT(offsetof(struct vmcb_state, int_to) == 0x290);
+/*
+ * The VMCB aka Virtual Machine Control Block is a 4KB aligned page
+ * in memory that describes the virtual machine.
+ *
+ * The VMCB contains:
+ * - instructions or events in the guest to intercept
+ * - control bits that modify execution environment of the guest
+ * - guest processor state (e.g. general purpose registers)
+ */
struct vmcb {
struct vmcb_ctrl ctrl;
struct vmcb_state state;
@@ -367,11 +371,8 @@ struct vmcb {
CTASSERT(sizeof(struct vmcb) == PAGE_SIZE);
CTASSERT(offsetof(struct vmcb, state) == 0x400);
-int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval);
-int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
-int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
-int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
-int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg);
+struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type);
+uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp);
#endif /* _KERNEL */
#endif /* _VMCB_H_ */