diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2020-09-03 12:02:23 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2020-09-03 12:02:23 +0000 |
commit | 3ad7abf4f2e336baf268f2e63be234adf72c81eb (patch) | |
tree | 4d48604395ad47cfacf35e5652c47bca662cf787 | |
parent | e0f664ec13fc70811953d2a807296a60b253b5a2 (diff) | |
parent | 096bb5cb663d8fa04eda9a4aceb4d82a9cbae42c (diff) | |
download | illumos-joyent-3ad7abf4f2e336baf268f2e63be234adf72c81eb.tar.gz |
[illumos-gate merge]
commit 096bb5cb663d8fa04eda9a4aceb4d82a9cbae42c
13072 clarify VMCB interface in bhyve
commit 2ad530425ac9cd3f429e64463a85f6f58703061c
12976 system panics with error in IP module
-rw-r--r-- | usr/src/cmd/bhyvectl/bhyvectl.c | 18 | ||||
-rw-r--r-- | usr/src/uts/common/inet/ip/ipclassifier.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp.c | 40 | ||||
-rw-r--r-- | usr/src/uts/common/inet/tcp/tcp_output.c | 20 | ||||
-rw-r--r-- | usr/src/uts/common/sys/socket_proto.h | 10 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/svm.c | 312 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/vmcb.c | 405 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/amd/vmcb.h | 29 |
8 files changed, 388 insertions, 455 deletions
diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index e0041ede30..7f8847b184 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -580,6 +580,7 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) } #endif /* __FreeBSD__ */ +#ifdef __FreeBSD__ static int vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, uint64_t *ret_val) @@ -595,6 +596,23 @@ vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, return (vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val)); } +#else /* __FreeBSD__ */ +/* Arbitrary VMCB read/write is not allowed */ +static int +vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, + uint64_t *ret_val) +{ + *ret_val = 0; + return (0); +} + +static int +vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, + uint64_t val) +{ + return (EINVAL); +} +#endif /* __FreeBSD__ */ enum { VMNAME = 1000, /* avoid collision with return values from getopt */ diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c index 4f3ec2d817..69af77db9a 100644 --- a/usr/src/uts/common/inet/ip/ipclassifier.c +++ b/usr/src/uts/common/inet/ip/ipclassifier.c @@ -22,6 +22,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Joyent, Inc. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 Joyent, Inc. */ /* @@ -2772,7 +2773,11 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie) return (NULL); } - mutex_exit(&connp->conn_lock); + /* + * Continue to hold conn_lock because we don't want to race with an + * in-progress close, which will have set-to-NULL (and destroyed + * upper_handle, aka sonode (and vnode)) BEFORE setting CONN_CLOSING. 
+ */ if (connp->conn_upper_handle != NULL) { vn = (*connp->conn_upcalls->su_get_vnode) @@ -2784,6 +2789,8 @@ conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie) flags |= MIB2_SOCKINFO_STREAM; } + mutex_exit(&connp->conn_lock); + if (vn == NULL || VOP_GETATTR(vn, &attr, 0, CRED(), NULL) != 0) { if (vn != NULL) VN_RELE(vn); diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c index 554fe8b78f..88d558fd10 100644 --- a/usr/src/uts/common/inet/tcp/tcp.c +++ b/usr/src/uts/common/inet/tcp/tcp.c @@ -21,10 +21,10 @@ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Joyent, Inc. * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013, 2017 by Delphix. All rights reserved. * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. + * Copyright 2020 Joyent, Inc. */ /* Copyright (c) 1990 Mentat Inc. */ @@ -1018,10 +1018,23 @@ finish: /* If we have an upper handle (socket), release it */ if (IPCL_IS_NONSTR(connp)) { - ASSERT(connp->conn_upper_handle != NULL); - (*connp->conn_upcalls->su_closed)(connp->conn_upper_handle); + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + /* + * Set these to NULL first because closed() will free upper + * structures. Acquire conn_lock because an external caller + * like conn_get_socket_info() will upcall if these are + * non-NULL. + */ + mutex_enter(&connp->conn_lock); connp->conn_upper_handle = NULL; connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); + upcalls->su_closed(handle); } } @@ -1435,13 +1448,26 @@ tcp_free(tcp_t *tcp) * nothing to do other than clearing the field. 
*/ if (connp->conn_upper_handle != NULL) { + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + /* + * Set these to NULL first because closed() will free upper + * structures. Acquire conn_lock because an external caller + * like conn_get_socket_info() will upcall if these are + * non-NULL. + */ + mutex_enter(&connp->conn_lock); + connp->conn_upper_handle = NULL; + connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); if (IPCL_IS_NONSTR(connp)) { - (*connp->conn_upcalls->su_closed)( - connp->conn_upper_handle); + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + upcalls->su_closed(handle); tcp->tcp_detached = B_TRUE; } - connp->conn_upper_handle = NULL; - connp->conn_upcalls = NULL; } } diff --git a/usr/src/uts/common/inet/tcp/tcp_output.c b/usr/src/uts/common/inet/tcp/tcp_output.c index 7a0472f3dd..086668f435 100644 --- a/usr/src/uts/common/inet/tcp/tcp_output.c +++ b/usr/src/uts/common/inet/tcp/tcp_output.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2017 by Delphix. All rights reserved. - * Copyright 2019 Joyent, Inc. + * Copyright 2020 Joyent, Inc. */ /* This file contains all TCP output processing functions. */ @@ -1677,11 +1677,23 @@ finish: /* non-STREAM socket, release the upper handle */ if (IPCL_IS_NONSTR(connp)) { - ASSERT(connp->conn_upper_handle != NULL); - (*connp->conn_upcalls->su_closed) - (connp->conn_upper_handle); + sock_upcalls_t *upcalls = connp->conn_upcalls; + sock_upper_handle_t handle = connp->conn_upper_handle; + + ASSERT(upcalls != NULL); + ASSERT(upcalls->su_closed != NULL); + ASSERT(handle != NULL); + /* + * Set these to NULL first because closed() will free + * upper structures. Acquire conn_lock because an + * external caller like conn_get_socket_info() will + * upcall if these are non-NULL. 
+ */ + mutex_enter(&connp->conn_lock); connp->conn_upper_handle = NULL; connp->conn_upcalls = NULL; + mutex_exit(&connp->conn_lock); + upcalls->su_closed(handle); } } diff --git a/usr/src/uts/common/sys/socket_proto.h b/usr/src/uts/common/sys/socket_proto.h index 4e1a4a0f35..825d0501c7 100644 --- a/usr/src/uts/common/sys/socket_proto.h +++ b/usr/src/uts/common/sys/socket_proto.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2020 Joyent, Inc. */ #ifndef _SYS_SOCKET_PROTO_H_ @@ -202,7 +203,16 @@ struct sock_upcalls_s { void (*su_signal_oob)(sock_upper_handle_t, ssize_t); void (*su_zcopy_notify)(sock_upper_handle_t); void (*su_set_error)(sock_upper_handle_t, int); + /* + * NOTE: This function frees upper handle items. Caller cannot + * rely on them after this upcall. + */ void (*su_closed)(sock_upper_handle_t); + /* + * NOTE: This function MUST be implemented without using lower-level + * downcalls or accesses. This allows callers to ensure su_closed() + * upcalls can happen independently or concurrently. + */ vnode_t *(*su_get_vnode)(sock_upper_handle_t); }; diff --git a/usr/src/uts/i86pc/io/vmm/amd/svm.c b/usr/src/uts/i86pc/io/vmm/amd/svm.c index 1046a54126..431a3d6d8e 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/svm.c +++ b/usr/src/uts/i86pc/io/vmm/amd/svm.c @@ -672,22 +672,19 @@ svm_cpl(struct vmcb_state *state) static enum vm_cpu_mode svm_vcpu_mode(struct vmcb *vmcb) { - struct vmcb_segment seg; struct vmcb_state *state; - int error; state = &vmcb->state; if (state->efer & EFER_LMA) { - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(cs) error %d", __func__, - error)); + struct vmcb_segment *seg; /* * Section 4.8.1 for APM2, check if Code Segment has * Long attribute set in descriptor. 
*/ - if (seg.attrib & VMCB_CS_ATTRIB_L) + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + if (seg->attrib & VMCB_CS_ATTRIB_L) return (CPU_MODE_64BIT); else return (CPU_MODE_COMPATIBILITY); @@ -848,10 +845,9 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, struct vmcb *vmcb; struct vie *vie; struct vm_guest_paging paging; - struct vmcb_segment seg; + struct vmcb_segment *seg; char *inst_bytes = NULL; uint8_t inst_len = 0; - int error; vmcb = svm_get_vmcb(svm_sc, vcpu); ctrl = &vmcb->ctrl; @@ -861,22 +857,21 @@ svm_handle_mmio_emul(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit, vmexit->u.mmio_emul.gla = VIE_INVALID_GLA; svm_paging_info(vmcb, &paging); - error = vmcb_seg(vmcb, VM_REG_GUEST_CS, &seg); - KASSERT(error == 0, ("%s: vmcb_seg(CS) error %d", __func__, error)); - switch (paging.cpu_mode) { case CPU_MODE_REAL: - vmexit->u.mmio_emul.cs_base = seg.base; + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + vmexit->u.mmio_emul.cs_base = seg->base; vmexit->u.mmio_emul.cs_d = 0; break; case CPU_MODE_PROTECTED: case CPU_MODE_COMPATIBILITY: - vmexit->u.mmio_emul.cs_base = seg.base; + seg = vmcb_segptr(vmcb, VM_REG_GUEST_CS); + vmexit->u.mmio_emul.cs_base = seg->base; /* * Section 4.8.1 of APM2, Default Operand Size or D bit. */ - vmexit->u.mmio_emul.cs_d = (seg.attrib & VMCB_CS_ATTRIB_D) ? + vmexit->u.mmio_emul.cs_d = (seg->attrib & VMCB_CS_ATTRIB_D) ? 1 : 0; break; default: @@ -1050,32 +1045,6 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu) svm_disable_intercept(sc, vcpu, VMCB_CTRL1_INTCPT, VMCB_INTCPT_VINTR); } -static int -svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val) -{ - struct vmcb_ctrl *ctrl; - int oldval, newval; - - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - oldval = ctrl->intr_shadow; - newval = val ? 
1 : 0; - if (newval != oldval) { - ctrl->intr_shadow = newval; - VCPU_CTR1(sc->vm, vcpu, "Setting intr_shadow to %d", newval); - } - return (0); -} - -static int -svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val) -{ - struct vmcb_ctrl *ctrl; - - ctrl = svm_get_vmcb_ctrl(sc, vcpu); - *val = ctrl->intr_shadow; - return (0); -} - /* * Once an NMI is injected it blocks delivery of further NMIs until the handler * executes an IRET. The IRET intercept is enabled when an NMI is injected to @@ -1103,7 +1072,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu) static void clear_nmi_blocking(struct svm_softc *sc, int vcpu) { - int error; + struct vmcb_ctrl *ctrl; KASSERT(nmi_blocked(sc, vcpu), ("vNMI already unblocked")); VCPU_CTR0(sc->vm, vcpu, "vNMI blocking cleared"); @@ -1124,8 +1093,8 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu) * Set 'intr_shadow' to prevent an NMI from being injected on the * immediate VMRUN. */ - error = svm_modify_intr_shadow(sc, vcpu, 1); - KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error)); + ctrl = svm_get_vmcb_ctrl(sc, vcpu); + ctrl->intr_shadow = 1; } #define EFER_MBZ_BITS 0xFFFFFFFFFFFF0200UL @@ -2155,7 +2124,6 @@ svm_vmcleanup(void *arg) static register_t * swctx_regptr(struct svm_regctx *regctx, int reg) { - switch (reg) { case VM_REG_GUEST_RBX: return (®ctx->sctx_rbx); @@ -2201,56 +2169,135 @@ swctx_regptr(struct svm_regctx *regctx, int reg) static int svm_getreg(void *arg, int vcpu, int ident, uint64_t *val) { - struct svm_softc *svm_sc; - register_t *reg; - - svm_sc = arg; + struct svm_softc *sc; + struct vmcb *vmcb; + register_t *regp; + uint64_t *fieldp; + struct vmcb_segment *seg; - if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_get_intr_shadow(svm_sc, vcpu, val)); - } + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); - if (vmcb_read(svm_sc, vcpu, ident, val) == 0) { + regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident); + if (regp != NULL) { + *val = *regp; return (0); } - 
reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + switch (ident) { + case VM_REG_GUEST_INTR_SHADOW: + *val = (vmcb->ctrl.intr_shadow != 0) ? 1 : 0; + break; - if (reg != NULL) { - *val = *reg; - return (0); + case VM_REG_GUEST_CR0: + case VM_REG_GUEST_CR2: + case VM_REG_GUEST_CR3: + case VM_REG_GUEST_CR4: + case VM_REG_GUEST_DR6: + case VM_REG_GUEST_DR7: + case VM_REG_GUEST_EFER: + case VM_REG_GUEST_RAX: + case VM_REG_GUEST_RFLAGS: + case VM_REG_GUEST_RIP: + case VM_REG_GUEST_RSP: + fieldp = vmcb_regptr(vmcb, ident, NULL); + *val = *fieldp; + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + seg = vmcb_segptr(vmcb, ident); + *val = seg->selector; + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + /* GDTR and IDTR don't have segment selectors */ + return (EINVAL); + + default: + return (EINVAL); } - VCPU_CTR1(svm_sc->vm, vcpu, "svm_getreg: unknown register %#x", ident); - return (EINVAL); + return (0); } static int svm_setreg(void *arg, int vcpu, int ident, uint64_t val) { - struct svm_softc *svm_sc; - register_t *reg; - - svm_sc = arg; + struct svm_softc *sc; + struct vmcb *vmcb; + register_t *regp; + uint64_t *fieldp; + uint32_t dirty; + struct vmcb_segment *seg; - if (ident == VM_REG_GUEST_INTR_SHADOW) { - return (svm_modify_intr_shadow(svm_sc, vcpu, val)); - } + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); - if (vmcb_write(svm_sc, vcpu, ident, val) == 0) { + regp = swctx_regptr(svm_get_guest_regctx(sc, vcpu), ident); + if (regp != NULL) { + *regp = val; return (0); } - reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + dirty = VMCB_CACHE_NONE; + switch (ident) { + case VM_REG_GUEST_INTR_SHADOW: + vmcb->ctrl.intr_shadow = (val != 0) ? 
1 : 0; + break; - if (reg != NULL) { - *reg = val; - return (0); + case VM_REG_GUEST_EFER: + fieldp = vmcb_regptr(vmcb, ident, &dirty); + /* EFER_SVM must always be set when the guest is executing */ + *fieldp = val | EFER_SVM; + dirty |= VMCB_CACHE_CR; + break; + + case VM_REG_GUEST_CR0: + case VM_REG_GUEST_CR2: + case VM_REG_GUEST_CR3: + case VM_REG_GUEST_CR4: + case VM_REG_GUEST_DR6: + case VM_REG_GUEST_DR7: + case VM_REG_GUEST_RAX: + case VM_REG_GUEST_RFLAGS: + case VM_REG_GUEST_RIP: + case VM_REG_GUEST_RSP: + fieldp = vmcb_regptr(vmcb, ident, &dirty); + *fieldp = val; + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + dirty |= VMCB_CACHE_SEG; + seg = vmcb_segptr(vmcb, ident); + seg->selector = (uint16_t)val; + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + /* GDTR and IDTR don't have segment selectors */ + return (EINVAL); + + default: + return (EINVAL); } - if (ident == VM_REG_GUEST_ENTRY_INST_LENGTH) { - /* Ignore. */ - return (0); + if (dirty != VMCB_CACHE_NONE) { + svm_set_dirty(sc, vcpu, dirty); } /* @@ -2259,8 +2306,119 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val) * whether 'running' is true/false. */ - VCPU_CTR1(svm_sc->vm, vcpu, "svm_setreg: unknown register %#x", ident); - return (EINVAL); + return (0); +} + +static int +svm_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +{ + struct vmcb *vmcb; + struct svm_softc *sc; + struct vmcb_segment *seg; + + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); + + switch (reg) { + case VM_REG_GUEST_CS: + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_LDTR: + case VM_REG_GUEST_TR: + svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); + seg = vmcb_segptr(vmcb, reg); + /* + * Map seg_desc access to VMCB attribute format. 
+ * + * SVM uses the 'P' bit in the segment attributes to indicate a + * NULL segment so clear it if the segment is marked unusable. + */ + seg->attrib = VMCB_ACCESS2ATTR(desc->access); + if (SEG_DESC_UNUSABLE(desc->access)) { + seg->attrib &= ~0x80; + } + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); + seg = vmcb_segptr(vmcb, reg); + break; + + default: + return (EINVAL); + } + + ASSERT(seg != NULL); + seg->base = desc->base; + seg->limit = desc->limit; + + return (0); +} + +static int +svm_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +{ + struct vmcb *vmcb; + struct svm_softc *sc; + struct vmcb_segment *seg; + + sc = arg; + vmcb = svm_get_vmcb(sc, vcpu); + + switch (reg) { + case VM_REG_GUEST_DS: + case VM_REG_GUEST_ES: + case VM_REG_GUEST_FS: + case VM_REG_GUEST_GS: + case VM_REG_GUEST_SS: + case VM_REG_GUEST_LDTR: + seg = vmcb_segptr(vmcb, reg); + desc->access = VMCB_ATTR2ACCESS(seg->attrib); + /* + * VT-x uses bit 16 to indicate a segment that has been loaded + * with a NULL selector (aka unusable). The 'desc->access' + * field is interpreted in the VT-x format by the + * processor-independent code. + * + * SVM uses the 'P' bit to convey the same information so + * convert it into the VT-x format. For more details refer to + * section "Segment State in the VMCB" in APMv2. + */ + if ((desc->access & 0x80) == 0) { + /* Unusable segment */ + desc->access |= 0x10000; + } + break; + + case VM_REG_GUEST_CS: + case VM_REG_GUEST_TR: + seg = vmcb_segptr(vmcb, reg); + desc->access = VMCB_ATTR2ACCESS(seg->attrib); + break; + + case VM_REG_GUEST_GDTR: + case VM_REG_GUEST_IDTR: + seg = vmcb_segptr(vmcb, reg); + /* + * Since there are no access bits associated with the GDTR or + * the IDTR, zero out the field to ensure it does not contain + * garbage which might confuse the consumer. 
+ */ + desc->access = 0; + break; + + default: + return (EINVAL); + } + + ASSERT(seg != NULL); + desc->base = seg->base; + desc->limit = seg->limit; + return (0); } static int @@ -2368,8 +2526,8 @@ struct vmm_ops vmm_ops_amd = { .vmcleanup = svm_vmcleanup, .vmgetreg = svm_getreg, .vmsetreg = svm_setreg, - .vmgetdesc = vmcb_getdesc, - .vmsetdesc = vmcb_setdesc, + .vmgetdesc = svm_getdesc, + .vmsetdesc = svm_setdesc, .vmgetcap = svm_getcap, .vmsetcap = svm_setcap, .vmspace_alloc = svm_npt_alloc, diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c index 5075b69867..b00f974c23 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.c +++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.c @@ -26,429 +26,130 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * Copyright 2020 Oxide Computer Company + */ + #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <machine/segments.h> -#include <machine/specialreg.h> #include <machine/vmm.h> -#include "vmm_ktr.h" - #include "vmcb.h" #include "svm.h" -#include "svm_softc.h" -/* - * The VMCB aka Virtual Machine Control Block is a 4KB aligned page - * in memory that describes the virtual machine. - * - * The VMCB contains: - * - instructions or events in the guest to intercept - * - control bits that modify execution environment of the guest - * - guest processor state (e.g. general purpose registers) - */ - -/* - * Return VMCB segment area. 
- */ -static struct vmcb_segment * +struct vmcb_segment * vmcb_segptr(struct vmcb *vmcb, int type) { - struct vmcb_state *state; - struct vmcb_segment *seg; - - state = &vmcb->state; + struct vmcb_state *state = &vmcb->state; switch (type) { case VM_REG_GUEST_CS: - seg = &state->cs; - break; - + return (&state->cs); case VM_REG_GUEST_DS: - seg = &state->ds; - break; - + return (&state->ds); case VM_REG_GUEST_ES: - seg = &state->es; - break; - + return (&state->es); case VM_REG_GUEST_FS: - seg = &state->fs; - break; - + return (&state->fs); case VM_REG_GUEST_GS: - seg = &state->gs; - break; - + return (&state->gs); case VM_REG_GUEST_SS: - seg = &state->ss; - break; - + return (&state->ss); case VM_REG_GUEST_GDTR: - seg = &state->gdt; - break; - + return (&state->gdt); case VM_REG_GUEST_IDTR: - seg = &state->idt; - break; - + return (&state->idt); case VM_REG_GUEST_LDTR: - seg = &state->ldt; - break; - + return (&state->ldt); case VM_REG_GUEST_TR: - seg = &state->tr; - break; - + return (&state->tr); default: - seg = NULL; - break; + panic("unexpected seg %d", type); } - - return (seg); } -static int -vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident, - uint64_t *val) +uint64_t * +vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp) { - struct vmcb *vmcb; - int off, bytes; - char *ptr; - - vmcb = svm_get_vmcb(softc, vcpu); - off = VMCB_ACCESS_OFFSET(ident); - bytes = VMCB_ACCESS_BYTES(ident); - - if ((off + bytes) >= sizeof (struct vmcb)) - return (EINVAL); - - ptr = (char *)vmcb; - - if (!write) - *val = 0; - - switch (bytes) { - case 8: - case 4: - case 2: - if (write) - memcpy(ptr + off, val, bytes); - else - memcpy(val, ptr + off, bytes); - break; - default: - VCPU_CTR1(softc->vm, vcpu, - "Invalid size %d for VMCB access: %d", bytes); - return (EINVAL); - } - - /* Invalidate all VMCB state cached by h/w. 
*/ - if (write) - svm_set_dirty(softc, vcpu, 0xffffffff); - - return (0); -} - -/* - * Read from segment selector, control and general purpose register of VMCB. - */ -int -vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) -{ - struct vmcb *vmcb; struct vmcb_state *state; - struct vmcb_segment *seg; - int err; + uint64_t *res = NULL; + uint32_t dirty = VMCB_CACHE_NONE; - vmcb = svm_get_vmcb(sc, vcpu); state = &vmcb->state; - err = 0; - - if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 0, ident, retval)); switch (ident) { case VM_REG_GUEST_CR0: - *retval = state->cr0; + res = &state->cr0; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_CR2: - *retval = state->cr2; + res = &state->cr2; + dirty = VMCB_CACHE_CR2; break; case VM_REG_GUEST_CR3: - *retval = state->cr3; + res = &state->cr3; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_CR4: - *retval = state->cr4; + res = &state->cr4; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_DR6: - *retval = state->dr6; + res = &state->dr6; + dirty = VMCB_CACHE_DR; break; case VM_REG_GUEST_DR7: - *retval = state->dr7; + res = &state->dr7; + dirty = VMCB_CACHE_DR; break; case VM_REG_GUEST_EFER: - *retval = state->efer; + res = &state->efer; + dirty = VMCB_CACHE_CR; break; case VM_REG_GUEST_RAX: - *retval = state->rax; + res = &state->rax; break; case VM_REG_GUEST_RFLAGS: - *retval = state->rflags; + res = &state->rflags; break; case VM_REG_GUEST_RIP: - *retval = state->rip; + res = &state->rip; break; case VM_REG_GUEST_RSP: - *retval = state->rsp; - break; - - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - case VM_REG_GUEST_SS: - case VM_REG_GUEST_LDTR: - case VM_REG_GUEST_TR: - seg = vmcb_segptr(vmcb, ident); - KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", - __func__, ident)); - *retval = seg->selector; + res = &state->rsp; break; - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - /* GDTR and IDTR 
don't have segment selectors */ - err = EINVAL; - break; default: - err = EINVAL; + panic("unexpected register %d", ident); break; } - return (err); -} - -/* - * Write to segment selector, control and general purpose register of VMCB. - */ -int -vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val) -{ - struct vmcb *vmcb; - struct vmcb_state *state; - struct vmcb_segment *seg; - int err, dirtyseg; - - vmcb = svm_get_vmcb(sc, vcpu); - state = &vmcb->state; - dirtyseg = 0; - err = 0; - - if (VMCB_ACCESS_OK(ident)) - return (vmcb_access(sc, vcpu, 1, ident, &val)); - - switch (ident) { - case VM_REG_GUEST_CR0: - state->cr0 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_CR2: - state->cr2 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2); - break; - - case VM_REG_GUEST_CR3: - state->cr3 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_CR4: - state->cr4 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_DR6: - state->dr6 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); - break; - - case VM_REG_GUEST_DR7: - state->dr7 = val; - svm_set_dirty(sc, vcpu, VMCB_CACHE_DR); - break; - - case VM_REG_GUEST_EFER: - /* EFER_SVM must always be set when the guest is executing */ - state->efer = val | EFER_SVM; - svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); - break; - - case VM_REG_GUEST_RAX: - state->rax = val; - break; - - case VM_REG_GUEST_RFLAGS: - state->rflags = val; - break; - - case VM_REG_GUEST_RIP: - state->rip = val; - break; - - case VM_REG_GUEST_RSP: - state->rsp = val; - break; - - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_SS: - dirtyseg = 1; /* FALLTHROUGH */ - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - case VM_REG_GUEST_LDTR: - case VM_REG_GUEST_TR: - seg = vmcb_segptr(vmcb, ident); - KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", - __func__, ident)); - seg->selector = val; - if (dirtyseg) - 
svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); - break; - - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - /* GDTR and IDTR don't have segment selectors */ - err = EINVAL; - break; - default: - err = EINVAL; - break; - } - - return (err); -} - -int -vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2) -{ - struct vmcb_segment *seg; - - seg = vmcb_segptr(vmcb, ident); - if (seg != NULL) { - bcopy(seg, seg2, sizeof(struct vmcb_segment)); - return (0); - } else { - return (EINVAL); - } -} - -int -vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) -{ - struct vmcb *vmcb; - struct svm_softc *sc; - struct vmcb_segment *seg; - uint16_t attrib; - - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); - - seg = vmcb_segptr(vmcb, reg); - KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", - __func__, reg)); - - seg->base = desc->base; - seg->limit = desc->limit; - if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { - /* - * Map seg_desc access to VMCB attribute format. - * - * SVM uses the 'P' bit in the segment attributes to indicate a - * NULL segment so clear it if the segment is marked unusable. 
- */ - attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF); - if (SEG_DESC_UNUSABLE(desc->access)) { - attrib &= ~0x80; - } - seg->attrib = attrib; - } - - VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), " - "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib); - - switch (reg) { - case VM_REG_GUEST_CS: - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_SS: - svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); - break; - case VM_REG_GUEST_GDTR: - case VM_REG_GUEST_IDTR: - svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); - break; - default: - break; + ASSERT(res != NULL); + if (dirtyp != NULL) { + *dirtyp |= dirty; } - - return (0); -} - -int -vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) -{ - struct vmcb *vmcb; - struct svm_softc *sc; - struct vmcb_segment *seg; - - sc = arg; - vmcb = svm_get_vmcb(sc, vcpu); - seg = vmcb_segptr(vmcb, reg); - KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", - __func__, reg)); - - desc->base = seg->base; - desc->limit = seg->limit; - desc->access = 0; - - if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { - /* Map seg_desc access to VMCB attribute format */ - desc->access = ((seg->attrib & 0xF00) << 4) | - (seg->attrib & 0xFF); - - /* - * VT-x uses bit 16 to indicate a segment that has been loaded - * with a NULL selector (aka unusable). The 'desc->access' - * field is interpreted in the VT-x format by the - * processor-independent code. - * - * SVM uses the 'P' bit to convey the same information so - * convert it into the VT-x format. For more details refer to - * section "Segment State in the VMCB" in APMv2. 
- */ - if (reg != VM_REG_GUEST_CS && reg != VM_REG_GUEST_TR) { - if ((desc->access & 0x80) == 0) - desc->access |= 0x10000; /* Unusable segment */ - } - } - - return (0); + return (res); } diff --git a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h index 88f65df66a..e3593db7f9 100644 --- a/usr/src/uts/i86pc/io/vmm/amd/vmcb.h +++ b/usr/src/uts/i86pc/io/vmm/amd/vmcb.h @@ -212,15 +212,6 @@ struct svm_softc; #define VMCB_OFF_SYSENTER_EIP VMCB_OFF_STATE(0x238) #define VMCB_OFF_GUEST_PAT VMCB_OFF_STATE(0x268) -/* - * Encode the VMCB offset and bytes that we want to read from VMCB. - */ -#define VMCB_ACCESS(o, w) (0x80000000 | (((w) & 0xF) << 16) | \ - ((o) & 0xFFF)) -#define VMCB_ACCESS_OK(v) ((v) & 0x80000000 ) -#define VMCB_ACCESS_BYTES(v) (((v) >> 16) & 0xF) -#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF) - #ifdef _KERNEL /* VMCB save state area segment format */ struct vmcb_segment { @@ -231,6 +222,10 @@ struct vmcb_segment { }; CTASSERT(sizeof(struct vmcb_segment) == 16); +/* Convert to/from vmcb segment access to generic (VMX) access */ +#define VMCB_ATTR2ACCESS(attr) ((((attr) & 0xf00) << 4) | ((attr) & 0xff)) +#define VMCB_ACCESS2ATTR(acc) ((((acc) & 0xf000) >> 4) | ((acc) & 0xff)) + /* Code segment descriptor attribute in 12 bit format as saved by VMCB. */ #define VMCB_CS_ATTRIB_L BIT(9) /* Long mode. */ #define VMCB_CS_ATTRIB_D BIT(10) /* Operand size bit. */ @@ -360,6 +355,15 @@ struct vmcb_state { CTASSERT(sizeof(struct vmcb_state) == 0xC00); CTASSERT(offsetof(struct vmcb_state, int_to) == 0x290); +/* + * The VMCB aka Virtual Machine Control Block is a 4KB aligned page + * in memory that describes the virtual machine. + * + * The VMCB contains: + * - instructions or events in the guest to intercept + * - control bits that modify execution environment of the guest + * - guest processor state (e.g. 
general purpose registers) + */ struct vmcb { struct vmcb_ctrl ctrl; struct vmcb_state state; @@ -367,11 +371,8 @@ struct vmcb { CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); CTASSERT(offsetof(struct vmcb, state) == 0x400); -int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval); -int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val); -int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc); -int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg); +struct vmcb_segment *vmcb_segptr(struct vmcb *vmcb, int type); +uint64_t *vmcb_regptr(struct vmcb *vmcb, int ident, uint32_t *dirtyp); #endif /* _KERNEL */ #endif /* _VMCB_H_ */ |