-rw-r--r-- | usr/src/uts/i86pc/io/vmm/intel/vmx.c | 69
1 file changed, 64 insertions, 5 deletions
diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
index f628e0b8ab..89573cccec 100644
--- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c
+++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c
@@ -3313,6 +3313,12 @@ do { \
 } while (0)
 
 /*
+ * The least significant bit in the 'pending' field of the PIR descriptor
+ * indicates to the CPU that interrupts are pending in the 'pir' fields.
+ */
+#define	PIR_MASK_PENDING	0x1
+
+/*
  * vlapic->ops handlers that utilize the APICv hardware assist described in
  * Chapter 29 of the Intel SDM.
  */
@@ -3321,8 +3327,14 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
 {
 	struct vlapic_vtx *vlapic_vtx;
 	struct pir_desc *pir_desc;
-	uint64_t mask;
+	uint64_t mask, old;
 	int idx, notify;
+	const uint_t prio = (vector & 0xf0) >> 4;
+	const uint64_t prio_mask = (1 << prio) | PIR_MASK_PENDING;
+
+#ifndef __FreeBSD__
+	ASSERT(vector >= 0x10 && vector <= 0xff);
+#endif
 
 	vlapic_vtx = (struct vlapic_vtx *)vlapic;
 	pir_desc = vlapic_vtx->pir_desc;
@@ -3335,7 +3347,52 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
 	idx = vector / 64;
 	mask = 1UL << (vector % 64);
 	atomic_set_long(&pir_desc->pir[idx], mask);
-	notify = atomic_cmpset_long(&pir_desc->pending, 0, 1);
+
+	/*
+	 * Deciding if vCPU notification is required when using PIR is
+	 * complicated by interrupt priorities.  It is not enough to simply
+	 * notify when 'pending' makes the 0->1 transition.  If an interrupt
+	 * with a higher priority class than those already present is queued,
+	 * its arrival necessitates a notification in case the vCPU is blocked
+	 * in HLT with a PPR higher than the existing interrupts.
+	 *
+	 * The priority classes of pending interrupts is cached as a bitfield
+	 * in the higher order bits of the 'pending' field of pir_desc.  The
+	 * Intel manual states those bits are reserved for software and we are
+	 * free to use them.
+	 *
+	 * Those priority bits will be left unchanged, becoming effectively
+	 * stale, when the CPU delivers the posted interrupts to the guest and
+	 * clears the 'pending' bit.  This is acceptable since they are only
+	 * used to elide interrupt-is-ready wake-ups when the 'pending' bit is
+	 * not making a 0->1 transition _and_ the vCPU priority is elevated.
+	 *
+	 * When vmx_inject_pir() is called to inject any interrupts which were
+	 * posted while the CPU was outside VMX context, it will clear the
+	 * priority bitfield as part of querying the 'pending' field.
+	 */
+	old = atomic_load_acq_long(&pir_desc->pending);
+	if (atomic_cmpset_long(&pir_desc->pending, old, old|prio_mask) != 0) {
+		/*
+		 * If there was no race in updating the pending field
+		 * (including the priority bitfield), then a notification is
+		 * only needed if the incoming priority class is higher than
+		 * any existing ones.
+		 *
+		 * This will also cover the case where the 'pending' bit has
+		 * been cleared by the CPU as it delivered interrupts posted in
+		 * the structure.
+		 */
+		notify = ((old & PIR_MASK_PENDING) == 0 || prio_mask > old);
+	} else {
+		/*
+		 * In the case of racing updates to the pending field, the
+		 * priority and pending bit are atomically set and the
+		 * notification is unconditionally requested.
+		 */
+		atomic_set_long(&pir_desc->pending, prio_mask);
+		notify = 1;
+	}
 
 	VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
 	    level, "vmx_set_intr_ready");
@@ -3362,7 +3419,7 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
 	pir_desc = vlapic_vtx->pir_desc;
 
 	pending = atomic_load_acq_long(&pir_desc->pending);
-	if (!pending) {
+	if ((pending & PIR_MASK_PENDING) == 0) {
 		/*
 		 * While a virtual interrupt may have already been
 		 * processed the actual delivery maybe pending the
@@ -3503,13 +3560,15 @@ vmx_inject_pir(struct vlapic *vlapic)
 	struct vlapic_vtx *vlapic_vtx;
 	struct pir_desc *pir_desc;
 	struct LAPIC *lapic;
-	uint64_t val, pirval;
+	uint64_t val, pirval, pending;
 	int rvi, pirbase = -1;
 	uint16_t intr_status_old, intr_status_new;
 
 	vlapic_vtx = (struct vlapic_vtx *)vlapic;
 	pir_desc = vlapic_vtx->pir_desc;
-	if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
+
+	pending = atomic_swap_long(&pir_desc->pending, 0);
+	if ((pending & PIR_MASK_PENDING) == 0) {
 		VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
 		    "no posted interrupt pending");
 		return;
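
For reference, here is a minimal standalone sketch of the posting-side logic above, using C11 atomics in place of the kernel's atomic_load_acq_long()/atomic_cmpset_long()/atomic_set_long(); the names pir_pending_demo and pir_post_demo are hypothetical and exist only for illustration, not in the patch. Bits 7:4 of a vector select its priority class, so each class maps to one bit above PIR_MASK_PENDING, and a wake-up is requested only when the word makes the 0->1 transition or a class above all recorded ones arrives.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define	PIR_MASK_PENDING	0x1UL

static _Atomic uint64_t pir_pending_demo;

/*
 * Post 'vector' into the demo 'pending' word and report whether the vCPU
 * would need a notification, mirroring the logic in vmx_set_intr_ready().
 */
static bool
pir_post_demo(int vector)
{
	/* Bits 7:4 of the vector are its priority class (1 through 15). */
	const unsigned int prio = (vector & 0xf0) >> 4;
	const uint64_t prio_mask = (1UL << prio) | PIR_MASK_PENDING;
	uint64_t old = atomic_load(&pir_pending_demo);

	if (atomic_compare_exchange_strong(&pir_pending_demo, &old,
	    old | prio_mask)) {
		/*
		 * No race: notify only on the 0->1 transition of the pending
		 * bit, or when the new class is above every class already
		 * recorded in the upper bits.
		 */
		return ((old & PIR_MASK_PENDING) == 0 || prio_mask > old);
	}

	/* Racing update: set the bits unconditionally and always notify. */
	atomic_fetch_or(&pir_pending_demo, prio_mask);
	return (true);
}

int
main(void)
{
	printf("0x30 -> notify=%d\n", pir_post_demo(0x30));	/* 1: 0->1 */
	printf("0x25 -> notify=%d\n", pir_post_demo(0x25));	/* 0: class 2 < 3 */
	printf("0xd1 -> notify=%d\n", pir_post_demo(0xd1));	/* 1: class 13 > 3 */
	return (0);
}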
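
On the consumer side, the patch replaces the old cmpset of 'pending' from 1 to 0 with an atomic swap, since the word may now carry priority-class bits and will rarely equal exactly 1. A sketch of that drain step, reusing pir_pending_demo and PIR_MASK_PENDING from the sketch above (pir_drain_demo is likewise a hypothetical name):

static bool
pir_drain_demo(void)
{
	uint64_t pending;

	/*
	 * Grab and zero the whole word in one step, clearing the pending bit
	 * and the cached priority classes together, as atomic_swap_long()
	 * does in vmx_inject_pir().
	 */
	pending = atomic_exchange(&pir_pending_demo, 0);
	if ((pending & PIR_MASK_PENDING) == 0) {
		/* Nothing was posted while outside VMX context. */
		return (false);
	}

	/* The real code now scans pir_desc->pir[] and updates the RVI. */
	return (true);
}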