| author    | Felix Geyer <debfx-pkg@fobos.de> | 2011-01-02 10:58:34 +0100 |
| committer | Felix Geyer <debfx-pkg@fobos.de> | 2011-01-02 10:58:34 +0100 |
| commit    | 361589e3ae692db27161410309b92c6b89e1ec6c (patch) |
| tree      | f58702fae95e097c1a03bd37c1df44963a5b316f /src/VBox/VMM/VMMR0 |
| parent    | d4835ef8bf2b0196ae1887c04e3d57cce840904c (diff) |
| download  | virtualbox-361589e3ae692db27161410309b92c6b89e1ec6c.tar.gz |
Imported Upstream version 4.0.0-dfsg (upstream/4.0.0-dfsg)
Diffstat (limited to 'src/VBox/VMM/VMMR0')
26 files changed, 1381 insertions, 2873 deletions
diff --git a/src/VBox/VMM/VMMR0/CPUMR0.cpp b/src/VBox/VMM/VMMR0/CPUMR0.cpp index 2956fb14a..004e00c50 100644 --- a/src/VBox/VMM/VMMR0/CPUMR0.cpp +++ b/src/VBox/VMM/VMMR0/CPUMR0.cpp @@ -1,4 +1,4 @@ -/* $Id: CPUMR0.cpp $ */ +/* $Id: CPUMR0.cpp 33938 2010-11-10 15:50:41Z vboxsync $ */ /** @file * CPUM - Host Context Ring 0. */ @@ -30,7 +30,6 @@ #include <iprt/assert.h> #include <iprt/asm-amd64-x86.h> #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI -# include <iprt/cpuset.h> # include <iprt/mem.h> # include <iprt/memobj.h> # include <VBox/apic.h> diff --git a/src/VBox/VMM/VMMR0/CPUMR0A.asm b/src/VBox/VMM/VMMR0/CPUMR0A.asm index 46b515759..d6e5be385 100644 --- a/src/VBox/VMM/VMMR0/CPUMR0A.asm +++ b/src/VBox/VMM/VMMR0/CPUMR0A.asm @@ -1,4 +1,4 @@ -; $Id: CPUMR0A.asm $ +; $Id: CPUMR0A.asm 28800 2010-04-27 08:22:32Z vboxsync $ ;; @file ; CPUM - Guest Context Assembly Routines. ; diff --git a/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm b/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm index afbd75f39..fe39bf901 100644 --- a/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm +++ b/src/VBox/VMM/VMMR0/CPUMR0UnusedA.asm @@ -1,4 +1,4 @@ -; $Id: CPUMR0UnusedA.asm $ +; $Id: CPUMR0UnusedA.asm 28800 2010-04-27 08:22:32Z vboxsync $ ;; @file ; CPUM - Guest Context Assembly Routines. ; diff --git a/src/VBox/VMM/VMMR0/GMMR0.cpp b/src/VBox/VMM/VMMR0/GMMR0.cpp index 9c7930afe..8ff9c09e2 100644 --- a/src/VBox/VMM/VMMR0/GMMR0.cpp +++ b/src/VBox/VMM/VMMR0/GMMR0.cpp @@ -1,4 +1,4 @@ -/* $Id: GMMR0.cpp $ */ +/* $Id: GMMR0.cpp 33540 2010-10-28 09:27:05Z vboxsync $ */ /** @file * GMM - Global Memory Manager. */ @@ -30,7 +30,7 @@ * The allocation chunks has fixed sized, the size defined at compile time * by the #GMM_CHUNK_SIZE \#define. * - * Each chunk is given an unquie ID. Each page also has a unique ID. The + * Each chunk is given an unique ID. Each page also has a unique ID. The * relation ship between the two IDs is: * @code * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE); @@ -93,7 +93,7 @@ * * The per page cost in kernel space is 32-bit plus whatever RTR0MEMOBJ * entails. In addition there is the chunk cost of approximately - * (sizeof(RT0MEMOBJ) + sizof(CHUNK)) / 2^CHUNK_SHIFT bytes per page. + * (sizeof(RT0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page. * * On Windows the per page #RTR0MEMOBJ cost is 32-bit on 32-bit windows * and 64-bit on 64-bit windows (a PFN_NUMBER in the MDL). So, 64-bit per page. @@ -111,7 +111,7 @@ * @subsection sub_gmm_locking Serializing * * One simple fast mutex will be employed in the initial implementation, not - * two as metioned in @ref subsec_pgmPhys_Serializing. + * two as mentioned in @ref subsec_pgmPhys_Serializing. * * @see @ref subsec_pgmPhys_Serializing * @@ -292,7 +292,7 @@ typedef GMMPAGE *PGMMPAGE; * @{ */ /** A private page. */ #define GMM_PAGE_STATE_PRIVATE 0 -/** A private page - alternative value used on the 32-bit implemenation. +/** A private page - alternative value used on the 32-bit implementation. * This will never be used on 64-bit hosts. */ #define GMM_PAGE_STATE_PRIVATE_32 1 /** A shared page. */ @@ -488,7 +488,7 @@ typedef struct GMM GMMCHUNKFREESET Shared; /** Shared module tree (global). */ - /** @todo seperate trees for distinctly different guest OSes. */ + /** @todo separate trees for distinctly different guest OSes. */ PAVLGCPTRNODECORE pGlobalSharedModuleTree; /** The maximum number of pages we're allowed to allocate. 
@@ -1114,7 +1114,7 @@ static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pv * @param idCpu VCPU id * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs. * This does not include MMIO2 and similar. - * @param cShadowPages The number of pages that may be allocated for shadow pageing structures. + * @param cShadowPages The number of pages that may be allocated for shadow paging structures. * @param cFixedPages The number of pages that may be allocated for fixed objects like the * hyper heap, MMIO2 and similar. * @param enmPolicy The OC policy to use on this VM. @@ -1153,7 +1153,7 @@ GMMR0DECL(int) GMMR0InitialReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePag && !pGVM->gmm.s.Reserved.cShadowPages) { /* - * Check if we can accomodate this. + * Check if we can accommodate this. */ /* ... later ... */ if (RT_SUCCESS(rc)) @@ -1215,7 +1215,7 @@ GMMR0DECL(int) GMMR0InitialReservationReq(PVM pVM, VMCPUID idCpu, PGMMINITIALRES * @param idCpu VCPU id * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs. * This does not include MMIO2 and similar. - * @param cShadowPages The number of pages that may be allocated for shadow pageing structures. + * @param cShadowPages The number of pages that may be allocated for shadow paging structures. * @param cFixedPages The number of pages that may be allocated for fixed objects like the * hyper heap, MMIO2 and similar. * @@ -1249,7 +1249,7 @@ GMMR0DECL(int) GMMR0UpdateReservation(PVM pVM, VMCPUID idCpu, uint64_t cBasePage && pGVM->gmm.s.Reserved.cShadowPages) { /* - * Check if we can accomodate this. + * Check if we can accommodate this. */ /* ... later ... */ if (RT_SUCCESS(rc)) @@ -2358,7 +2358,7 @@ GMMR0DECL(int) GMMR0AllocateLargePage(PVM pVM, VMCPUID idCpu, uint32_t cbPage, return VERR_GMM_HIT_VM_ACCOUNT_LIMIT; } - /* Allocate a new continous chunk. */ + /* Allocate a new continuous chunk. */ rc = gmmR0AllocateOneChunk(pGMM, &pGMM->Private, pGVM->hSelf, GMMCHUNKTYPE_CONTINUOUS, &pChunk); if (RT_FAILURE(rc)) { @@ -2927,7 +2927,7 @@ GMMR0DECL(int) GMMR0FreePagesReq(PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq) * @returns VBox status code: * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH * @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH - * @retval VERR_GMM_OVERCOMMITED_TRY_AGAIN_IN_A_BIT - reset condition + * @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition * indicating that we won't necessarily have sufficient RAM to boot * the VM again and that it should pause until this changes (we'll try * balloon some other VM). (For standard deflate we have little choice @@ -2958,7 +2958,7 @@ GMMR0DECL(int) GMMR0BalloonedPages(PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmA return rc; /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -3137,7 +3137,7 @@ GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ return rc; /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -3538,7 +3538,7 @@ GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY en Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule)); /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. 
*/ rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -3658,7 +3658,7 @@ GMMR0DECL(int) GMMR0RegisterSharedModule(PVM pVM, VMCPUID idCpu, VBOXOSFAMILY en /* Save reference. */ pRecVM->pGlobalModule = pGlobalModule; if ( fNewModule - || pRecVM->fCollision == true) /* colliding module unregistered and new one registerd since the last check */ + || pRecVM->fCollision == true) /* colliding module unregistered and new one registered since the last check */ { pGlobalModule->cUsers++; Log(("GMMR0RegisterSharedModule: using existing module %s cUser=%d!\n", pszModuleName, pGlobalModule->cUsers)); @@ -3738,7 +3738,7 @@ GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char *pszModu Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCBaseAddr, cbModule)); /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -3764,7 +3764,7 @@ GMMR0DECL(int) GMMR0UnregisterSharedModule(PVM pVM, VMCPUID idCpu, char *pszModu if (pRec->aRegions[i].paHCPhysPageID) RTMemFree(pRec->aRegions[i].paHCPhysPageID); - Assert(pRec->Core.Key == GCBaseAddr || pRec->enmGuestOS == VBOXOSFAMILY_Windows64); + Assert(pRec->Core.Key == GCBaseAddr || pRec->enmGuestOS == VBOXOSFAMILY_Windows64); Assert(pRec->cRegions == pRecVM->cRegions); #ifdef VBOX_STRICT for (unsigned i = 0; i < pRecVM->cRegions; i++) @@ -4055,7 +4055,7 @@ GMMR0DECL(int) GMMR0ResetSharedModules(PVM pVM, VMCPUID idCpu) return rc; /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -4122,7 +4122,7 @@ GMMR0DECL(int) GMMR0CheckSharedModulesStart(PVM pVM) GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR); /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ int rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -4175,7 +4175,7 @@ GMMR0DECL(int) GMMR0CheckSharedModules(PVM pVM, PVMCPU pVCpu) # ifndef DEBUG_sandervl /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); @@ -4286,7 +4286,7 @@ GMMR0DECL(int) GMMR0FindDuplicatePageReq(PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq) GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR); /* - * Take the sempahore and do some more validations. + * Take the semaphore and do some more validations. */ int rc = RTSemFastMutexRequest(pGMM->Mtx); AssertRC(rc); diff --git a/src/VBox/VMM/VMMR0/GMMR0Internal.h b/src/VBox/VMM/VMMR0/GMMR0Internal.h index cbc0c0921..8f9451079 100644 --- a/src/VBox/VMM/VMMR0/GMMR0Internal.h +++ b/src/VBox/VMM/VMMR0/GMMR0Internal.h @@ -1,4 +1,4 @@ -/* $Id: GMMR0Internal.h $ */ +/* $Id: GMMR0Internal.h 33540 2010-10-28 09:27:05Z vboxsync $ */ /** @file * GMM - The Global Memory Manager, Internal Header. */ @@ -29,7 +29,7 @@ typedef struct GMMVMSIZES /** The number of pages of base memory. * This is the sum of RAM, ROMs and handy pages. */ uint64_t cBasePages; - /** The number of pages for the shadow pool. (Can be sequeezed for memory.) */ + /** The number of pages for the shadow pool. (Can be squeezed for memory.) */ uint32_t cShadowPages; /** The number of pages for fixed allocations like MMIO2 and the hyper heap. */ uint32_t cFixedPages; @@ -78,10 +78,10 @@ typedef struct GMMPERVM uint64_t cPrivatePages; /** The current number of shared pages. 
*/ uint64_t cSharedPages; - /** The current over-comitment policy. */ + /** The current over-commitment policy. */ GMMOCPOLICY enmPolicy; /** The VM priority for arbitrating VMs in low and out of memory situation. - * Like which VMs to start sequeezing first. */ + * Like which VMs to start squeezing first. */ GMMPRIORITY enmPriority; /** The current number of ballooned pages. */ diff --git a/src/VBox/VMM/VMMR0/GVMMR0.cpp b/src/VBox/VMM/VMMR0/GVMMR0.cpp index 74520a957..4dbc7e3bf 100644 --- a/src/VBox/VMM/VMMR0/GVMMR0.cpp +++ b/src/VBox/VMM/VMMR0/GVMMR0.cpp @@ -1,10 +1,10 @@ -/* $Id: GVMMR0.cpp $ */ +/* $Id: GVMMR0.cpp 33540 2010-10-28 09:27:05Z vboxsync $ */ /** @file * GVMM - Global VM Manager. */ /* - * Copyright (C) 2007 Oracle Corporation + * Copyright (C) 2007-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -18,15 +18,31 @@ /** @page pg_gvmm GVMM - The Global VM Manager * - * The Global VM Manager lives in ring-0. It's main function at the moment - * is to manage a list of all running VMs, keep a ring-0 only structure (GVM) - * for each of them, and assign them unique identifiers (so GMM can track - * page owners). The idea for the future is to add an idle priority kernel - * thread that can take care of tasks like page sharing. + * The Global VM Manager lives in ring-0. Its main function at the moment is + * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for + * each of them, and assign them unique identifiers (so GMM can track page + * owners). The GVMM also manage some of the host CPU resources, like the the + * periodic preemption timer. * - * The GVMM will create a ring-0 object for each VM when it's registered, - * this is both for session cleanup purposes and for having a point where - * it's possible to implement usage polices later (in SUPR0ObjRegister). + * The GVMM will create a ring-0 object for each VM when it is registered, this + * is both for session cleanup purposes and for having a point where it is + * possible to implement usage polices later (in SUPR0ObjRegister). + * + * + * @section sec_gvmm_ppt Periodic Preemption Timer (PPT) + * + * On system that sports a high resolution kernel timer API, we use per-cpu + * timers to generate interrupts that preempts VT-x, AMD-V and raw-mode guest + * execution. The timer frequency is calculating by taking the max + * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms + * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) * + * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS). + * + * The TMCalcHostTimerFrequency() part of the things gets its takes the max + * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent, + * warp drive percent and some fudge factors. VMMR0.cpp reports the result via + * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x, + * AMD-V and raw-mode execution environments. */ @@ -56,6 +72,18 @@ #include <iprt/mem.h> #include <iprt/memobj.h> #include <iprt/mp.h> +#include <iprt/cpuset.h> +#include <iprt/spinlock.h> +#include <iprt/timer.h> + + +/******************************************************************************* +* Defined Constants And Macros * +*******************************************************************************/ +#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING) +/** Define this to enable the periodic preemption timer. 
*/ +# define GVMM_SCHED_WITH_PPT +#endif /******************************************************************************* @@ -98,6 +126,68 @@ typedef GVMHANDLE *PGVMHANDLE; #endif /** + * Per host CPU GVMM data. + */ +typedef struct GVMMHOSTCPU +{ + /** Magic number (GVMMHOSTCPU_MAGIC). */ + uint32_t volatile u32Magic; + /** The CPU ID. */ + RTCPUID idCpu; + /** The CPU set index. */ + uint32_t idxCpuSet; + +#ifdef GVMM_SCHED_WITH_PPT + /** Periodic preemption timer data. */ + struct + { + /** The handle to the periodic preemption timer. */ + PRTTIMER pTimer; + /** Spinlock protecting the data below. */ + RTSPINLOCK hSpinlock; + /** The smalles Hz that we need to care about. (static) */ + uint32_t uMinHz; + /** The number of ticks between each historization. */ + uint32_t cTicksHistoriziationInterval; + /** The current historization tick (counting up to + * cTicksHistoriziationInterval and then resetting). */ + uint32_t iTickHistorization; + /** The current timer interval. This is set to 0 when inactive. */ + uint32_t cNsInterval; + /** The current timer frequency. This is set to 0 when inactive. */ + uint32_t uTimerHz; + /** The current max frequency reported by the EMTs. + * This gets historicize and reset by the timer callback. This is + * read without holding the spinlock, so needs atomic updating. */ + uint32_t volatile uDesiredHz; + /** Whether the timer was started or not. */ + bool volatile fStarted; + /** Set if we're starting timer. */ + bool volatile fStarting; + /** The index of the next history entry (mod it). */ + uint32_t iHzHistory; + /** Historicized uDesiredHz values. The array wraps around, new entries + * are added at iHzHistory. This is updated approximately every + * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */ + uint32_t aHzHistory[8]; + /** Statistics counter for recording the number of interval changes. */ + uint32_t cChanges; + /** Statistics counter for recording the number of timer starts. */ + uint32_t cStarts; + } Ppt; +#endif /* GVMM_SCHED_WITH_PPT */ + +} GVMMHOSTCPU; +/** Pointer to the per host CPU GVMM data. */ +typedef GVMMHOSTCPU *PGVMMHOSTCPU; +/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */ +#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011) +/** The interval on history entry should cover (approximately) give in + * nanoseconds. */ +#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000) + + +/** * The GVMM instance data. */ typedef struct GVMM @@ -110,10 +200,18 @@ typedef struct GVMM uint16_t volatile iUsedHead; /** The number of VMs. */ uint16_t volatile cVMs; -// /** The number of halted EMT threads. */ -// uint16_t volatile cHaltedEMTs; + /** Alignment padding. */ + uint16_t u16Reserved; /** The number of EMTs. */ uint32_t volatile cEMTs; + /** The number of EMTs that have halted in GVMMR0SchedHalt. */ + uint32_t volatile cHaltedEMTs; + /** Alignment padding. */ + uint32_t u32Alignment; + /** When the next halted or sleeping EMT will wake up. + * This is set to 0 when it needs recalculating and to UINT64_MAX when + * there are no halted or sleeping EMTs in the GVMM. */ + uint64_t uNsNextEmtWakeup; /** The lock used to serialize VM creation, destruction and associated events that * isn't performance critical. Owners may acquire the list lock. */ RTSEMFASTMUTEX CreateDestroyLock; @@ -147,6 +245,11 @@ typedef struct GVMM * The limit for the second round of early wakeups, given in nano seconds. */ uint32_t nsEarlyWakeUp2; + + /** The number of entries in the host CPU array (aHostCpus). 
*/ + uint32_t cHostCpus; + /** Per host CPU data (variable length). */ + GVMMHOSTCPU aHostCpus[1]; } GVMM; /** Pointer to the GVMM instance data. */ typedef GVMM *PGVMM; @@ -197,12 +300,15 @@ static void gvmmR0InitPerVMData(PGVM pGVM); static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle); static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock); static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM); +#ifdef GVMM_SCHED_WITH_PPT +static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick); +#endif /** * Initializes the GVMM. * - * This is called while owninng the loader sempahore (see supdrvIOCtl_LdrLoad()). + * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()). * * @returns VBox status code. */ @@ -213,7 +319,10 @@ GVMMR0DECL(int) GVMMR0Init(void) /* * Allocate and initialize the instance data. */ - PGVMM pGVMM = (PGVMM)RTMemAllocZ(sizeof(*pGVMM)); + uint32_t cHostCpus = RTMpGetArraySize(); + AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_INTERNAL_ERROR_2); + + PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus])); if (!pGVMM) return VERR_NO_MEMORY; int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock); @@ -243,18 +352,98 @@ GVMMR0DECL(int) GVMMR0Init(void) } /* The default configuration values. */ - pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */ - pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */ - pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */; - pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */; - pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */; - - g_pGVMM = pGVMM; - LogFlow(("GVMMR0Init: pGVMM=%p\n", pGVMM)); - return VINF_SUCCESS; - } + uint32_t cNsResolution = RTSemEventMultiGetResolution(); + pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */ + if (cNsResolution >= 5*RT_NS_100US) + { + pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */ + pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */; + pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */; + pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */; + } + else if (cNsResolution > RT_NS_100US) + { + pGVMM->nsMinSleepAlone = cNsResolution / 2; + pGVMM->nsMinSleepCompany = cNsResolution / 4; + pGVMM->nsEarlyWakeUp1 = 0; + pGVMM->nsEarlyWakeUp2 = 0; + } + else + { + pGVMM->nsMinSleepAlone = 2000; + pGVMM->nsMinSleepCompany = 2000; + pGVMM->nsEarlyWakeUp1 = 0; + pGVMM->nsEarlyWakeUp2 = 0; + } + + /* The host CPU data. */ + pGVMM->cHostCpus = cHostCpus; + uint32_t iCpu = cHostCpus; + RTCPUSET PossibleSet; + RTMpGetSet(&PossibleSet); + while (iCpu-- > 0) + { + pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu; +#ifdef GVMM_SCHED_WITH_PPT + pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL; + pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK; + pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. 
(not *that* important) */ + pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1; + //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0; + //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0; + //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0; + //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0; + //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false; + //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false; + //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0; + //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0}; +#endif + + if (RTCpuSetIsMember(&PossibleSet, iCpu)) + { + pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu); + pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC; + +#ifdef GVMM_SCHED_WITH_PPT + rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer, + 50*1000*1000 /* whatever */, + RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES, + gvmmR0SchedPeriodicPreemptionTimerCallback, + &pGVMM->aHostCpus[iCpu]); + if (RT_SUCCESS(rc)) + rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock); + if (RT_FAILURE(rc)) + { + while (iCpu < cHostCpus) + { + RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer); + RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock); + pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK; + iCpu++; + } + break; + } +#endif + } + else + { + pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID; + pGVMM->aHostCpus[iCpu].u32Magic = 0; + } + } + if (RT_SUCCESS(rc)) + { + g_pGVMM = pGVMM; + LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus)); + return VINF_SUCCESS; + } + /* bail out. */ + RTSemFastMutexDestroy(pGVMM->UsedLock); + pGVMM->UsedLock = NIL_RTSEMFASTMUTEX; + } RTSemFastMutexDestroy(pGVMM->CreateDestroyLock); + pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX; } RTMemFree(pGVMM); @@ -281,8 +470,27 @@ GVMMR0DECL(void) GVMMR0Term(void) return; } - pGVMM->u32Magic++; + /* + * First of all, stop all active timers. + */ + uint32_t cActiveTimers = 0; + uint32_t iCpu = pGVMM->cHostCpus; + while (iCpu-- > 0) + { + ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC); +#ifdef GVMM_SCHED_WITH_PPT + if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL + && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer))) + cActiveTimers++; +#endif + } + if (cActiveTimers) + RTThreadSleep(1); /* fudge */ + /* + * Invalidate the and free resources. 
+ */ + pGVMM->u32Magic = ~GVMM_MAGIC; RTSemFastMutexDestroy(pGVMM->UsedLock); pGVMM->UsedLock = NIL_RTSEMFASTMUTEX; RTSemFastMutexDestroy(pGVMM->CreateDestroyLock); @@ -295,6 +503,17 @@ GVMMR0DECL(void) GVMMR0Term(void) pGVMM->iUsedHead = 0; } +#ifdef GVMM_SCHED_WITH_PPT + iCpu = pGVMM->cHostCpus; + while (iCpu-- > 0) + { + RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer); + pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL; + RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock); + pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK; + } +#endif + RTMemFree(pGVMM); } @@ -334,28 +553,28 @@ GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, ui } else if (!strcmp(pszName, "MinSleepAlone")) { - if (u64Value <= 100000000) + if (u64Value <= RT_NS_100MS) pGVMM->nsMinSleepAlone = u64Value; else rc = VERR_OUT_OF_RANGE; } else if (!strcmp(pszName, "MinSleepCompany")) { - if (u64Value <= 100000000) + if (u64Value <= RT_NS_100MS) pGVMM->nsMinSleepCompany = u64Value; else rc = VERR_OUT_OF_RANGE; } else if (!strcmp(pszName, "EarlyWakeUp1")) { - if (u64Value <= 100000000) + if (u64Value <= RT_NS_100MS) pGVMM->nsEarlyWakeUp1 = u64Value; else rc = VERR_OUT_OF_RANGE; } else if (!strcmp(pszName, "EarlyWakeUp2")) { - if (u64Value <= 100000000) + if (u64Value <= RT_NS_100MS) pGVMM->nsEarlyWakeUp2 = u64Value; else rc = VERR_OUT_OF_RANGE; @@ -606,13 +825,14 @@ GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppV { PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM); memset(pVM, 0, cPages << PAGE_SHIFT); - pVM->enmVMState = VMSTATE_CREATING; - pVM->pVMR0 = pVM; - pVM->pSession = pSession; - pVM->hSelf = iHandle; - pVM->cbSelf = cbVM; - pVM->cCpus = cCpus; - pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus); + pVM->enmVMState = VMSTATE_CREATING; + pVM->pVMR0 = pVM; + pVM->pSession = pSession; + pVM->hSelf = iHandle; + pVM->cbSelf = cbVM; + pVM->cCpus = cCpus; + pVM->uCpuExecutionCap = 100; /* default is no cap. */ + pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus); rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */); if (RT_SUCCESS(rc)) @@ -638,9 +858,10 @@ GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppV /* Initialize all the VM pointers. */ for (uint32_t i = 0; i < cCpus; i++) { - pVM->aCpus[i].pVMR0 = pVM; - pVM->aCpus[i].pVMR3 = pVM->pVMR3; - pVM->aCpus[i].idHostCpu = NIL_RTCPUID; + pVM->aCpus[i].pVMR0 = pVM; + pVM->aCpus[i].pVMR3 = pVM->pVMR3; + pVM->aCpus[i].idHostCpu = NIL_RTCPUID; + pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD; } rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0, @@ -654,12 +875,13 @@ GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppV rc = gvmmR0UsedLock(pGVMM); AssertRC(rc); - pHandle->pVM = pVM; - pHandle->pGVM = pGVM; - pHandle->hEMT0 = hEMT0; - pHandle->ProcId = ProcId; - pGVM->pVM = pVM; - pGVM->aCpus[0].hEMT = hEMT0; + pHandle->pVM = pVM; + pHandle->pGVM = pGVM; + pHandle->hEMT0 = hEMT0; + pHandle->ProcId = ProcId; + pGVM->pVM = pVM; + pGVM->aCpus[0].hEMT = hEMT0; + pVM->aCpus[0].hNativeThreadR0 = hEMT0; pGVMM->cEMTs += cCpus; gvmmR0UsedUnlock(pGVMM); @@ -954,7 +1176,7 @@ static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, v AssertRC(rc); /* - * This is a tad slow but a doubly linked list is too much hazzle. + * This is a tad slow but a doubly linked list is too much hassle. 
*/ if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles))) { @@ -1067,12 +1289,12 @@ static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, v */ pHandle->iNext = pGVMM->iFreeHead; pGVMM->iFreeHead = iHandle; - ASMAtomicXchgPtr((void * volatile *)&pHandle->pGVM, NULL); - ASMAtomicXchgPtr((void * volatile *)&pHandle->pVM, NULL); - ASMAtomicXchgPtr((void * volatile *)&pHandle->pvObj, NULL); - ASMAtomicXchgPtr((void * volatile *)&pHandle->pSession, NULL); - ASMAtomicXchgSize(&pHandle->hEMT0, NIL_RTNATIVETHREAD); - ASMAtomicXchgSize(&pHandle->ProcId, NIL_RTPROCESS); + ASMAtomicWriteNullPtr(&pHandle->pGVM); + ASMAtomicWriteNullPtr(&pHandle->pVM); + ASMAtomicWriteNullPtr(&pHandle->pvObj); + ASMAtomicWriteNullPtr(&pHandle->pSession); + ASMAtomicWriteSize(&pHandle->hEMT0, NIL_RTNATIVETHREAD); + ASMAtomicWriteSize(&pHandle->ProcId, NIL_RTPROCESS); gvmmR0UsedUnlock(pGVMM); gvmmR0CreateDestroyUnlock(pGVMM); @@ -1102,10 +1324,13 @@ GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu) if (RT_FAILURE(rc)) return rc; - AssertReturn(idCpu < pVM->cCpus, VERR_INVALID_CPU_ID); + AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED); + Assert(pGVM->cCpus == pVM->cCpus); + Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD); + + pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf(); - pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf(); return VINF_SUCCESS; } @@ -1155,7 +1380,7 @@ GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM) * Be very careful if not taking the lock as it's possible that * the VM will disappear then. * - * @remark This will not assert on an invalid pVM but try return sliently. + * @remark This will not assert on an invalid pVM but try return silently. */ static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock) { @@ -1383,16 +1608,34 @@ GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT) */ static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) { + /* + * Skip this if we've got disabled because of high resolution wakeups or by + * the user. + */ + if ( !pGVMM->nsEarlyWakeUp1 + && !pGVMM->nsEarlyWakeUp2) + return 0; + /** @todo Rewrite this algorithm. See performance defect XYZ. */ /* + * A cheap optimization to stop wasting so much time here on big setups. + */ + const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2; + if ( pGVMM->cHaltedEMTs == 0 + || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup) + return 0; + + /* * The first pass will wake up VMs which have actually expired * and look for VMs that should be woken up in the 2nd and 3rd passes. 
*/ - unsigned cWoken = 0; - unsigned cHalted = 0; - unsigned cTodo2nd = 0; - unsigned cTodo3rd = 0; + const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1; + uint64_t u64Min = UINT64_MAX; + unsigned cWoken = 0; + unsigned cHalted = 0; + unsigned cTodo2nd = 0; + unsigned cTodo3rd = 0; for (unsigned i = pGVMM->iUsedHead, cGuard = 0; i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles); i = pGVMM->aHandles[i].iNext) @@ -1403,9 +1646,8 @@ static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) { for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++) { - PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; - - uint64_t u64 = pCurGVCpu->gvmm.s.u64HaltExpire; + PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; + uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire); if (u64) { if (u64 <= u64Now) @@ -1420,10 +1662,12 @@ static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) else { cHalted++; - if (u64 <= u64Now + pGVMM->nsEarlyWakeUp1) + if (u64 <= uNsEarlyWakeUp1) cTodo2nd++; - else if (u64 <= u64Now + pGVMM->nsEarlyWakeUp2) + else if (u64 <= uNsEarlyWakeUp2) cTodo3rd++; + else if (u64 < u64Min) + u64 = u64Min; } } } @@ -1443,10 +1687,10 @@ static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) { for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++) { - PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; - - if ( pCurGVCpu->gvmm.s.u64HaltExpire - && pCurGVCpu->gvmm.s.u64HaltExpire <= u64Now + pGVMM->nsEarlyWakeUp1) + PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; + uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire); + if ( u64 + && u64 <= uNsEarlyWakeUp1) { if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0)) { @@ -1473,10 +1717,10 @@ static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) { for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++) { - PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; - - if ( pCurGVCpu->gvmm.s.u64HaltExpire - && pCurGVCpu->gvmm.s.u64HaltExpire <= u64Now + pGVMM->nsEarlyWakeUp2) + PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; + uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire); + if ( u64 + && u64 <= uNsEarlyWakeUp2) { if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0)) { @@ -1491,6 +1735,11 @@ static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) } } + /* + * Set the minimum value. + */ + pGVMM->uNsNextEmtWakeup = u64Min; + return cWoken; } @@ -1532,28 +1781,44 @@ GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTim pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId(); + /* GIP hack: We might are frequently sleeping for short intervals where the + difference between GIP and system time matters on systems with high resolution + system time. So, convert the input from GIP to System time in that case. */ Assert(ASMGetFlags() & X86_EFL_IF); - const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */ - pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now); + const uint64_t u64NowSys = RTTimeSystemNanoTS(); + const uint64_t u64NowGip = RTTimeNanoTS(); + pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip); /* * Go to sleep if we must... + * Cap the sleep time to 1 second to be on the safe side. */ - if ( u64Now < u64ExpireGipTime - && u64ExpireGipTime - u64Now > (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany - ? 
pGVMM->nsMinSleepCompany - : pGVMM->nsMinSleepAlone)) + uint64_t cNsInterval = u64ExpireGipTime - u64NowGip; + if ( u64NowGip < u64ExpireGipTime + && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany + ? pGVMM->nsMinSleepCompany + : pGVMM->nsMinSleepAlone)) { pGVM->gvmm.s.StatsSched.cHaltBlocking++; - ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime); + if (cNsInterval > RT_NS_1SEC) + u64ExpireGipTime = u64NowGip + RT_NS_1SEC; + if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup) + pGVMM->uNsNextEmtWakeup = u64ExpireGipTime; + ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime); + ASMAtomicIncU32(&pGVMM->cHaltedEMTs); gvmmR0UsedUnlock(pGVMM); - uint32_t cMillies = (u64ExpireGipTime - u64Now) / 1000000; - /* Cap the timeout to one second. */ - cMillies = RT_MIN(1000, cMillies); - rc = RTSemEventMultiWaitNoResume(pCurGVCpu->gvmm.s.HaltEventMulti, cMillies ? cMillies : 1); - ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0); - if (rc == VERR_TIMEOUT) + rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti, + RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE, + u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval); + + ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0); + ASMAtomicDecU32(&pGVMM->cHaltedEMTs); + + /* Reset the semaphore to try prevent a few false wake-ups. */ + if (rc == VINF_SUCCESS) + RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti); + else if (rc == VERR_TIMEOUT) { pGVM->gvmm.s.StatsSched.cHaltTimeouts++; rc = VINF_SUCCESS; @@ -1563,11 +1828,9 @@ GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTim { pGVM->gvmm.s.StatsSched.cHaltNotBlocking++; gvmmR0UsedUnlock(pGVMM); + RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti); } - /* Make sure false wake up calls (gvmmR0SchedDoWakeUps) cause us to spin. */ - RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti); - return rc; } @@ -1599,7 +1862,7 @@ DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu) if (pGVCpu->gvmm.s.u64HaltExpire) { rc = VINF_SUCCESS; - ASMAtomicXchgU64(&pGVCpu->gvmm.s.u64HaltExpire, 0); + ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0); } else { @@ -1881,6 +2144,170 @@ GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield) } +#ifdef GVMM_SCHED_WITH_PPT +/** + * Timer callback for the periodic preemption timer. + * + * @param pTimer The timer handle. + * @param pvUser Pointer to the per cpu structure. + * @param iTick The current tick. + */ +static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick) +{ + PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser; + + /* + * Termination check + */ + if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC) + return; + + /* + * Do the house keeping. + */ + RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; + RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp); + + if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval) + { + /* + * Historicize the max frequency. + */ + uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory); + pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz; + pCpu->Ppt.iTickHistorization = 0; + pCpu->Ppt.uDesiredHz = 0; + + /* + * Check if the current timer frequency. 
+ */ + uint32_t uHistMaxHz = 0; + for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++) + if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz) + uHistMaxHz = pCpu->Ppt.aHzHistory[i]; + if (uHistMaxHz == pCpu->Ppt.uTimerHz) + RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp); + else if (uHistMaxHz) + { + /* + * Reprogram it. + */ + pCpu->Ppt.cChanges++; + pCpu->Ppt.iTickHistorization = 0; + pCpu->Ppt.uTimerHz = uHistMaxHz; + uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz; + pCpu->Ppt.cNsInterval = cNsInterval; + if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS) + pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS + + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1) + / cNsInterval; + else + pCpu->Ppt.cTicksHistoriziationInterval = 1; + RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp); + + /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/ + RTTimerChangeInterval(pTimer, cNsInterval); + } + else + { + /* + * Stop it. + */ + pCpu->Ppt.fStarted = false; + pCpu->Ppt.uTimerHz = 0; + pCpu->Ppt.cNsInterval = 0; + RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp); + + /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/ + RTTimerStop(pTimer); + } + } + else + RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp); +} +#endif /* GVMM_SCHED_WITH_PPT */ + + +/** + * Updates the periodic preemption timer for the calling CPU. + * + * The caller must have disabled preemption! + * The caller must check that the host can do high resolution timers. + * + * @param pVM The VM handle. + * @param idHostCpu The current host CPU id. + * @param uHz The desired frequency. + */ +GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz) +{ +#ifdef GVMM_SCHED_WITH_PPT + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(RTTimerCanDoHighResolution()); + + /* + * Resolve the per CPU data. + */ + uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu); + PGVMM pGVMM = g_pGVMM; + if ( !VALID_PTR(pGVMM) + || pGVMM->u32Magic != GVMM_MAGIC) + return; + AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus)); + PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu]; + AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC + && pCpu->idCpu == idHostCpu, + ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu)); + + /* + * Check whether we need to do anything about the timer. + * We have to be a little bit careful since we might be race the timer + * callback here. + */ + if (uHz > 16384) + uHz = 16384; /** @todo add a query method for this! 
*/ + if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz) + && uHz >= pCpu->Ppt.uMinHz + && !pCpu->Ppt.fStarting /* solaris paranoia */)) + { + RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; + RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp); + + pCpu->Ppt.uDesiredHz = uHz; + uint32_t cNsInterval = 0; + if (!pCpu->Ppt.fStarted) + { + pCpu->Ppt.cStarts++; + pCpu->Ppt.fStarted = true; + pCpu->Ppt.fStarting = true; + pCpu->Ppt.iTickHistorization = 0; + pCpu->Ppt.uTimerHz = uHz; + pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz; + if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS) + pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS + + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1) + / cNsInterval; + else + pCpu->Ppt.cTicksHistoriziationInterval = 1; + } + + RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp); + + if (cNsInterval) + { + RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval); + int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval); + AssertRC(rc); + + RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp); + if (RT_FAILURE(rc)) + pCpu->Ppt.fStarted = false; + pCpu->Ppt.fStarting = false; + RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp); + } + } +#endif /* GVMM_SCHED_WITH_PPT */ +} + /** * Retrieves the GVMM statistics visible to the caller. @@ -1924,7 +2351,7 @@ GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession } /* - * Enumerate the VMs and add the ones visibile to the statistics. + * Enumerate the VMs and add the ones visible to the statistics. */ pStats->cVMs = 0; pStats->cEMTs = 0; @@ -1963,6 +2390,33 @@ GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession } } + /* + * Copy out the per host CPU statistics. + */ + uint32_t iDstCpu = 0; + uint32_t cSrcCpus = pGVMM->cHostCpus; + for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++) + { + if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID) + { + pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu; + pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet; +#ifdef GVMM_SCHED_WITH_PPT + pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz; + pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz; + pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges; + pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts; +#else + pStats->aHostCpus[iDstCpu].uDesiredHz = 0; + pStats->aHostCpus[iDstCpu].uTimerHz = 0; + pStats->aHostCpus[iDstCpu].cChanges = 0; + pStats->aHostCpus[iDstCpu].cStarts = 0; +#endif + iDstCpu++; + } + } + pStats->cHostCpus = iDstCpu; + gvmmR0UsedUnlock(pGVMM); return VINF_SUCCESS; @@ -2043,7 +2497,7 @@ GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSessio } /* - * Enumerate the VMs and add the ones visibile to the statistics. + * Enumerate the VMs and add the ones visible to the statistics. */ if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0)) { diff --git a/src/VBox/VMM/VMMR0/GVMMR0Internal.h b/src/VBox/VMM/VMMR0/GVMMR0Internal.h index 40b1b0003..053257206 100644 --- a/src/VBox/VMM/VMMR0/GVMMR0Internal.h +++ b/src/VBox/VMM/VMMR0/GVMMR0Internal.h @@ -1,4 +1,4 @@ -/* $Id: GVMMR0Internal.h $ */ +/* $Id: GVMMR0Internal.h 28800 2010-04-27 08:22:32Z vboxsync $ */ /** @file * GVMM - The Global VM Manager, Internal header. 
*/ diff --git a/src/VBox/VMM/VMMR0/HWACCMR0.cpp b/src/VBox/VMM/VMMR0/HWACCMR0.cpp index f3fc77952..a38d725e7 100644 --- a/src/VBox/VMM/VMMR0/HWACCMR0.cpp +++ b/src/VBox/VMM/VMMR0/HWACCMR0.cpp @@ -1,4 +1,4 @@ -/* $Id: HWACCMR0.cpp $ */ +/* $Id: HWACCMR0.cpp 34184 2010-11-18 21:19:11Z vboxsync $ */ /** @file * HWACCM - Host Context Ring 0. */ @@ -76,7 +76,7 @@ static struct struct { - /** Set by the ring-0 driver to indicate VMX is supported by the CPU. */ + /** Set to by us to indicate VMX is supported by the CPU. */ bool fSupported; /** Whether we're using SUPR0EnableVTx or not. */ bool fUsingSUPR0EnableVTx; @@ -123,7 +123,7 @@ static struct /** SVM feature bits from cpuid 0x8000000a */ uint32_t u32Features; - /** Set by the ring-0 driver to indicate SVM is supported by the CPU. */ + /** Set by us to indicate SVM is supported by the CPU. */ bool fSupported; } svm; /** Saved error from detection */ @@ -1159,7 +1159,7 @@ VMMR0DECL(int) HWACCMR0Enter(PVM pVM, PVMCPU pVCpu) } #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE - bool fStartedSet = PGMDynMapStartOrMigrateAutoSet(pVCpu); + bool fStartedSet = PGMR0DynMapStartOrMigrateAutoSet(pVCpu); #endif rc = HWACCMR0Globals.pfnEnterSession(pVM, pVCpu, pCpu); @@ -1172,7 +1172,7 @@ VMMR0DECL(int) HWACCMR0Enter(PVM pVM, PVMCPU pVCpu) #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE if (fStartedSet) - PGMDynMapReleaseAutoSet(pVCpu); + PGMRZDynMapReleaseAutoSet(pVCpu); #endif /* keep track of the CPU owning the VMCS for debugging scheduling weirdness and ring-3 calls. */ @@ -1274,7 +1274,7 @@ VMMR0DECL(int) HWACCMR0RunGuestCode(PVM pVM, PVMCPU pVCpu) Assert(ASMAtomicReadBool(&pCpu->fInUse) == true); #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE - PGMDynMapStartAutoSet(pVCpu); + PGMRZDynMapStartAutoSet(pVCpu); #endif pCtx = CPUMQueryGuestCtxPtr(pVCpu); @@ -1282,7 +1282,7 @@ VMMR0DECL(int) HWACCMR0RunGuestCode(PVM pVM, PVMCPU pVCpu) rc = HWACCMR0Globals.pfnRunGuestCode(pVM, pVCpu, pCtx); #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE - PGMDynMapReleaseAutoSet(pVCpu); + PGMRZDynMapReleaseAutoSet(pVCpu); #endif return rc; } @@ -1299,6 +1299,7 @@ VMMR0DECL(int) HWACCMR0RunGuestCode(PVM pVM, PVMCPU pVCpu) */ VMMR0DECL(int) HWACCMR0SaveFPUState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFpu64SwitchBack); if (pVM->hwaccm.s.vmx.fSupported) return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSaveGuestFPU64, 0, NULL); @@ -1315,6 +1316,7 @@ VMMR0DECL(int) HWACCMR0SaveFPUState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) */ VMMR0DECL(int) HWACCMR0SaveDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDebug64SwitchBack); if (pVM->hwaccm.s.vmx.fSupported) return VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSaveGuestDebug64, 0, NULL); @@ -1432,7 +1434,7 @@ VMMR0DECL(void) HWACCMR0SavePendingIOPortWrite(PVMCPU pVCpu, RTGCPTR GCPtrRip, R */ VMMR0DECL(int) HWACCMR0EnterSwitcher(PVM pVM, bool *pfVTxDisabled) { - Assert(!(ASMGetFlags() & X86_EFL_IF)); + Assert(!(ASMGetFlags() & X86_EFL_IF) || !RTThreadPreemptIsEnabled(NIL_RTTHREAD)); *pfVTxDisabled = false; @@ -1471,7 +1473,7 @@ VMMR0DECL(int) HWACCMR0EnterSwitcher(PVM pVM, bool *pfVTxDisabled) } /** - * Reeable VT-x if was active *and* the current switcher turned off paging + * Enable VT-x if was active *and* the current switcher turned off paging * * @returns VBox status code. * @param pVM VM handle. 
diff --git a/src/VBox/VMM/VMMR0/HWACCMR0A.asm b/src/VBox/VMM/VMMR0/HWACCMR0A.asm index 1216e5849..219ee9229 100644 --- a/src/VBox/VMM/VMMR0/HWACCMR0A.asm +++ b/src/VBox/VMM/VMMR0/HWACCMR0A.asm @@ -1,4 +1,4 @@ -; $Id: HWACCMR0A.asm $ +; $Id: HWACCMR0A.asm 33540 2010-10-28 09:27:05Z vboxsync $ ;; @file ; VMXM - R0 vmx helpers ; @@ -77,12 +77,12 @@ ;; @def MYPUSHSEGS ; Macro saving all segment registers on the stack. ; @param 1 full width register name -; @param 2 16-bit regsiter name for \a 1. +; @param 2 16-bit register name for \a 1. ;; @def MYPOPSEGS ; Macro restoring all segment registers on the stack ; @param 1 full width register name -; @param 2 16-bit regsiter name for \a 1. +; @param 2 16-bit register name for \a 1. %ifdef MAYBE_64_BIT ; Save a host and load the corresponding guest MSR (trashes rdx & rcx) diff --git a/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac b/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac index 13a9e1bbf..7e301d03a 100644 --- a/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac +++ b/src/VBox/VMM/VMMR0/HWACCMR0Mixed.mac @@ -1,4 +1,4 @@ -; $Id: HWACCMR0Mixed.mac $ +; $Id: HWACCMR0Mixed.mac 30414 2010-06-24 08:46:18Z vboxsync $ ;; @file ; HWACCMR0Mixed.mac - Stuff that darwin needs to build two versions of. ; diff --git a/src/VBox/VMM/VMMR0/HWSVMR0.cpp b/src/VBox/VMM/VMMR0/HWSVMR0.cpp index 34be128da..afb7b8633 100644 --- a/src/VBox/VMM/VMMR0/HWSVMR0.cpp +++ b/src/VBox/VMM/VMMR0/HWSVMR0.cpp @@ -1,4 +1,4 @@ -/* $Id: HWSVMR0.cpp $ */ +/* $Id: HWSVMR0.cpp 32847 2010-09-30 14:18:37Z vboxsync $ */ /** @file * HWACCM SVM - Host Context Ring 0. */ @@ -787,7 +787,7 @@ VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) case PGMMODE_PAE: /* PAE paging. */ case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */ - /** @todo use normal 32 bits paging */ + /** Must use PAE paging as we could use physical memory > 4 GB */ val |= X86_CR4_PAE; break; @@ -955,7 +955,12 @@ VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) */ VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { - int rc = VINF_SUCCESS; + STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2); + + VBOXSTRICTRC rc = VINF_SUCCESS; + int rc2; uint64_t exitCode = (uint64_t)SVM_EXIT_INVALID; SVM_VMCB *pVMCB; bool fSyncTPR = false; @@ -970,14 +975,14 @@ VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) uint64_t u64LastTime = RTTimeMilliTS(); #endif - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); - pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB; AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_EM_INTERNAL_ERROR); /* We can jump to this point to resume execution after determining that a VM-exit is innocent. */ ResumeExecution: + if (!STAM_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry)) + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x); Assert(!HWACCMR0SuspendPending()); /* Safety precaution; looping for too long here can have a very bad effect on the host */ @@ -1011,7 +1016,7 @@ ResumeExecution: } #ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 - if (RT_UNLIKELY(cResume & 0xf) == 0) + if (RT_UNLIKELY((cResume & 0xf) == 0)) { uint64_t u64CurTime = RTTimeMilliTS(); @@ -1024,17 +1029,17 @@ ResumeExecution: #endif /* Check for pending actions that force us to go back to ring 3. 
*/ - if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING) + if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA) || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST)) { /* Check if a sync operation is pending. */ if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) { rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); - AssertRC(rc); + AssertRC(VBOXSTRICTRC_VAL(rc)); if (rc != VINF_SUCCESS) { - Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", rc)); + Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc))); goto end; } } @@ -1047,9 +1052,7 @@ ResumeExecution: if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK) || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK)) { - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; goto end; } @@ -1059,7 +1062,6 @@ ResumeExecution: if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST) || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST)) { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); rc = VINF_EM_PENDING_REQUEST; goto end; } @@ -1067,10 +1069,16 @@ ResumeExecution: /* Check if a pgm pool flush is in progress. */ if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); rc = VINF_PGM_POOL_FLUSH_PENDING; goto end; } + + /* Check if DMA work is pending (2nd+ run). */ + if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1) + { + rc = VINF_EM_RAW_TO_R3; + goto end; + } } #ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION @@ -1098,10 +1106,7 @@ ResumeExecution: /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */ rc = SVMR0CheckPendingInterrupt(pVM, pVCpu, pVMCB, pCtx); if (RT_FAILURE(rc)) - { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); goto end; - } /* TPR caching using CR8 is only available in 64 bits mode or with 32 bits guests when X86_CPUID_AMD_FEATURE_ECX_CR8L is supported. */ /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!!!!! (no longer true) @@ -1112,7 +1117,7 @@ ResumeExecution: bool fPending; /* TPR caching in CR8 */ - int rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending); + rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending); AssertRC(rc2); if (pVM->hwaccm.s.fTPRPatchingActive) @@ -1150,7 +1155,6 @@ ResumeExecution: } /* All done! Let's start VM execution. */ - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, x); /* Enable nested paging if necessary (disabled each time after #VMEXIT). */ pVMCB->ctrl.NestedPaging.n.u1NestedPaging = pVM->hwaccm.s.fNestedPaging; @@ -1182,7 +1186,6 @@ ResumeExecution: rc = SVMR0LoadGuestState(pVM, pVCpu, pCtx); if (RT_UNLIKELY(rc != VINF_SUCCESS)) { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); VMMR0LogFlushEnable(pVCpu); goto end; } @@ -1194,6 +1197,7 @@ ResumeExecution: uOldEFlags = ASMIntDisableFlags(); VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); #endif + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x); pCpu = HWACCMR0GetCurrentCpu(); /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. 
*/ @@ -1296,11 +1300,11 @@ ResumeExecution: TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVMCB->ctrl.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */); TMNotifyEndOfExecution(pVCpu); VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x); ASMSetFlags(uOldEFlags); #ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION uOldEFlags = ~(RTCCUINTREG)0; #endif - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, x); /* * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -1308,8 +1312,6 @@ ResumeExecution: * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit1, x); - /* Reason for the VM exit */ exitCode = pVMCB->ctrl.u64ExitCode; @@ -1567,20 +1569,22 @@ ResumeExecution: if ((pCtx->msrLSTAR & 0xff) != u8LastTPR) { /* Our patch code uses LSTAR for TPR caching. */ - rc = PDMApicSetTPR(pVCpu, pCtx->msrLSTAR & 0xff); - AssertRC(rc); + rc2 = PDMApicSetTPR(pVCpu, pCtx->msrLSTAR & 0xff); + AssertRC(rc2); } } else { if ((u8LastTPR >> 4) != pVMCB->ctrl.IntCtrl.n.u8VTPR) { - rc = PDMApicSetTPR(pVCpu, pVMCB->ctrl.IntCtrl.n.u8VTPR << 4); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ - AssertRC(rc); + rc2 = PDMApicSetTPR(pVCpu, pVMCB->ctrl.IntCtrl.n.u8VTPR << 4); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ + AssertRC(rc2); } } } + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x); + /* Deal with the reason of the VM-exit. */ switch (exitCode) { @@ -1619,12 +1623,10 @@ ResumeExecution: Event.n.u8Vector = X86_XCPT_DB; SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } /* Return to ring 3 to deal with the debug exit code. */ - Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc)); + Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc))); break; } @@ -1641,7 +1643,6 @@ ResumeExecution: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM); /* Continue execution. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0; goto ResumeExecution; @@ -1656,7 +1657,6 @@ ResumeExecution: Event.n.u8Vector = X86_XCPT_NM; SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } @@ -1684,8 +1684,6 @@ ResumeExecution: Event.n.u32ErrorCode = errCode; SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } #endif @@ -1727,14 +1725,13 @@ ResumeExecution: /* Forward it to our trap handler first, in case our shadow pages are out of sync. */ rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress); - Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc)); + Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); if (rc == VINF_SUCCESS) { /* We've successfully synced our shadow pages, so let's just continue execution. 
*/ Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF); TRPMResetTrap(pVCpu); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } else @@ -1760,13 +1757,11 @@ ResumeExecution: Event.n.u32ErrorCode = errCode; SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } #ifdef VBOX_STRICT if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK) - LogFlow(("PGMTrap0eHandler failed with %d\n", rc)); + LogFlow(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc))); #endif /* Need to go back to the recompiler to emulate the instruction. */ TRPMResetTrap(pVCpu); @@ -1791,8 +1786,6 @@ ResumeExecution: Event.n.u8Vector = X86_XCPT_MF; SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } @@ -1817,6 +1810,7 @@ ResumeExecution: Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ break; case X86_XCPT_BP: + /** Saves the wrong EIP on the stack (pointing to the int3 instead of the next instruction. */ break; case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE); @@ -1837,8 +1831,6 @@ ResumeExecution: } Log(("Trap %x at %04x:%RGv esi=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, pCtx->esi)); SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } #endif @@ -1855,17 +1847,18 @@ ResumeExecution: { /* EXITINFO1 contains fault errorcode; EXITINFO2 contains the guest physical address causing the fault. */ uint32_t errCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */ - RTGCPHYS uFaultAddress = pVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */ + RTGCPHYS GCPhysFault = pVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */ PGMMODE enmShwPagingMode; Assert(pVM->hwaccm.s.fNestedPaging); - LogFlow(("Nested page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode)); + LogFlow(("Nested page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode)); #ifdef VBOX_HWACCM_WITH_GUEST_PATCHING /* Shortcut for APIC TPR reads and writes; 32 bits guests only */ if ( pVM->hwaccm.s.fTRPPatchingAllowed - && (uFaultAddress & 0xfff) == 0x080 - && !(errCode & X86_TRAP_PF_P) /* not present */ + && (GCPhysFault & PAGE_OFFSET_MASK) == 0x080 + && ( !(errCode & X86_TRAP_PF_P) /* not present */ + || (errCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD) /* mmio optimization */) && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0 && !CPUMIsGuestInLongModeEx(pCtx) && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches)) @@ -1874,7 +1867,7 @@ ResumeExecution: PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */ GCPhysApicBase &= PAGE_BASE_GC_MASK; - if (uFaultAddress == GCPhysApicBase + 0x80) + if (GCPhysFault == GCPhysApicBase + 0x80) { /* Only attempt to patch the instruction once. */ PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip); @@ -1887,35 +1880,47 @@ ResumeExecution: } #endif - /* Exit qualification contains the linear address of the page fault. */ - TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP); - TRPMSetErrorCode(pVCpu, errCode); - TRPMSetFaultAddress(pVCpu, uFaultAddress); - /* Handle the pagefault trap for the nested shadow table. 
*/ -#if HC_ARCH_BITS == 32 +#if HC_ARCH_BITS == 32 /** @todo shadow this in a variable. */ if (CPUMIsGuestInLongModeEx(pCtx)) enmShwPagingMode = PGMMODE_AMD64_NX; else #endif enmShwPagingMode = PGMGetHostMode(pVM); - rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmShwPagingMode, errCode, CPUMCTX2CORE(pCtx), uFaultAddress); - Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc)); + /* MMIO optimization */ + Assert((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD); + if ((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) + { + rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmShwPagingMode, CPUMCTX2CORE(pCtx), GCPhysFault, errCode); + if (rc == VINF_SUCCESS) + { + Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip)); + goto ResumeExecution; + } + Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip)); + break; + } + + /* Exit qualification contains the linear address of the page fault. */ + TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP); + TRPMSetErrorCode(pVCpu, errCode); + TRPMSetFaultAddress(pVCpu, GCPhysFault); + + rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmShwPagingMode, errCode, CPUMCTX2CORE(pCtx), GCPhysFault); + Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); if (rc == VINF_SUCCESS) { /* We've successfully synced our shadow pages, so let's just continue execution. */ - Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode)); + Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF); TRPMResetTrap(pVCpu); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } #ifdef VBOX_STRICT if (rc != VINF_EM_RAW_EMULATE_INSTR) - LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc)); + LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", VBOXSTRICTRC_VAL(rc))); #endif /* Need to go back to the recompiler to emulate the instruction. */ TRPMResetTrap(pVCpu); @@ -1944,7 +1949,6 @@ ResumeExecution: /* Skip instruction and continue directly. */ pCtx->rip += 2; /* Note! hardcoded opcode size! */ /* Continue execution.*/ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; case SVM_EXIT_CPUID: /* Guest software attempted to execute CPUID. */ @@ -1956,10 +1960,9 @@ ResumeExecution: { /* Update EIP and continue execution. */ pCtx->rip += 2; /* Note! hardcoded opcode size! */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } - AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc)); + AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); rc = VINF_EM_RAW_EMULATE_INSTR; break; } @@ -1973,7 +1976,6 @@ ResumeExecution: { /* Update EIP and continue execution. */ pCtx->rip += 2; /* Note! hardcoded opcode size! */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } rc = VINF_EM_RAW_EMULATE_INSTR; @@ -2004,10 +2006,9 @@ ResumeExecution: { /* Update EIP and continue execution. */ pCtx->rip += 3; /* Note! hardcoded opcode size! 
*/ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } - AssertMsgFailed(("EMU: rdtscp failed with %Rrc\n", rc)); + AssertMsgFailed(("EMU: rdtscp failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); rc = VINF_EM_RAW_EMULATE_INSTR; break; } @@ -2064,7 +2065,6 @@ ResumeExecution: /* EIP has been updated already. */ /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); @@ -2086,7 +2086,6 @@ ResumeExecution: /* EIP has been updated already. */ /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); @@ -2113,10 +2112,8 @@ ResumeExecution: pVMCB->ctrl.u16InterceptWrDRx = 0; /* Save the host and load the guest debug state. */ - rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); - AssertRC(rc); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); + rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); + AssertRC(rc2); goto ResumeExecution; } @@ -2127,7 +2124,6 @@ ResumeExecution: pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG; /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); @@ -2153,10 +2149,8 @@ ResumeExecution: pVMCB->ctrl.u16InterceptWrDRx = 0; /* Save the host and load the guest debug state. */ - rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); - AssertRC(rc); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); + rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */); + AssertRC(rc2); goto ResumeExecution; } @@ -2166,7 +2160,6 @@ ResumeExecution: /* EIP has been updated already. */ /* Only resume if successful. 
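The DRx exit handling above switches debug registers lazily: on the first guest access the read/write intercepts are dropped and CPUMR0LoadGuestDebugState pulls the guest's DR0-7 onto the CPU, so later accesses run without exiting at all. The snippet below sketches that lazy-activation idea with purely hypothetical state (none of these names are the real HWACCM or VMCB fields).

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical per-VCPU state, for illustration only. */
typedef struct DemoVCpu
{
    bool fGuestDebugActive;   /* guest DR0-7 currently loaded on the CPU? */
    bool fInterceptDrx;       /* are MOV DRx instructions still intercepted? */
    unsigned cDrxExits;       /* how many exits the DRx accesses cost us */
} DemoVCpu;

/* Called from the DRx-access exit handler: switch to the guest debug state
 * once, then stop intercepting so later accesses cause no exits at all. */
static void OnDrxAccessExit(DemoVCpu *pVCpu)
{
    pVCpu->cDrxExits++;
    if (!pVCpu->fGuestDebugActive)
    {
        pVCpu->fInterceptDrx     = false;  /* clear the DRx intercepts in the real code */
        pVCpu->fGuestDebugActive = true;   /* CPUMR0LoadGuestDebugState() in the real code */
    }
}

int main(void)
{
    DemoVCpu vcpu = { false, true, 0 };
    OnDrxAccessExit(&vcpu);                /* first access: one exit, state switched */
    /* further guest DRx accesses would no longer exit at all */
    printf("exits=%u intercepting=%d\n", vcpu.cDrxExits, (int)vcpu.fInterceptDrx);
    return 0;
}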
*/ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3); @@ -2219,13 +2212,13 @@ ResumeExecution: { Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite); - rc = VBOXSTRICTRC_TODO(IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->prefix, uIOSize)); + rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->prefix, uIOSize); } else { Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead); - rc = VBOXSTRICTRC_TODO(IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->prefix, uIOSize)); + rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->prefix, uIOSize); } } else @@ -2240,7 +2233,7 @@ ResumeExecution: { Log2(("IOMIOPortWrite %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite); - rc = VBOXSTRICTRC_TODO(IOMIOPortWrite(pVM, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize)); + rc = IOMIOPortWrite(pVM, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize); if (rc == VINF_IOM_HC_IOPORT_WRITE) HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, uIOSize); } @@ -2249,7 +2242,7 @@ ResumeExecution: uint32_t u32Val = 0; STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead); - rc = VBOXSTRICTRC_TODO(IOMIOPortRead(pVM, IoExitInfo.n.u16Port, &u32Val, uIOSize)); + rc = IOMIOPortRead(pVM, IoExitInfo.n.u16Port, &u32Val, uIOSize); if (IOM_SUCCESS(rc)) { /* Write back to the EAX register. */ @@ -2319,17 +2312,13 @@ ResumeExecution: Event.n.u8Vector = X86_XCPT_DB; SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } } } - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } - Log2(("EM status from IO at %RGv %x size %d: %Rrc\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize, rc)); + Log2(("EM status from IO at %RGv %x size %d: %Rrc\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize, VBOXSTRICTRC_VAL(rc))); break; } @@ -2339,7 +2328,7 @@ ResumeExecution: else if (rc == VINF_IOM_HC_IOPORT_WRITE) Assert(IoExitInfo.n.u1Type == 0); else - AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc)); + AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); #endif Log2(("Failed IO at %RGv %x size %d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize)); break; @@ -2372,7 +2361,7 @@ ResumeExecution: ) goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); break; case SVM_EXIT_MONITOR: @@ -2387,7 +2376,7 @@ ResumeExecution: pCtx->rip += 3; /* Note: hardcoded opcode size assumption! 
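The port I/O path above masks the operand with uAndVal, which is derived from the access size, before handing it to IOMIOPortWrite, and on IN it writes the result back to EAX. The merge itself is not visible in the hunk, so the sketch below only assumes the usual behaviour, namely that a 1 or 2 byte IN replaces AL or AX and leaves the upper bytes of EAX alone.

#include <stdint.h>
#include <stdio.h>

/* Mask for a 1/2/4 byte port access, mirroring what uAndVal is used for. */
static uint32_t SizeMask(unsigned cbSize)
{
    return cbSize == 1 ? 0xffu : cbSize == 2 ? 0xffffu : 0xffffffffu;
}

/* Merge an IN result into EAX without touching the bytes the access didn't cover. */
static uint32_t MergeInResult(uint32_t eax, uint32_t u32Val, unsigned cbSize)
{
    uint32_t uAndVal = SizeMask(cbSize);
    return (eax & ~uAndVal) | (u32Val & uAndVal);
}

int main(void)
{
    uint32_t eax = 0x11223344u;
    /* An 8-bit IN returning 0xAB must only replace AL. */
    printf("%08x\n", MergeInResult(eax, 0xABu, 1));   /* 112233ab */
    /* A 16-bit OUT would write (eax & SizeMask(2)) = 0x3344 to the port. */
    printf("%04x\n", eax & SizeMask(2));
    return 0;
}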
*/ goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); break; } @@ -2419,8 +2408,6 @@ ResumeExecution: Log(("Forced #UD trap at %RGv\n", (RTGCPTR)pCtx->rip)); SVMR0InjectEvent(pVCpu, pVMCB, pCtx, &Event); - - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } @@ -2439,15 +2426,14 @@ ResumeExecution: Log(("SVM: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff)); /* Our patch code uses LSTAR for TPR caching. */ - rc = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); - AssertRC(rc); + rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); + AssertRC(rc2); } /* Skip the instruction and continue. */ pCtx->rip += 2; /* wrmsr = [0F 30] */ /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } @@ -2460,10 +2446,9 @@ ResumeExecution: /* EIP has been updated already. */ /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc))); break; } @@ -2485,8 +2470,8 @@ ResumeExecution: case SVM_EVENT_NMI: Log(("SVM_EXIT_TASK_SWITCH: reassert trap %d\n", Event.n.u8Vector)); Assert(!Event.n.u1ErrorCodeValid); - rc = TRPMAssertTrap(pVCpu, Event.n.u8Vector, TRPM_HARDWARE_INT); - AssertRC(rc); + rc2 = TRPMAssertTrap(pVCpu, Event.n.u8Vector, TRPM_HARDWARE_INT); + AssertRC(rc2); break; default: @@ -2524,6 +2509,9 @@ ResumeExecution: end: + /* We now going back to ring-3, so clear the action flag. */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); + /* Signal changes for the recompiler. */ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS); @@ -2555,8 +2543,10 @@ end: ASMSetFlags(uOldEFlags); #endif + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); - return rc; + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); + return VBOXSTRICTRC_TODO(rc); } /** diff --git a/src/VBox/VMM/VMMR0/HWSVMR0.h b/src/VBox/VMM/VMMR0/HWSVMR0.h index ea2752cf1..f662d37f6 100644 --- a/src/VBox/VMM/VMMR0/HWSVMR0.h +++ b/src/VBox/VMM/VMMR0/HWSVMR0.h @@ -1,4 +1,4 @@ -/* $Id: HWSVMR0.h $ */ +/* $Id: HWSVMR0.h 28800 2010-04-27 08:22:32Z vboxsync $ */ /** @file * HWACCM AMD-V - Internal header file. */ diff --git a/src/VBox/VMM/VMMR0/HWVMXR0.cpp b/src/VBox/VMM/VMMR0/HWVMXR0.cpp index 0fb4c571d..673b0315b 100644 --- a/src/VBox/VMM/VMMR0/HWVMXR0.cpp +++ b/src/VBox/VMM/VMMR0/HWVMXR0.cpp @@ -1,4 +1,4 @@ -/* $Id: HWVMXR0.cpp $ */ +/* $Id: HWVMXR0.cpp 35023 2010-12-13 15:25:51Z vboxsync $ */ /** @file * HWACCM VMX - Host Context Ring 0. 
*/ @@ -20,6 +20,7 @@ * Header Files * *******************************************************************************/ #define LOG_GROUP LOG_GROUP_HWACCM +#include <iprt/asm-amd64-x86.h> #include <VBox/hwaccm.h> #include <VBox/pgm.h> #include <VBox/dbgf.h> @@ -33,7 +34,6 @@ #include <VBox/pdmapi.h> #include <VBox/err.h> #include <VBox/log.h> -#include <iprt/asm-amd64-x86.h> #include <iprt/assert.h> #include <iprt/param.h> #include <iprt/string.h> @@ -69,7 +69,7 @@ extern "C" uint32_t g_fVMXIs64bitHost; /******************************************************************************* * Local Functions * *******************************************************************************/ -static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx); +static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx); static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu); static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu); static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu); @@ -728,7 +728,9 @@ static void vmxR0SetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool /** * Injects an event (trap or external interrupt) * - * @returns VBox status code. + * @returns VBox status code. Note that it may return VINF_EM_RESET to + * indicate a triple fault when injecting X86_XCPT_DF. + * * @param pVM The VM to operate on. * @param pVCpu The VMCPU to operate on. * @param pCtx CPU Context @@ -1294,6 +1296,45 @@ static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) } /** + * Loads a minimal guest state + * + * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!! + * + * @param pVM The VM to operate on. + * @param pVCpu The VMCPU to operate on. + * @param pCtx Guest context + */ +VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) +{ + int rc; + X86EFLAGS eflags; + + Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST)); + + /* EIP, ESP and EFLAGS */ + rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip); + rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp); + AssertRC(rc); + + /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */ + eflags = pCtx->eflags; + eflags.u32 &= VMX_EFLAGS_RESERVED_0; + eflags.u32 |= VMX_EFLAGS_RESERVED_1; + + /* Real mode emulation using v86 mode. */ + if ( CPUMIsGuestInRealModeEx(pCtx) + && pVM->hwaccm.s.vmx.pRealModeTSS) + { + pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags; + + eflags.Bits.u1VM = 1; + eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */ + } + rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32); + AssertRC(rc); +} + +/** * Loads the guest state * * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!! @@ -1307,7 +1348,6 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { int rc = VINF_SUCCESS; RTGCUINTPTR val; - X86EFLAGS eflags; /* VMX_VMCS_CTRL_ENTRY_CONTROLS * Set required bits to one and zero according to the MSR capabilities. 
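The new VMXR0LoadMinimalGuestState above writes only RIP, RSP and a sanitized RFLAGS into the VMCS: reserved EFLAGS bits are forced to their fixed architectural values, and when real mode is emulated through v86 the VM flag is set and IOPL is cleared so privileged instructions still fault into the monitor. The helper below redoes just that flag arithmetic as a standalone sketch; the hex masks are the architectural bit positions that the VMX_EFLAGS_RESERVED_* macros in the hunk stand for.

#include <stdint.h>
#include <stdio.h>

#define EFL_MUST_BE_ZERO 0xffc08028u  /* bits 22-31, 15, 5 and 3 */
#define EFL_MUST_BE_ONE  0x00000002u  /* bit 1 */
#define EFL_VM           0x00020000u  /* virtual-8086 mode */
#define EFL_IOPL_MASK    0x00003000u

static uint32_t SanitizeGuestEflags(uint32_t efl, int fRealModeViaV86)
{
    efl &= ~EFL_MUST_BE_ZERO;       /* clear the must-be-zero bits */
    efl |= EFL_MUST_BE_ONE;         /* and set the always-one bit */
    if (fRealModeViaV86)
    {
        efl |= EFL_VM;              /* run real-mode guest code as v86 */
        efl &= ~EFL_IOPL_MASK;      /* IOPL 0 so CLI/IN/OUT etc. still fault */
    }
    return efl;
}

int main(void)
{
    printf("%08x\n", SanitizeGuestEflags(0x00000202u, 0)); /* typical protected-mode flags */
    printf("%08x\n", SanitizeGuestEflags(0x00003202u, 1)); /* real mode, IOPL gets cleared */
    return 0;
}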
@@ -1519,12 +1559,15 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) } /* - * Sysenter MSRs (unconditional) + * Sysenter MSRs */ - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp); - AssertRC(rc); + if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR) + { + rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs); + rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip); + rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp); + AssertRC(rc); + } /* Control registers */ if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0) @@ -1583,12 +1626,16 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */ | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */ | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */ - | X86_CR0_TS - | X86_CR0_ET /* Bit not restored during VM-exit! */ | X86_CR0_CD /* Bit not restored during VM-exit! */ - | X86_CR0_NW /* Bit not restored during VM-exit! */ - | X86_CR0_NE - | X86_CR0_MP; + | X86_CR0_NW /* Bit not restored during VM-exit! */ + | X86_CR0_NE; + + /* When the guest's FPU state is active, then we no longer care about + * the FPU related bits. + */ + if (CPUMIsGuestFPUStateActive(pVCpu) == false) + val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP; + pVCpu->hwaccm.s.vmx.cr0_mask = val; rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val); @@ -1615,7 +1662,7 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) case PGMMODE_PAE: /* PAE paging. */ case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */ - /** @todo use normal 32 bits paging */ + /** Must use PAE paging as we could use physical memory > 4 GB */ val |= X86_CR4_PAE; break; @@ -1771,71 +1818,6 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) AssertRC(rc); } - /* EIP, ESP and EFLAGS */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip); - rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp); - AssertRC(rc); - - /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */ - eflags = pCtx->eflags; - eflags.u32 &= VMX_EFLAGS_RESERVED_0; - eflags.u32 |= VMX_EFLAGS_RESERVED_1; - - /* Real mode emulation using v86 mode. */ - if ( CPUMIsGuestInRealModeEx(pCtx) - && pVM->hwaccm.s.vmx.pRealModeTSS) - { - pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags; - - eflags.Bits.u1VM = 1; - eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. 
*/ - } - rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32); - AssertRC(rc); - - bool fOffsettedTsc; - if (pVM->hwaccm.s.vmx.fUsePreemptTimer) - { - uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset); - cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift; - uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount); - AssertRC(rc); - } - else - fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset); - if (fOffsettedTsc) - { - uint64_t u64CurTSC = ASMReadTSC(); - if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu)) - { - /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */ - rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset); - AssertRC(rc); - - pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset); - } - else - { - /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */ - LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGet(pVCpu))); - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow); - } - } - else - { - pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); - STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept); - } - /* 64 bits guest mode? */ if (CPUMIsGuestInLongModeEx(pCtx)) { @@ -1850,11 +1832,14 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) # endif pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64; #endif - /* Unconditionally update these as wrmsr might have changed them. */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base); - AssertRC(rc); - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base); - AssertRC(rc); + if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR) + { + /* Update these as wrmsr might have changed them. */ + rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base); + AssertRC(rc); + rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base); + AssertRC(rc); + } } else { @@ -1911,9 +1896,61 @@ VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) AssertRC(rc); #endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */ - /* Done. */ + bool fOffsettedTsc; + if (pVM->hwaccm.s.vmx.fUsePreemptTimer) + { + uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset); + + /* Make sure the returned values have sane upper and lower boundaries. 
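The TSC and preemption-timer programming removed here reappears further down in the function (after the MSR-load section) with one functional addition: the ticks-to-deadline value gets clamped before it is written into the 32-bit preemption-timer field. The sketch below restates the two per-entry decisions in isolation; only the 1/64 and 1/2048 second bounds, the UINT32_MAX - 16 cap and the rule that the guest must never see its TSC go backwards come from the hunks, the rest of the names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Offsetting is only safe while current TSC + offset stays ahead of the last
 * value the guest has already seen; otherwise RDTSC must be intercepted and
 * emulated so the guest never observes a decreasing TSC. */
static int CanUseTscOffsetting(uint64_t u64HostTsc, uint64_t u64TscOffset,
                               uint64_t u64LastSeenByGuest)
{
    return u64HostTsc + u64TscOffset >= u64LastSeenByGuest;
}

/* Clamp the deadline to [1/2048 s, 1/64 s], scale to the preemption-timer rate
 * (TSC >> shift) and cap it to what the 32-bit VMCS field can hold. */
static uint32_t ComputePreemptTimerTicks(uint64_t cTicksToDeadline,
                                         uint64_t u64CpuHz,
                                         unsigned cPreemptTimerShift)
{
    if (cTicksToDeadline > u64CpuHz / 64)
        cTicksToDeadline = u64CpuHz / 64;        /* don't starve other timers */
    if (cTicksToDeadline < u64CpuHz / 2048)
        cTicksToDeadline = u64CpuHz / 2048;      /* don't exit again immediately */

    cTicksToDeadline >>= cPreemptTimerShift;
    if (cTicksToDeadline > UINT32_MAX - 16)
        cTicksToDeadline = UINT32_MAX - 16;
    return (uint32_t)cTicksToDeadline;
}

int main(void)
{
    uint64_t u64CpuHz = 2800000000ull;           /* assumed example: 2.8 GHz TSC */
    printf("offset ok: %d\n", CanUseTscOffsetting(1000000, 500, 999000));
    printf("timer ticks: %u\n", ComputePreemptTimerTicks(10 * u64CpuHz, u64CpuHz, 5));
    return 0;
}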
*/ + uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage); + + cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */ + cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */ + + cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift; + uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); + rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount); + AssertRC(rc); + } + else + fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset); + if (fOffsettedTsc) + { + uint64_t u64CurTSC = ASMReadTSC(); + if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu)) + { + /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */ + rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset); + AssertRC(rc); + + pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; + rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset); + } + else + { + /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */ + LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGet(pVCpu))); + pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; + rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow); + } + } + else + { + pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT; + rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); + AssertRC(rc); + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept); + } + + /* Done with the major changes */ pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST; + /* Minimal guest state update (esp, eip, eflags mostly) */ + VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx); return rc; } @@ -2249,7 +2286,12 @@ static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu) */ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { - int rc = VINF_SUCCESS; + STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1); + STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2); + + VBOXSTRICTRC rc = VINF_SUCCESS; + int rc2; RTGCUINTREG val; RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID; RTGCUINTREG instrError, cbInstr; @@ -2268,10 +2310,6 @@ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) #ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 uint64_t u64LastTime = RTTimeMilliTS(); #endif -#ifdef VBOX_WITH_STATISTICS - bool fStatEntryStarted = true; - bool fStatExit2Started = false; -#endif Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pVAPIC && pVM->hwaccm.s.vmx.pAPIC)); @@ -2286,14 +2324,12 @@ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) Log2(("\nE")); - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); - #ifdef VBOX_STRICT { RTCCUINTREG val2; - rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2); - 
AssertRC(rc); + rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2); + AssertRC(rc2); Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2)); /* allowed zero */ @@ -2304,8 +2340,8 @@ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0) Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n")); - rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2); - AssertRC(rc); + rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2); + AssertRC(rc2); Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2)); /* Must be set according to the MSR, but can be cleared in case of EPT. */ @@ -2322,8 +2358,8 @@ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0) Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n")); - rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2); - AssertRC(rc); + rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2); + AssertRC(rc2); Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2)); /* allowed zero */ @@ -2334,8 +2370,8 @@ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0) Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n")); - rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2); - AssertRC(rc); + rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2); + AssertRC(rc2); Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2)); /* allowed zero */ @@ -2356,10 +2392,8 @@ VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) /* We can jump to this point to resume execution after determining that a VM-exit is innocent. */ ResumeExecution: - STAM_STATS({ - if (fStatExit2Started) { STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = false; } - if (!fStatEntryStarted) { STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = true; } - }); + if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry)) + STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x); AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(), ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n", (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification)); @@ -2388,19 +2422,19 @@ ResumeExecution: */ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); /* Irq inhibition is no longer active; clear the corresponding VMX state. */ - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0); - AssertRC(rc); + rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0); + AssertRC(rc2); } } else { /* Irq inhibition is no longer active; clear the corresponding VMX state. */ - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0); - AssertRC(rc); + rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0); + AssertRC(rc2); } #ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0 - if (RT_UNLIKELY(cResume & 0xf) == 0) + if (RT_UNLIKELY((cResume & 0xf) == 0)) { uint64_t u64CurTime = RTTimeMilliTS(); @@ -2413,17 +2447,17 @@ ResumeExecution: #endif /* Check for pending actions that force us to go back to ring 3. 
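A small correctness fix sits in the hunk above: RT_UNLIKELY(cResume & 0xf) == 0 applied the comparison to the macro's return value, so the "unlikely" hint described cResume & 0xf rather than the whole once-every-sixteen-iterations condition. Assuming the usual IPRT definition of RT_UNLIKELY as __builtin_expect(!!(expr), 0), control flow was unchanged and only the hint and the readability were wrong, which the added parentheses fix. The toy below uses a stand-in macro to show that both spellings test the same thing.

#include <stdio.h>

/* Stand-in for the assumed RT_UNLIKELY definition: hint that 'expr' is usually
 * false and pass its normalized value through so it can be used in an if(). */
#if defined(__GNUC__)
# define MY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
#else
# define MY_UNLIKELY(expr) (!!(expr))
#endif

int main(void)
{
    unsigned cResume = 32;

    /* Old spelling: the comparison sits outside the macro, so the hint is
     * attached to (cResume & 0xf) itself, not to the rare "== 0" condition. */
    int fOld = MY_UNLIKELY(cResume & 0xf) == 0;

    /* New spelling: the whole condition is marked unlikely, as intended. */
    int fNew = MY_UNLIKELY((cResume & 0xf) == 0);

    /* Both evaluate to the same truth value; the change is about the branch
     * hint and the intent being readable, not about behaviour. */
    printf("old=%d new=%d\n", fOld, fNew);
    return 0;
}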
*/ - if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING) + if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA) || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST)) { /* Check if a sync operation is pending. */ if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) { rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); - AssertRC(rc); if (rc != VINF_SUCCESS) { - Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", rc)); + AssertRC(VBOXSTRICTRC_VAL(rc)); + Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc))); goto end; } } @@ -2436,9 +2470,7 @@ ResumeExecution: if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK) || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK)) { - VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3); - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3; goto end; } @@ -2448,7 +2480,6 @@ ResumeExecution: if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST) || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST)) { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); rc = VINF_EM_PENDING_REQUEST; goto end; } @@ -2456,10 +2487,16 @@ ResumeExecution: /* Check if a pgm pool flush is in progress. */ if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); rc = VINF_PGM_POOL_FLUSH_PENDING; goto end; } + + /* Check if DMA work is pending (2nd+ run). */ + if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1) + { + rc = VINF_EM_RAW_TO_R3; + goto end; + } } #ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION @@ -2502,7 +2539,7 @@ ResumeExecution: /* TPR caching in CR8 */ bool fPending; - int rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending); + rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending); AssertRC(rc2); /* The TPR can be found at offset 0x80 in the APIC mmio page. */ pVCpu->hwaccm.s.vmx.pVAPIC[0x80] = u8LastTPR; @@ -2514,7 +2551,7 @@ ResumeExecution: * -> We don't need to be explicitely notified. There are enough world switches for detecting pending interrupts. */ rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */ - AssertRC(rc); + AssertRC(VBOXSTRICTRC_VAL(rc)); if (pVM->hwaccm.s.fTPRPatchingActive) { @@ -2563,7 +2600,7 @@ ResumeExecution: } #endif #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 - PGMDynMapFlushAutoSet(pVCpu); + PGMRZDynMapFlushAutoSet(pVCpu); #endif /* @@ -2577,18 +2614,31 @@ ResumeExecution: VMMR0LogFlushDisable(pVCpu); #endif /* Save the host state first. 
*/ - rc = VMXR0SaveHostState(pVM, pVCpu); - if (RT_UNLIKELY(rc != VINF_SUCCESS)) + if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT) { - VMMR0LogFlushEnable(pVCpu); - goto end; + rc = VMXR0SaveHostState(pVM, pVCpu); + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + VMMR0LogFlushEnable(pVCpu); + goto end; + } } + /* Load the guest state */ - rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx); - if (RT_UNLIKELY(rc != VINF_SUCCESS)) + if (!pVCpu->hwaccm.s.fContextUseFlags) { - VMMR0LogFlushEnable(pVCpu); - goto end; + VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx); + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal); + } + else + { + rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx); + if (RT_UNLIKELY(rc != VINF_SUCCESS)) + { + VMMR0LogFlushEnable(pVCpu); + goto end; + } + STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull); } #ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION @@ -2601,16 +2651,14 @@ ResumeExecution: /* Non-register state Guest Context */ /** @todo change me according to cpu state */ - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE); - AssertRC(rc); + rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE); + AssertRC(rc2); /** Set TLB flush state as checked until we return from the world switch. */ ASMAtomicWriteU8(&pVCpu->hwaccm.s.fCheckedTLBFlush, true); /* Deal with tagged TLB setup and invalidation. */ pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu); - STAM_STATS({ STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = false; }); - /* Manual save and restore: * - General purpose registers except RIP, RSP * @@ -2624,7 +2672,7 @@ ResumeExecution: */ /* All done! Let's start VM execution. */ - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, z); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x); Assert(idCpuCheck == RTMpCpuId()); #ifdef VBOX_WITH_CRASHDUMP_MAGIC @@ -2664,6 +2712,7 @@ ResumeExecution: ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR); } + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x); ASMSetFlags(uOldEFlags); #ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION uOldEFlags = ~(RTCCUINTREG)0; @@ -2680,8 +2729,6 @@ ResumeExecution: * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, z); - STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit1, v); if (RT_UNLIKELY(rc != VINF_SUCCESS)) { @@ -2693,30 +2740,30 @@ ResumeExecution: /* Success. Query the guest state and figure out what has happened. */ /* Investigate why there was a VM-exit. */ - rc = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason); + rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason); STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]); exitReason &= 0xffff; /* bit 0-15 contain the exit code. 
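The entry path above now saves the host state only when HWACCM_CHANGED_HOST_CONTEXT is set and, if no guest dirty flags are pending at all, calls the new minimal loader (RIP/RSP/RFLAGS only) instead of the full VMXR0LoadGuestState, counting the two cases with StatLoadMinimal and StatLoadFull. Below is a condensed model of that dispatch; the flag bit values and helper names are invented for the illustration and only the decision structure follows the hunk.

#include <stdint.h>
#include <stdio.h>

#define CHANGED_HOST_CONTEXT 0x1u   /* illustrative bit, not the real HWACCM_CHANGED_* value */
/* any other bit set => some piece of guest state needs a full reload */

typedef struct DemoStats { unsigned cLoadMinimal, cLoadFull; } DemoStats;

static void LoadBeforeVmEntry(uint32_t *pfContextUseFlags, DemoStats *pStats)
{
    if (*pfContextUseFlags & CHANGED_HOST_CONTEXT)
    {
        /* VMXR0SaveHostState() in the real code. */
        *pfContextUseFlags &= ~CHANGED_HOST_CONTEXT;
    }

    if (!*pfContextUseFlags)
    {
        /* Nothing dirty: write only RIP/RSP/RFLAGS (the minimal loader). */
        pStats->cLoadMinimal++;
    }
    else
    {
        /* Something changed: full guest-state load, which clears the flags. */
        pStats->cLoadFull++;
        *pfContextUseFlags = 0;
    }
}

int main(void)
{
    DemoStats stats = { 0, 0 };
    uint32_t fFlags = CHANGED_HOST_CONTEXT | 0x8u;  /* first entry: everything dirty */
    LoadBeforeVmEntry(&fFlags, &stats);             /* full load */
    LoadBeforeVmEntry(&fFlags, &stats);             /* tight re-entry: minimal load */
    printf("minimal=%u full=%u\n", stats.cLoadMinimal, stats.cLoadFull);
    return 0;
}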
*/ - rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError); - rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr); - rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo); + rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError); + rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr); + rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo); /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */ - rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode); - rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo); - rc |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification); - AssertRC(rc); + rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode); + rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo); + rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification); + AssertRC(rc2); /* Sync back the guest state */ - rc = VMXR0SaveGuestState(pVM, pVCpu, pCtx); - AssertRC(rc); + rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx); + AssertRC(rc2); /* Note! NOW IT'S SAFE FOR LOGGING! */ VMMR0LogFlushEnable(pVCpu); Log2(("Raw exit reason %08x\n", exitReason)); /* Check if an injected event was interrupted prematurely. */ - rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val); - AssertRC(rc); + rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val); + AssertRC(rc2); pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val); if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo) /* Ignore 'int xx' as they'll be restarted anyway. */ @@ -2729,8 +2776,8 @@ ResumeExecution: /* Error code present? */ if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo)) { - rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val); - AssertRC(rc); + rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val); + AssertRC(rc2); pVCpu->hwaccm.s.Event.errCode = val; Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val)); } @@ -2763,14 +2810,14 @@ ResumeExecution: if ( fSetupTPRCaching && u8LastTPR != pVCpu->hwaccm.s.vmx.pVAPIC[0x80]) { - rc = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pVAPIC[0x80]); - AssertRC(rc); + rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pVAPIC[0x80]); + AssertRC(rc2); } - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, v); - STAM_STATS({ STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = true; }); + STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x); /* Some cases don't need a complete resync of the guest CPU state; handle them here. */ + Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */ switch (exitReason) { case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */ @@ -2781,6 +2828,11 @@ ResumeExecution: if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo)) { Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ); +#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION + if ( RTThreadPreemptIsPendingTrusty() + && !RTThreadPreemptIsPending(NIL_RTTHREAD)) + goto ResumeExecution; +#endif /* External interrupt; leave to allow it to be dispatched again. 
*/ rc = VINF_EM_RAW_INTERRUPT; break; @@ -2827,8 +2879,8 @@ ResumeExecution: Log(("Forward #NM fault to the guest\n")); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; } @@ -2840,7 +2892,7 @@ ResumeExecution: { /* A genuine pagefault. * Forward the trap to the guest by injecting the exception and resuming execution. */ - Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp)); + Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp)); Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx)); @@ -2848,8 +2900,8 @@ ResumeExecution: /* Now we must update CR2. */ pCtx->cr2 = exitQualification; - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; @@ -2907,14 +2959,14 @@ ResumeExecution: && GCPhys == GCPhysApicBase) { Log(("Enable VT-x virtual APIC access filtering\n")); - rc = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); - AssertRC(rc); + rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); + AssertRC(rc2); } } /* Forward it to our trap handler first, in case our shadow pages are out of sync. */ rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification); - Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc)); + Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); if (rc == VINF_SUCCESS) { /* We've successfully synced our shadow pages, so let's just continue execution. */ @@ -2940,15 +2992,15 @@ ResumeExecution: /* Now we must update CR2. */ pCtx->cr2 = exitQualification; - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; } #ifdef VBOX_STRICT if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK) - Log2(("PGMTrap0eHandler failed with %d\n", rc)); + Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc))); #endif /* Need to go back to the recompiler to emulate the instruction. 
*/ TRPMResetTrap(pVCpu); @@ -2966,8 +3018,8 @@ ResumeExecution: break; } Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip)); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; @@ -3011,18 +3063,18 @@ ResumeExecution: pCtx->dr[7] |= 0x400; /* must be one */ /* Resync DR7 */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); - AssertRC(rc); + rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); + AssertRC(rc2); Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip, exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7])); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; } /* Return to ring 3 to deal with the debug exit code. */ - Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc)); + Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc))); break; } @@ -3032,13 +3084,17 @@ ResumeExecution: if (rc == VINF_EM_RAW_GUEST_TRAP) { Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip)); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; } if (rc == VINF_SUCCESS) + { + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; - Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc)); + } + Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc))); break; } @@ -3053,9 +3109,9 @@ ResumeExecution: if ( !CPUMIsGuestInRealModeEx(pCtx) || !pVM->hwaccm.s.vmx.pRealModeTSS) { - Log(("Trap %x at %04X:%RGv errorCode=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode)); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode)); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; } @@ -3064,11 +3120,12 @@ ResumeExecution: LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip)); - rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp); - if (RT_SUCCESS(rc)) + rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp); + if (RT_SUCCESS(rc2)) { bool fUpdateRIP = true; + rc = VINF_SUCCESS; Assert(cbOp == pDis->opsize); switch (pDis->pCurInstr->opcode) { @@ -3081,8 +3138,8 @@ ResumeExecution: pCtx->eflags.Bits.u1IF = 1; EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->opsize); 
Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)); - rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); - AssertRC(rc); + rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI); + AssertRC(rc2); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti); break; @@ -3111,15 +3168,15 @@ ResumeExecution: uMask = 0xffff; } - rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack); - if (RT_FAILURE(rc)) + rc2 = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack); + if (RT_FAILURE(rc2)) { rc = VERR_EM_INTERPRETER; break; } eflags.u = 0; - rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm); - if (RT_FAILURE(rc)) + rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm); + if (RT_FAILURE(rc2)) { rc = VERR_EM_INTERPRETER; break; @@ -3153,8 +3210,8 @@ ResumeExecution: uMask = 0xffff; } - rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack); - if (RT_FAILURE(rc)) + rc2 = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack); + if (RT_FAILURE(rc2)) { rc = VERR_EM_INTERPRETER; break; @@ -3164,8 +3221,8 @@ ResumeExecution: eflags.Bits.u1RF = 0; eflags.Bits.u1VM = 0; - rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm); - if (RT_FAILURE(rc)) + rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm); + if (RT_FAILURE(rc2)) { rc = VERR_EM_INTERPRETER; break; @@ -3189,14 +3246,14 @@ ResumeExecution: break; } - rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack); - if (RT_FAILURE(rc)) + rc2 = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack); + if (RT_FAILURE(rc2)) { rc = VERR_EM_INTERPRETER; break; } - rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame)); - if (RT_FAILURE(rc)) + rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame)); + if (RT_FAILURE(rc2)) { rc = VERR_EM_INTERPRETER; break; @@ -3223,7 +3280,7 @@ ResumeExecution: intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0); - AssertRC(rc); + AssertRC(VBOXSTRICTRC_VAL(rc)); fUpdateRIP = false; STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt); break; @@ -3241,7 +3298,7 @@ ResumeExecution: intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0); - AssertRC(rc); + AssertRC(VBOXSTRICTRC_VAL(rc)); fUpdateRIP = false; STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt); } @@ -3258,14 +3315,14 @@ ResumeExecution: intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0); - AssertRC(rc); + AssertRC(VBOXSTRICTRC_VAL(rc)); fUpdateRIP = false; STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt); break; } default: - rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, &cbSize); + rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR, &cbSize); break; } @@ -3285,7 +3342,7 @@ ResumeExecution: else rc = VERR_EM_INTERPRETER; - AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER || rc == 
VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc))); break; } @@ -3313,8 +3370,8 @@ ResumeExecution: } Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip)); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3); goto ResumeExecution; @@ -3326,7 +3383,7 @@ ResumeExecution: { Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode)); rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode); - AssertRC(rc); + AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */ /* Go back to ring 3 in case of a triple fault. */ if ( vector == X86_XCPT_DF @@ -3345,7 +3402,7 @@ ResumeExecution: default: rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE; - AssertMsgFailed(("Unexpected interuption code %x\n", intInfo)); + AssertMsgFailed(("Unexpected interruption code %x\n", intInfo)); break; } @@ -3359,8 +3416,8 @@ ResumeExecution: Assert(pVM->hwaccm.s.fNestedPaging); - rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys); - AssertRC(rc); + rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys); + AssertRC(rc2); Assert(((exitQualification >> 7) & 3) != 2); /* Determine the kind of violation. */ @@ -3390,8 +3447,8 @@ ResumeExecution: if (GCPhys == GCPhysApicBase + 0x80) { Log(("Enable VT-x virtual APIC access filtering\n")); - rc = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); - AssertRC(rc); + rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); + AssertRC(rc2); } } } @@ -3404,7 +3461,7 @@ ResumeExecution: /* Handle the pagefault trap for the nested shadow table. */ rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys); - Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc)); + Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); if (rc == VINF_SUCCESS) { /* We've successfully synced our shadow pages, so let's just continue execution. */ Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode)); @@ -3416,7 +3473,7 @@ ResumeExecution: #ifdef VBOX_STRICT if (rc != VINF_EM_RAW_EMULATE_INSTR) - LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc)); + LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", VBOXSTRICTRC_VAL(rc))); #endif /* Need to go back to the recompiler to emulate the instruction. */ TRPMResetTrap(pVCpu); @@ -3429,10 +3486,35 @@ ResumeExecution: Assert(pVM->hwaccm.s.fNestedPaging); - rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys); - AssertRC(rc); - + rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys); + AssertRC(rc2); Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys)); + + /* Shortcut for APIC TPR reads and writes. 
*/ + if ( (GCPhys & 0xfff) == 0x080 + && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */ + && fSetupTPRCaching + && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)) + { + RTGCPHYS GCPhysApicBase; + PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */ + GCPhysApicBase &= PAGE_BASE_GC_MASK; + if (GCPhys == GCPhysApicBase + 0x80) + { + Log(("Enable VT-x virtual APIC access filtering\n")); + rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P); + AssertRC(rc2); + } + } + + rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX); + if (rc == VINF_SUCCESS) + { + Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip)); + goto ResumeExecution; + } + + Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc))); break; } @@ -3440,8 +3522,8 @@ ResumeExecution: /* Clear VM-exit on IF=1 change. */ LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF)); pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); + rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); + AssertRC(rc2); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow); goto ResumeExecution; /* we check for pending guest interrupts there */ @@ -3465,7 +3547,7 @@ ResumeExecution: pCtx->rip += cbInstr; goto ResumeExecution; } - AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc)); + AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); rc = VINF_EM_RAW_EMULATE_INSTR; break; } @@ -3515,7 +3597,7 @@ ResumeExecution: pCtx->rip += cbInstr; goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc)); + AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc))); break; } @@ -3531,7 +3613,7 @@ ResumeExecution: pCtx->rip += cbInstr; goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); break; } @@ -3546,17 +3628,17 @@ ResumeExecution: Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff)); /* Our patch code uses LSTAR for TPR caching. */ - rc = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); - AssertRC(rc); + rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff); + AssertRC(rc2); } /* Skip the instruction and continue. */ pCtx->rip += cbInstr; /* wrmsr = [0F 30] */ /* Only resume if successful. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); goto ResumeExecution; } + pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR; /* no break */ case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */ { @@ -3574,7 +3656,7 @@ ResumeExecution: /* Only resume if successful. */ goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? 
"rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc))); break; } @@ -3667,12 +3749,12 @@ ResumeExecution: { /* Disable drx move intercepts. */ pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT; - rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); - AssertRC(rc); + rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls); + AssertRC(rc2); /* Save the host and load the guest debug state. */ - rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */); - AssertRC(rc); + rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */); + AssertRC(rc2); #ifdef LOG_ENABLED if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE) @@ -3758,20 +3840,20 @@ ResumeExecution: /* Disassemble manually to deal with segment prefixes. */ /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */ /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */ - rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL); - if (rc == VINF_SUCCESS) + rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL); + if (RT_SUCCESS(rc)) { if (fIOWrite) { Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite); - rc = VBOXSTRICTRC_TODO(IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize)); + rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize); } else { Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize)); STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead); - rc = VBOXSTRICTRC_TODO(IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize)); + rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize); } } else @@ -3787,7 +3869,7 @@ ResumeExecution: if (fIOWrite) { STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite); - rc = VBOXSTRICTRC_TODO(IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize)); + rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize); if (rc == VINF_IOM_HC_IOPORT_WRITE) HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize); } @@ -3796,7 +3878,7 @@ ResumeExecution: uint32_t u32Val = 0; STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead); - rc = VBOXSTRICTRC_TODO(IOMIOPortRead(pVM, uPort, &u32Val, cbSize)); + rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize); if (IOM_SUCCESS(rc)) { /* Write back to the EAX register. */ @@ -3854,8 +3936,8 @@ ResumeExecution: pCtx->dr[7] |= 0x400; /* must be one */ /* Resync DR7 */ - rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); - AssertRC(rc); + rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]); + AssertRC(rc2); /* Construct inject info. 
*/ intInfo = X86_XCPT_DB; @@ -3863,15 +3945,14 @@ ResumeExecution: intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip)); - rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0); - AssertRC(rc); + rc2 = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0); + AssertRC(rc2); STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); goto ResumeExecution; } } } - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); goto ResumeExecution; } @@ -3885,7 +3966,7 @@ ResumeExecution: else if (rc == VINF_IOM_HC_IOPORT_WRITE) Assert(fIOWrite); else - AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc)); + AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); #endif STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); break; @@ -3894,7 +3975,6 @@ ResumeExecution: case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */ LogFlow(("VMX_EXIT_TPR\n")); /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); goto ResumeExecution; case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */ @@ -3913,12 +3993,9 @@ ResumeExecution: GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification); LogFlow(("Apic access at %RGp\n", GCPhys)); - rc = VBOXSTRICTRC_TODO(IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys)); + rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys); if (rc == VINF_SUCCESS) - { - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); goto ResumeExecution; /* rip already updated */ - } break; } @@ -3930,7 +4007,6 @@ ResumeExecution: } case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */ - STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1); if (!TMTimerPollBool(pVM, pVCpu)) goto ResumeExecution; rc = VINF_EM_RAW_TIMER_PENDING; @@ -3949,6 +4025,7 @@ ResumeExecution: case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */ case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */ case VMX_EXIT_EPT_VIOLATION: + case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */ case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */ /* Already handled above. */ break; @@ -3979,8 +4056,8 @@ ResumeExecution: Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo))); Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo)); - rc = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT); - AssertRC(rc); + rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT); + AssertRC(rc2); } /* else Exceptions and software interrupts can just be restarted. 
*/ rc = VERR_EM_INTERPRETER; @@ -4013,7 +4090,7 @@ ResumeExecution: ) goto ResumeExecution; } - AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", rc)); + AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc))); break; case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */ @@ -4053,7 +4130,7 @@ ResumeExecution: || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED || rc == VINF_EM_RESCHEDULE_REM, - ("rc = %d\n", rc)); + ("rc = %d\n", VBOXSTRICTRC_VAL(rc))); break; case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */ @@ -4120,6 +4197,9 @@ ResumeExecution: } end: + /* We are now going back to ring-3, so clear the action flag. */ + VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); + /* Signal changes for the recompiler. */ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS); @@ -4161,12 +4241,11 @@ end: ASMSetFlags(uOldEFlags); #endif - STAM_STATS({ - if (fStatExit2Started) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); - else if (fStatEntryStarted) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); - }); + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x); + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x); + STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); Log2(("X")); - return rc; + return VBOXSTRICTRC_TODO(rc); } @@ -4357,9 +4436,9 @@ VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys) * @param rc Return code * @param pCtx Current CPU context (not updated) */ -static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx) +static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx) { - switch (rc) + switch (VBOXSTRICTRC_VAL(rc)) { case VERR_VMX_INVALID_VMXON_PTR: AssertFailed(); @@ -4508,7 +4587,7 @@ static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX default: /* impossible */ - AssertMsgFailed(("%Rrc (%#x)\n", rc, rc)); + AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc))); break; } } diff --git a/src/VBox/VMM/VMMR0/HWVMXR0.h b/src/VBox/VMM/VMMR0/HWVMXR0.h index 9ba2224e1..f3993ef2c 100644 --- a/src/VBox/VMM/VMMR0/HWVMXR0.h +++ b/src/VBox/VMM/VMMR0/HWVMXR0.h @@ -1,4 +1,4 @@ -/* $Id: HWVMXR0.h $ */ +/* $Id: HWVMXR0.h 34998 2010-12-13 12:53:16Z vboxsync $ */ /** @file * HWACCM VT-x - Internal header file. */ @@ -234,7 +234,7 @@ VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, R val = 0xf3; \ } \ else \ - if ( ((!pCtx->csHid.Attr.n.u1DefBig && !CPUMIsGuestIn64BitCodeEx(pCtx)) || pCtx->reg) \ + if ( (pCtx->reg || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || (!pCtx->csHid.Attr.n.u1DefBig && !CPUMIsGuestIn64BitCodeEx(pCtx))) \ && pCtx->reg##Hid.Attr.n.u1Present == 1) \ val = pCtx->reg##Hid.Attr.u | X86_SEL_TYPE_ACCESSED; \ else \ diff --git a/src/VBox/VMM/VMMR0/PDMR0Device.cpp b/src/VBox/VMM/VMMR0/PDMR0Device.cpp index 93ad8254e..8fd337d0c 100644 --- a/src/VBox/VMM/VMMR0/PDMR0Device.cpp +++ b/src/VBox/VMM/VMMR0/PDMR0Device.cpp @@ -1,4 +1,4 @@ -/* $Id: PDMR0Device.cpp $ */ +/* $Id: PDMR0Device.cpp 33799 2010-11-05 16:14:07Z vboxsync $ */ /** @file * PDM - Pluggable Device and Driver Manager, R0 Device parts.
*/ @@ -56,7 +56,7 @@ RT_C_DECLS_END *******************************************************************************/ static void pdmR0IsaSetIrq(PVM pVM, int iIrq, int iLevel); static void pdmR0IoApicSetIrq(PVM pVM, int iIrq, int iLevel); - +static void pdmR0IoApicSendMsi(PVM pVM, RTGCPHYS GCAddr, uint32_t uValue); @@ -251,6 +251,33 @@ static DECLCALLBACK(PVMCPU) pdmR0DevHlp_GetVMCPU(PPDMDEVINS pDevIns) } +/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGet} */ +static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGet(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR0DevHlp_TMTimeVirtGet: caller='%p'/%d\n", pDevIns, pDevIns->iInstance)); + return TMVirtualGet(pDevIns->Internal.s.pVMR0); +} + + +/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGetFreq} */ +static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGetFreq(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR0DevHlp_TMTimeVirtGetFreq: caller='%p'/%d\n", pDevIns, pDevIns->iInstance)); + return TMVirtualGetFreq(pDevIns->Internal.s.pVMR0); +} + + +/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGetNano} */ +static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGetNano(PPDMDEVINS pDevIns) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + LogFlow(("pdmR0DevHlp_TMTimeVirtGetNano: caller='%p'/%d\n", pDevIns, pDevIns->iInstance)); + return TMVirtualToNano(pDevIns->Internal.s.pVMR0, TMVirtualGet(pDevIns->Internal.s.pVMR0)); +} + + /** * The Ring-0 Device Helper Callbacks. */ @@ -271,6 +298,9 @@ extern DECLEXPORT(const PDMDEVHLPR0) g_pdmR0DevHlp = pdmR0DevHlp_GetVM, pdmR0DevHlp_CanEmulateIoBlock, pdmR0DevHlp_GetVMCPU, + pdmR0DevHlp_TMTimeVirtGet, + pdmR0DevHlp_TMTimeVirtGetFreq, + pdmR0DevHlp_TMTimeVirtGetNano, PDM_DEVHLPR0_VERSION }; @@ -591,6 +621,13 @@ static DECLCALLBACK(void) pdmR0PciHlp_IoApicSetIrq(PPDMDEVINS pDevIns, int iIrq, pdmR0IoApicSetIrq(pDevIns->Internal.s.pVMR0, iIrq, iLevel); } +/** @interface_method_impl{PDMPCIHLPR0,pfnIoApicSendMsi} */ +static DECLCALLBACK(void) pdmR0PciHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCAddr, uint32_t uValue) +{ + PDMDEV_ASSERT_DEVINS(pDevIns); + Log4(("pdmR0PciHlp_IoApicSendMsi: Address=%p Value=%d\n", GCAddr, uValue)); + pdmR0IoApicSendMsi(pDevIns->Internal.s.pVMR0, GCAddr, uValue); +} /** @interface_method_impl{PDMPCIHLPR0,pfnLock} */ static DECLCALLBACK(int) pdmR0PciHlp_Lock(PPDMDEVINS pDevIns, int rc) @@ -616,6 +653,7 @@ extern DECLEXPORT(const PDMPCIHLPR0) g_pdmR0PciHlp = PDM_PCIHLPR0_VERSION, pdmR0PciHlp_IsaSetIrq, pdmR0PciHlp_IoApicSetIrq, + pdmR0PciHlp_IoApicSendMsi, pdmR0PciHlp_Lock, pdmR0PciHlp_Unlock, PDM_PCIHLPR0_VERSION, /* the end */ @@ -716,6 +754,14 @@ static DECLCALLBACK(bool) pdmR0DrvHlp_AssertOther(PPDMDRVINS pDrvIns, const char } +/** @interface_method_impl{PDMDRVHLPR0,pfnFTSetCheckpoint} */ +static DECLCALLBACK(int) pdmR0DrvHlp_FTSetCheckpoint(PPDMDRVINS pDrvIns, FTMCHECKPOINTTYPE enmType) +{ + PDMDRV_ASSERT_DRVINS(pDrvIns); + return FTMSetCheckpoint(pDrvIns->Internal.s.pVMR0, enmType); +} + + /** * The Ring-0 Context Driver Helper Callbacks. */ @@ -728,6 +774,7 @@ extern DECLEXPORT(const PDMDRVHLPR0) g_pdmR0DrvHlp = pdmR0DrvHlp_VMSetRuntimeErrorV, pdmR0DrvHlp_AssertEMT, pdmR0DrvHlp_AssertOther, + pdmR0DrvHlp_FTSetCheckpoint, PDM_DRVHLPRC_VERSION }; @@ -836,3 +883,20 @@ VMMR0_INT_DECL(int) PDMR0DeviceCallReqHandler(PVM pVM, PPDMDEVICECALLREQHANDLERR return pfnReqHandlerR0(pDevIns, pReq->uOperation, pReq->u64Arg); } +/** + * Sends an MSI to I/O APIC. + * + * @param pVM The VM handle. + * @param GCAddr Address of the message. 
+ * @param uValue Value of the message. + */ +static void pdmR0IoApicSendMsi(PVM pVM, RTGCPHYS GCAddr, uint32_t uValue) +{ + if (pVM->pdm.s.IoApic.pDevInsR0) + { + pdmLock(pVM); + pVM->pdm.s.IoApic.pfnSendMsiR0(pVM->pdm.s.IoApic.pDevInsR0, GCAddr, uValue); + pdmUnlock(pVM); + } +} + diff --git a/src/VBox/VMM/VMMR0/PDMR0Driver.cpp b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp index 6ca706f53..388dad723 100644 --- a/src/VBox/VMM/VMMR0/PDMR0Driver.cpp +++ b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp @@ -1,4 +1,4 @@ -/* $Id: PDMR0Driver.cpp $ */ +/* $Id: PDMR0Driver.cpp 28800 2010-04-27 08:22:32Z vboxsync $ */ /** @file * PDM - Pluggable Device and Driver Manager, R0 Driver parts. */ diff --git a/src/VBox/VMM/VMMR0/PGMR0.cpp b/src/VBox/VMM/VMMR0/PGMR0.cpp index 72d4733f7..28c1289f0 100644 --- a/src/VBox/VMM/VMMR0/PGMR0.cpp +++ b/src/VBox/VMM/VMMR0/PGMR0.cpp @@ -1,10 +1,10 @@ -/* $Id: PGMR0.cpp $ */ +/* $Id: PGMR0.cpp 32431 2010-09-11 18:02:17Z vboxsync $ */ /** @file * PGM - Page Manager and Monitor, Ring-0. */ /* - * Copyright (C) 2007 Oracle Corporation + * Copyright (C) 2007-2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; @@ -29,7 +29,10 @@ #include <iprt/assert.h> #include <iprt/mem.h> -RT_C_DECLS_BEGIN + +/* + * Instantiate the ring-0 header/code templates. + */ #define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name) #include "PGMR0Bth.h" #undef PGM_BTH_NAME @@ -46,8 +49,6 @@ RT_C_DECLS_BEGIN #include "PGMR0Bth.h" #undef PGM_BTH_NAME -RT_C_DECLS_END - /** * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage. @@ -161,6 +162,7 @@ VMMR0DECL(int) PGMR0PhysAllocateHandyPages(PVM pVM, PVMCPU pVCpu) return rc; } + /** * Worker function for PGMR3PhysAllocateLargeHandyPage * @@ -186,27 +188,34 @@ VMMR0DECL(int) PGMR0PhysAllocateLargeHandyPage(PVM pVM, PVMCPU pVCpu) return rc; } + /** * #PF Handler for nested paging. * * @returns VBox status code (appropriate for trap handling and GC return). * @param pVM VM Handle. * @param pVCpu VMCPU Handle. - * @param enmShwPagingMode Paging mode for the nested page tables + * @param enmShwPagingMode Paging mode for the nested page tables. * @param uErr The trap error code. * @param pRegFrame Trap register frame. - * @param pvFault The fault address. + * @param GCPhysFault The fault address. */ -VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS pvFault) +VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr, + PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault) { int rc; - LogFlow(("PGMTrap0eHandler: uErr=%RGx pvFault=%RGp eip=%RGv\n", uErr, pvFault, (RTGCPTR)pRegFrame->rip)); + LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip)); STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0e, a); STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } ); /* AMD uses the host's paging mode; Intel has a single mode (EPT). 
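The MSI plumbing added in PDMR0Device.cpp is a guard-and-forward chain: a ring-0 PCI bus device calls the new pfnIoApicSendMsi entry of its PDMPCIHLPR0 table, the pdmR0PciHlp_IoApicSendMsi wrapper validates the device instance, and pdmR0IoApicSendMsi checks for a ring-0 I/O APIC instance, takes the PDM lock and forwards the message to pfnSendMsiR0. A minimal sketch of the caller side follows; the function name and the const helper-pointer type are illustrative assumptions, only the pfnIoApicSendMsi signature (PPDMDEVINS, RTGCPHYS, uint32_t) comes from this patch.

/* Illustrative sketch only: how a ring-0 PCI bus device could forward an
 * MSI through the helper table extended by this patch.  The function name
 * and the PCPDMPCIHLPR0 parameter are assumptions, not VirtualBox API. */
static void pciBusR0SketchSendMsi(PPDMDEVINS pDevIns, PCPDMPCIHLPR0 pPciHlpR0,
                                  RTGCPHYS GCMsiAddr, uint32_t u32MsiValue)
{
    /* pfnIoApicSendMsi locks PDM and hands the address/value pair to the
     * I/O APIC's ring-0 pfnSendMsiR0 worker, so no extra locking is needed. */
    pPciHlpR0->pfnIoApicSendMsi(pDevIns, GCMsiAddr, u32MsiValue);
}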
*/ - AssertMsg(enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT, ("enmShwPagingMode=%d\n", enmShwPagingMode)); + AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX + || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT, + ("enmShwPagingMode=%d\n", enmShwPagingMode)); + + /* Reserved shouldn't end up here. */ + Assert(!(uErr & X86_TRAP_PF_RSVD)); #ifdef VBOX_WITH_STATISTICS /* @@ -217,87 +226,184 @@ VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PVM pVM, PVMCPU pVCpu, PGMMODE enm if (!(uErr & X86_TRAP_PF_P)) { if (uErr & X86_TRAP_PF_RW) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSNotPresentWrite); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentWrite); else - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSNotPresentRead); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentRead); } else if (uErr & X86_TRAP_PF_RW) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSWrite); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSWrite); else if (uErr & X86_TRAP_PF_RSVD) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSReserved); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSReserved); else if (uErr & X86_TRAP_PF_ID) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSNXE); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNXE); else - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eUSRead); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSRead); } else { /* Supervisor */ if (!(uErr & X86_TRAP_PF_P)) { if (uErr & X86_TRAP_PF_RW) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVNotPresentWrite); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentWrite); else - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVNotPresentRead); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentRead); } else if (uErr & X86_TRAP_PF_RW) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVWrite); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVWrite); else if (uErr & X86_TRAP_PF_ID) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSNXE); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSNXE); else if (uErr & X86_TRAP_PF_RSVD) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0eSVReserved); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVReserved); } #endif /* * Call the worker. * - * We pretend the guest is in protected mode without paging, so we can use existing code to build the - * nested page tables. + * Note! We pretend the guest is in protected mode without paging, so we + * can use existing code to build the nested page tables. 
*/ bool fLockTaken = false; switch(enmShwPagingMode) { - case PGMMODE_32_BIT: - rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken); - break; - case PGMMODE_PAE: - case PGMMODE_PAE_NX: - rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken); - break; - case PGMMODE_AMD64: - case PGMMODE_AMD64_NX: - rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken); - break; - case PGMMODE_EPT: - rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, pvFault, &fLockTaken); - break; - default: - AssertFailed(); - rc = VERR_INVALID_PARAMETER; - break; + case PGMMODE_32_BIT: + rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken); + break; + case PGMMODE_PAE: + case PGMMODE_PAE_NX: + rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken); + break; + case PGMMODE_AMD64: + case PGMMODE_AMD64_NX: + rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken); + break; + case PGMMODE_EPT: + rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken); + break; + default: + AssertFailed(); + rc = VERR_INVALID_PARAMETER; + break; } if (fLockTaken) { Assert(PGMIsLockOwner(pVM)); pgmUnlock(pVM); } + if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE) rc = VINF_SUCCESS; - else /* Note: hack alert for difficult to reproduce problem. */ - if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */ - || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */ - || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */ - || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */ + else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */ + || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */ + || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */ + || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */ { - Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, pvFault, uErr, pRegFrame->rip)); - /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about single VCPU VMs though. */ + Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip)); + /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about + single VCPU VMs though. */ rc = VINF_SUCCESS; } STAM_STATS({ if (!pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution)) - pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.StatRZTrap0eTime2Misc; }); - STAM_PROFILE_STOP_EX(&pVCpu->pgm.s.StatRZTrap0e, pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a); + pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; }); + STAM_PROFILE_STOP_EX(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a); return rc; } + +/** + * #PF Handler for deliberate nested paging misconfiguration (/reserved bit) + * employed for MMIO pages. + * + * @returns VBox status code (appropriate for trap handling and GC return). + * @param pVM The VM Handle. + * @param pVCpu The current CPU. + * @param enmShwPagingMode Paging mode for the nested page tables. + * @param pRegFrame Trap register frame. + * @param GCPhysFault The fault address. 
+ * @param uErr The error code, UINT32_MAX if not available + * (VT-x). + */ +VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode, + PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr) +{ +#ifdef PGM_WITH_MMIO_OPTIMIZATIONS + STAM_PROFILE_START(&pVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a); + VBOXSTRICTRC rc; + + /* + * Try lookup the all access physical handler for the address. + */ + pgmLock(pVM); + PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pVM, GCPhysFault); + if (RT_LIKELY(pHandler && pHandler->enmType != PGMPHYSHANDLERTYPE_PHYSICAL_WRITE)) + { + /* + * If the handle has aliases page or pages that have been temporarily + * disabled, we'll have to take a detour to make sure we resync them + * to avoid lots of unnecessary exits. + */ + PPGMPAGE pPage; + if ( ( pHandler->cAliasedPages + || pHandler->cTmpOffPages) + && ( (pPage = pgmPhysGetPage(&pVM->pgm.s, GCPhysFault)) == NULL + || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED) + ) + { + Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage)); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage); + rc = pgmShwSyncNestedPageLocked(pVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode); + pgmUnlock(pVM); + } + else + { + if (pHandler->CTX_SUFF(pfnHandler)) + { + CTX_MID(PFNPGM,PHYSHANDLER) pfnHandler = pHandler->CTX_SUFF(pfnHandler); + void *pvUser = pHandler->CTX_SUFF(pvUser); + STAM_PROFILE_START(&pHandler->Stat, h); + pgmUnlock(pVM); + + Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pfnHandler, uErr, GCPhysFault, pvUser)); + rc = pfnHandler(pVM, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame, GCPhysFault, GCPhysFault, pvUser); + +#ifdef VBOX_WITH_STATISTICS + pgmLock(pVM); + pHandler = pgmHandlerPhysicalLookup(pVM, GCPhysFault); + if (pHandler) + STAM_PROFILE_STOP(&pHandler->Stat, h); + pgmUnlock(pVM); +#endif + } + else + { + pgmUnlock(pVM); + Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr)); + rc = VINF_EM_RAW_EMULATE_INSTR; + } + } + } + else + { + /* + * Must be out of sync, so do a SyncPage and restart the instruction. + * + * ASSUMES that ALL handlers are page aligned and covers whole pages + * (assumption asserted in PGMHandlerPhysicalRegisterEx). + */ + Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr)); + STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage); + rc = pgmShwSyncNestedPageLocked(pVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode); + pgmUnlock(pVM); + } + + STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a); + return rc; + +#else + AssertLogRelFailed(); + return VERR_INTERNAL_ERROR_4; +#endif +} + diff --git a/src/VBox/VMM/VMMR0/PGMR0Bth.h b/src/VBox/VMM/VMMR0/PGMR0Bth.h index 6c0bb8de3..6ec721147 100644 --- a/src/VBox/VMM/VMMR0/PGMR0Bth.h +++ b/src/VBox/VMM/VMMR0/PGMR0Bth.h @@ -1,4 +1,4 @@ -/* $Id: PGMR0Bth.h $ */ +/* $Id: PGMR0Bth.h 28800 2010-04-27 08:22:32Z vboxsync $ */ /** @file * VBox - Page Manager / Monitor, Shadow+Guest Paging Template. */ diff --git a/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp b/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp deleted file mode 100644 index c48758477..000000000 --- a/src/VBox/VMM/VMMR0/PGMR0DynMap.cpp +++ /dev/null @@ -1,2240 +0,0 @@ -/* $Id: PGMR0DynMap.cpp $ */ -/** @file - * PGM - Page Manager and Monitor, ring-0 dynamic mapping cache. 
- */ - -/* - * Copyright (C) 2008 Oracle Corporation - * - * This file is part of VirtualBox Open Source Edition (OSE), as - * available from http://www.virtualbox.org. This file is free software; - * you can redistribute it and/or modify it under the terms of the GNU - * General Public License (GPL) as published by the Free Software - * Foundation, in version 2 as it comes in the "COPYING" file of the - * VirtualBox OSE distribution. VirtualBox OSE is distributed in the - * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. - */ - -/******************************************************************************* -* Internal Functions * -*******************************************************************************/ -#define LOG_GROUP LOG_GROUP_PGM -#include <VBox/pgm.h> -#include "../PGMInternal.h" -#include <VBox/vm.h> -#include "../PGMInline.h" -#include <VBox/sup.h> -#include <VBox/err.h> -#include <iprt/asm.h> -#include <iprt/asm-amd64-x86.h> -#include <iprt/alloc.h> -#include <iprt/assert.h> -#include <iprt/cpuset.h> -#include <iprt/memobj.h> -#include <iprt/mp.h> -#include <iprt/semaphore.h> -#include <iprt/spinlock.h> -#include <iprt/string.h> - - -/******************************************************************************* -* Defined Constants And Macros * -*******************************************************************************/ -/** The max size of the mapping cache (in pages). */ -#define PGMR0DYNMAP_MAX_PAGES ((16*_1M) >> PAGE_SHIFT) -/** The small segment size that is adopted on out-of-memory conditions with a - * single big segment. */ -#define PGMR0DYNMAP_SMALL_SEG_PAGES 128 -/** The number of pages we reserve per CPU. */ -#define PGMR0DYNMAP_PAGES_PER_CPU 256 -/** The minimum number of pages we reserve per CPU. - * This must be equal or larger than the autoset size. */ -#define PGMR0DYNMAP_PAGES_PER_CPU_MIN 64 -/** The number of guard pages. - * @remarks Never do tuning of the hashing or whatnot with a strict build! */ -#if defined(VBOX_STRICT) -# define PGMR0DYNMAP_GUARD_PAGES 1 -#else -# define PGMR0DYNMAP_GUARD_PAGES 0 -#endif -/** The dummy physical address of guard pages. */ -#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS UINT32_C(0x7777feed) -/** The dummy reference count of guard pages. (Must be non-zero.) */ -#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT INT32_C(0x7777feed) -#if 0 -/** Define this to just clear the present bit on guard pages. - * The alternative is to replace the entire PTE with a bad not-present - * PTE. Either way, XNU will screw us. :-/ */ -#define PGMR0DYNMAP_GUARD_NP -#endif -/** The dummy PTE value for a page. */ -#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE X86_PTE_PG_MASK -/** The dummy PTE value for a page. */ -#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE UINT64_MAX /*X86_PTE_PAE_PG_MASK*/ -/** Calcs the overload threshold. Currently set at 50%. */ -#define PGMR0DYNMAP_CALC_OVERLOAD(cPages) ((cPages) / 2) - -#if 0 -/* Assertions cause panics if preemption is disabled; this can be used to work around that. */ -//#define RTSpinlockAcquire(a,b) do {} while (0) -//#define RTSpinlockRelease(a,b) do {} while (0) -#endif - - -/******************************************************************************* -* Structures and Typedefs * -*******************************************************************************/ -/** - * Ring-0 dynamic mapping cache segment. - * - * The dynamic mapping cache can be extended with additional segments if the - * load is found to be too high.
This is done the next time a VM is created, under - * the protection of the init mutex. The arrays are reallocated and the new - * segment is added to the end of these. Nothing is rehashed of course, as the - * indexes / addresses must remain unchanged. - * - * This structure is only modified while owning the init mutex or during module - * init / term. - */ -typedef struct PGMR0DYNMAPSEG -{ - /** Pointer to the next segment. */ - struct PGMR0DYNMAPSEG *pNext; - /** The memory object for the virtual address range that we're abusing. */ - RTR0MEMOBJ hMemObj; - /** The start page in the cache. (I.e. index into the arrays.) */ - uint16_t iPage; - /** The number of pages this segment contributes. */ - uint16_t cPages; - /** The number of page tables. */ - uint16_t cPTs; - /** The memory objects for the page tables. */ - RTR0MEMOBJ ahMemObjPTs[1]; -} PGMR0DYNMAPSEG; -/** Pointer to a ring-0 dynamic mapping cache segment. */ -typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG; - - -/** - * Ring-0 dynamic mapping cache entry. - * - * This structure tracks - */ -typedef struct PGMR0DYNMAPENTRY -{ - /** The physical address of the currently mapped page. - * This is duplicated for three reasons: cache locality, cache policy of the PT - * mappings and sanity checks. */ - RTHCPHYS HCPhys; - /** Pointer to the page. */ - void *pvPage; - /** The number of references. */ - int32_t volatile cRefs; - /** PTE pointer union. */ - union PGMR0DYNMAPENTRY_PPTE - { - /** PTE pointer, 32-bit legacy version. */ - PX86PTE pLegacy; - /** PTE pointer, PAE version. */ - PX86PTEPAE pPae; - /** PTE pointer, the void version. */ - void *pv; - } uPte; - /** CPUs that haven't invalidated this entry after its last update. */ - RTCPUSET PendingSet; -} PGMR0DYNMAPENTRY; -/** Pointer to a ring-0 dynamic mapping cache entry. */ -typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY; - - -/** - * Ring-0 dynamic mapping cache. - * - * This is initialized during VMMR0 module init but no segments are allocated at - * that time. Segments will be added when the first VM is started and removed - * again when the last VM shuts down, thus avoiding consuming memory while dormant. - * At module termination, the remaining bits will be freed up. - */ -typedef struct PGMR0DYNMAP -{ - /** The usual magic number / eye catcher (PGMR0DYNMAP_MAGIC). */ - uint32_t u32Magic; - /** Spinlock serializing the normal operation of the cache. */ - RTSPINLOCK hSpinlock; - /** Array for tracking and managing the pages. */ - PPGMR0DYNMAPENTRY paPages; - /** The cache size given as a number of pages. */ - uint32_t cPages; - /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */ - bool fLegacyMode; - /** The current load. - * This does not include guard pages. */ - uint32_t cLoad; - /** The max load ever. - * This is maintained to trigger adding of more mapping space. */ - uint32_t cMaxLoad; - /** Initialization / termination lock. */ - RTSEMFASTMUTEX hInitLock; - /** The number of guard pages. */ - uint32_t cGuardPages; - /** The number of users (protected by hInitLock). */ - uint32_t cUsers; - /** Array containing a copy of the original page tables. - * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */ - void *pvSavedPTEs; - /** List of segments. */ - PPGMR0DYNMAPSEG pSegHead; - /** The paging mode. */ - SUPPAGINGMODE enmPgMode; -} PGMR0DYNMAP; -/** Pointer to the ring-0 dynamic mapping cache */ -typedef PGMR0DYNMAP *PPGMR0DYNMAP; - -/** PGMR0DYNMAP::u32Magic.
(Jens Christian Bugge Wesseltoft) */ -#define PGMR0DYNMAP_MAGIC 0x19640201 - - -/** - * Paging level data. - */ -typedef struct PGMR0DYNMAPPGLVL -{ - uint32_t cLevels; /**< The number of levels. */ - struct - { - RTHCPHYS HCPhys; /**< The address of the page for the current level, - * i.e. what hMemObj/hMapObj is currently mapping. */ - RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */ - RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */ - RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */ - uint32_t fPtrShift; /**< The pointer shift count. */ - uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */ - uint64_t fAndMask; /**< And mask to check entry flags. */ - uint64_t fResMask; /**< The result from applying fAndMask. */ - union - { - void *pv; /**< hMapObj address. */ - PX86PGUINT paLegacy; /**< Legacy table view. */ - PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */ - } u; - } a[4]; -} PGMR0DYNMAPPGLVL; -/** Pointer to paging level data. */ -typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL; - - -/******************************************************************************* -* Global Variables * -*******************************************************************************/ -/** Pointer to the ring-0 dynamic mapping cache. */ -static PPGMR0DYNMAP g_pPGMR0DynMap; -/** For overflow testing. */ -static bool g_fPGMR0DynMapTestRunning = false; - - -/******************************************************************************* -* Internal Functions * -*******************************************************************************/ -static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs); -static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis); -static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis); -static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis); -#if 0 /*def DEBUG*/ -static int pgmR0DynMapTest(PVM pVM); -#endif - - -/** - * Initializes the ring-0 dynamic mapping cache. - * - * @returns VBox status code. - */ -VMMR0DECL(int) PGMR0DynMapInit(void) -{ - Assert(!g_pPGMR0DynMap); - - /* - * Create and initialize the cache instance. - */ - PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)RTMemAllocZ(sizeof(*pThis)); - AssertLogRelReturn(pThis, VERR_NO_MEMORY); - int rc = VINF_SUCCESS; - pThis->enmPgMode = SUPR0GetPagingMode(); - switch (pThis->enmPgMode) - { - case SUPPAGINGMODE_32_BIT: - case SUPPAGINGMODE_32_BIT_GLOBAL: - pThis->fLegacyMode = false; - break; - case SUPPAGINGMODE_PAE: - case SUPPAGINGMODE_PAE_GLOBAL: - case SUPPAGINGMODE_PAE_NX: - case SUPPAGINGMODE_PAE_GLOBAL_NX: - case SUPPAGINGMODE_AMD64: - case SUPPAGINGMODE_AMD64_GLOBAL: - case SUPPAGINGMODE_AMD64_NX: - case SUPPAGINGMODE_AMD64_GLOBAL_NX: - pThis->fLegacyMode = false; - break; - default: - rc = VERR_INTERNAL_ERROR; - break; - } - if (RT_SUCCESS(rc)) - { - rc = RTSemFastMutexCreate(&pThis->hInitLock); - if (RT_SUCCESS(rc)) - { - rc = RTSpinlockCreate(&pThis->hSpinlock); - if (RT_SUCCESS(rc)) - { - pThis->u32Magic = PGMR0DYNMAP_MAGIC; - g_pPGMR0DynMap = pThis; - return VINF_SUCCESS; - } - RTSemFastMutexDestroy(pThis->hInitLock); - } - } - RTMemFree(pThis); - return rc; -} - - -/** - * Terminates the ring-0 dynamic mapping cache. - */ -VMMR0DECL(void) PGMR0DynMapTerm(void) -{ - /* - * Destroy the cache. - * - * There is not supposed to be any races here, the loader should - * make sure about that. So, don't bother locking anything. 
- * - * The VM objects should all be destroyed by now, so there is no - * dangling users or anything like that to clean up. This routine - * is just a mirror image of PGMR0DynMapInit. - */ - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - if (pThis) - { - AssertPtr(pThis); - g_pPGMR0DynMap = NULL; - - /* This should *never* happen, but in case it does try not to leak memory. */ - AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages, - ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n", - pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages)); - if (pThis->paPages) - pgmR0DynMapTearDown(pThis); - - /* Free the associated resources. */ - RTSemFastMutexDestroy(pThis->hInitLock); - pThis->hInitLock = NIL_RTSEMFASTMUTEX; - RTSpinlockDestroy(pThis->hSpinlock); - pThis->hSpinlock = NIL_RTSPINLOCK; - pThis->u32Magic = UINT32_MAX; - RTMemFree(pThis); - } -} - - -/** - * Initializes the dynamic mapping cache for a new VM. - * - * @returns VBox status code. - * @param pVM Pointer to the shared VM structure. - */ -VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM) -{ - AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER); - - /* - * Initialize the auto sets. - */ - VMCPUID idCpu = pVM->cCpus; - AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_INTERNAL_ERROR); - while (idCpu-- > 0) - { - PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet; - uint32_t j = RT_ELEMENTS(pSet->aEntries); - while (j-- > 0) - { - pSet->aEntries[j].iPage = UINT16_MAX; - pSet->aEntries[j].cRefs = 0; - pSet->aEntries[j].pvPage = NULL; - pSet->aEntries[j].HCPhys = NIL_RTHCPHYS; - } - pSet->cEntries = PGMMAPSET_CLOSED; - pSet->iSubset = UINT32_MAX; - pSet->iCpu = -1; - memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable)); - } - - /* - * Do we need the cache? Skip the last bit if we don't. - */ - if (!VMMIsHwVirtExtForced(pVM)) - return VINF_SUCCESS; - - /* - * Reference and if necessary setup or expand the cache. - */ - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - AssertPtrReturn(pThis, VERR_INTERNAL_ERROR); - int rc = RTSemFastMutexRequest(pThis->hInitLock); - AssertLogRelRCReturn(rc, rc); - - pThis->cUsers++; - if (pThis->cUsers == 1) - { - rc = pgmR0DynMapSetup(pThis); -#if 0 /*def DEBUG*/ - if (RT_SUCCESS(rc)) - { - rc = pgmR0DynMapTest(pVM); - if (RT_FAILURE(rc)) - pgmR0DynMapTearDown(pThis); - } -#endif - } - else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages)) - rc = pgmR0DynMapExpand(pThis); - if (RT_SUCCESS(rc)) - pVM->pgm.s.pvR0DynMapUsed = pThis; - else - pThis->cUsers--; - - RTSemFastMutexRelease(pThis->hInitLock); - return rc; -} - - -/** - * Terminates the dynamic mapping cache usage for a VM. - * - * @param pVM Pointer to the shared VM structure. - */ -VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM) -{ - /* - * Return immediately if we're not using the cache. - */ - if (!pVM->pgm.s.pvR0DynMapUsed) - return; - - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - AssertPtrReturnVoid(pThis); - - int rc = RTSemFastMutexRequest(pThis->hInitLock); - AssertLogRelRCReturnVoid(rc); - - if (pVM->pgm.s.pvR0DynMapUsed == pThis) - { - pVM->pgm.s.pvR0DynMapUsed = NULL; - -#ifdef VBOX_STRICT - PGMR0DynMapAssertIntegrity(); -#endif - - /* - * Clean up and check the auto sets. 
- */ - VMCPUID idCpu = pVM->cCpus; - while (idCpu-- > 0) - { - PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet; - uint32_t j = pSet->cEntries; - if (j <= RT_ELEMENTS(pSet->aEntries)) - { - /* - * The set is open, close it. - */ - while (j-- > 0) - { - int32_t cRefs = pSet->aEntries[j].cRefs; - uint32_t iPage = pSet->aEntries[j].iPage; - LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage)); - if (iPage < pThis->cPages && cRefs > 0) - pgmR0DynMapReleasePage(pThis, iPage, cRefs); - else - AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages)); - - pSet->aEntries[j].iPage = UINT16_MAX; - pSet->aEntries[j].cRefs = 0; - pSet->aEntries[j].pvPage = NULL; - pSet->aEntries[j].HCPhys = NIL_RTHCPHYS; - } - pSet->cEntries = PGMMAPSET_CLOSED; - pSet->iSubset = UINT32_MAX; - pSet->iCpu = -1; - } - else - AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j)); - - j = RT_ELEMENTS(pSet->aEntries); - while (j-- > 0) - { - Assert(pSet->aEntries[j].iPage == UINT16_MAX); - Assert(!pSet->aEntries[j].cRefs); - } - } - - /* - * Release our reference to the mapping cache. - */ - Assert(pThis->cUsers > 0); - pThis->cUsers--; - if (!pThis->cUsers) - pgmR0DynMapTearDown(pThis); - } - else - AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis)); - - RTSemFastMutexRelease(pThis->hInitLock); -} - - -/** - * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper. - * - * @param idCpu The current CPU. - * @param pvUser1 The dynamic mapping cache instance. - * @param pvUser2 Unused, NULL. - */ -static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2) -{ - Assert(!pvUser2); - PPGMR0DYNMAP pThis = (PPGMR0DYNMAP)pvUser1; - Assert(pThis == g_pPGMR0DynMap); - PPGMR0DYNMAPENTRY paPages = pThis->paPages; - uint32_t iPage = pThis->cPages; - while (iPage-- > 0) - ASMInvalidatePage(paPages[iPage].pvPage); -} - - -/** - * Shoot down the TLBs for every single cache entry on all CPUs. - * - * @returns IPRT status code (RTMpOnAll). - * @param pThis The dynamic mapping cache instance. - */ -static int pgmR0DynMapTlbShootDown(PPGMR0DYNMAP pThis) -{ - int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL); - AssertRC(rc); - if (RT_FAILURE(rc)) - { - uint32_t iPage = pThis->cPages; - while (iPage-- > 0) - ASMInvalidatePage(pThis->paPages[iPage].pvPage); - } - return rc; -} - - -/** - * Calculate the new cache size based on cMaxLoad statistics. - * - * @returns Number of pages. - * @param pThis The dynamic mapping cache instance. - * @param pcMinPages The minimal size in pages. - */ -static uint32_t pgmR0DynMapCalcNewSize(PPGMR0DYNMAP pThis, uint32_t *pcMinPages) -{ - Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES); - - /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */ - RTCPUID cCpus = RTMpGetCount(); - AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0); - uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU; - uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN; - - /* adjust against cMaxLoad. */ - AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad)); - if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES) - pThis->cMaxLoad = 0; - - while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages)) - cPages += PGMR0DYNMAP_PAGES_PER_CPU; - - if (pThis->cMaxLoad > cMinPages) - cMinPages = pThis->cMaxLoad; - - /* adjust against max and current size. 
*/ - if (cPages < pThis->cPages) - cPages = pThis->cPages; - cPages *= PGMR0DYNMAP_GUARD_PAGES + 1; - if (cPages > PGMR0DYNMAP_MAX_PAGES) - cPages = PGMR0DYNMAP_MAX_PAGES; - - if (cMinPages < pThis->cPages) - cMinPages = pThis->cPages; - cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1; - if (cMinPages > PGMR0DYNMAP_MAX_PAGES) - cMinPages = PGMR0DYNMAP_MAX_PAGES; - - Assert(cMinPages); - *pcMinPages = cMinPages; - return cPages; -} - - -/** - * Initializes the paging level data. - * - * @param pThis The dynamic mapping cache instance. - * @param pPgLvl The paging level data. - */ -void pgmR0DynMapPagingArrayInit(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl) -{ - RTCCUINTREG cr4 = ASMGetCR4(); - switch (pThis->enmPgMode) - { - case SUPPAGINGMODE_32_BIT: - case SUPPAGINGMODE_32_BIT_GLOBAL: - pPgLvl->cLevels = 2; - pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK; - pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0); - pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW; - pPgLvl->a[0].fPtrMask = X86_PD_MASK; - pPgLvl->a[0].fPtrShift = X86_PD_SHIFT; - - pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK; - pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW; - pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW; - pPgLvl->a[1].fPtrMask = X86_PT_MASK; - pPgLvl->a[1].fPtrShift = X86_PT_SHIFT; - break; - - case SUPPAGINGMODE_PAE: - case SUPPAGINGMODE_PAE_GLOBAL: - case SUPPAGINGMODE_PAE_NX: - case SUPPAGINGMODE_PAE_GLOBAL_NX: - pPgLvl->cLevels = 3; - pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK; - pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE; - pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT; - pPgLvl->a[0].fAndMask = X86_PDPE_P; - pPgLvl->a[0].fResMask = X86_PDPE_P; - - pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK; - pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK; - pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT; - pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0); - pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW; - - pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK; - pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK; - pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT; - pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW; - pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW; - break; - - case SUPPAGINGMODE_AMD64: - case SUPPAGINGMODE_AMD64_GLOBAL: - case SUPPAGINGMODE_AMD64_NX: - case SUPPAGINGMODE_AMD64_GLOBAL_NX: - pPgLvl->cLevels = 4; - pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK; - pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT; - pPgLvl->a[0].fPtrMask = X86_PML4_MASK; - pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW; - pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW; - - pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK; - pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT; - pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64; - pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */; - pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW; - - pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK; - pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT; - pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK; - pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0); - pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW; - - pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK; - pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT; - pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK; - pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW; - pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW; - break; - - default: - AssertFailed(); - pPgLvl->cLevels = 0; - break; - } - - for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. 
*/ - { - pPgLvl->a[i].HCPhys = NIL_RTHCPHYS; - pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ; - pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ; - pPgLvl->a[i].u.pv = NULL; - } -} - - -/** - * Maps a PTE. - * - * This will update the segment structure when new PTs are mapped. - * - * It also assumes that we (for paranoid reasons) wish to establish a mapping - * chain from CR3 to the PT that all corresponds to the processor we're - * currently running on, and go about this by running with interrupts disabled - * and restarting from CR3 for every change. - * - * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had - * to re-enable interrupts. - * @param pThis The dynamic mapping cache instance. - * @param pPgLvl The paging level structure. - * @param pvPage The page. - * @param pSeg The segment. - * @param cMaxPTs The max number of PTs expected in the segment. - * @param ppvPTE Where to store the PTE address. - */ -static int pgmR0DynMapPagingArrayMapPte(PPGMR0DYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage, - PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE) -{ - Assert(!(ASMGetFlags() & X86_EFL_IF)); - void *pvEntry = NULL; - X86PGPAEUINT uEntry = ASMGetCR3(); - for (uint32_t i = 0; i < pPgLvl->cLevels; i++) - { - RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask; - if (pPgLvl->a[i].HCPhys != HCPhys) - { - /* - * Need to remap this level. - * The final level, the PT, will not be freed since that is what it's all about. - */ - ASMIntEnable(); - if (i + 1 == pPgLvl->cLevels) - AssertReturn(pSeg->cPTs < cMaxPTs, VERR_INTERNAL_ERROR); - else - { - int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2); - pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ; - } - - int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE); - if (RT_SUCCESS(rc)) - { - rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj, - (void *)-1 /* pvFixed */, 0 /* cbAlignment */, - RTMEM_PROT_WRITE | RTMEM_PROT_READ); - if (RT_SUCCESS(rc)) - { - pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj); - AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv)); - pPgLvl->a[i].HCPhys = HCPhys; - if (i + 1 == pPgLvl->cLevels) - pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj; - ASMIntDisable(); - return VINF_TRY_AGAIN; - } - - pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ; - } - else - pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ; - pPgLvl->a[i].HCPhys = NIL_RTHCPHYS; - return rc; - } - - /* - * The next level. - */ - uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask; - if (pThis->fLegacyMode) - { - pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry]; - uEntry = pPgLvl->a[i].u.paLegacy[iEntry]; - } - else - { - pvEntry = &pPgLvl->a[i].u.paPae[iEntry]; - uEntry = pPgLvl->a[i].u.paPae[iEntry]; - } - - if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask) - { - LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n" - "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n", - i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask, - pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode)); - return VERR_INTERNAL_ERROR; - } - /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/ - } - - /* made it thru without needing to remap anything. 
*/ - *ppvPTE = pvEntry; - return VINF_SUCCESS; -} - - -/** - * Sets up a guard page. - * - * @param pThis The dynamic mapping cache instance. - * @param pPage The page. - */ -DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMR0DYNMAP pThis, PPGMR0DYNMAPENTRY pPage) -{ - memset(pPage->pvPage, 0xfd, PAGE_SIZE); - pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT; - pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS; -#ifdef PGMR0DYNMAP_GUARD_NP - ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P); -#else - if (pThis->fLegacyMode) - ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE); - else - ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE); -#endif - pThis->cGuardPages++; -} - - -/** - * Adds a new segment of the specified size. - * - * @returns VBox status code. - * @param pThis The dynamic mapping cache instance. - * @param cPages The size of the new segment, give as a page count. - */ -static int pgmR0DynMapAddSeg(PPGMR0DYNMAP pThis, uint32_t cPages) -{ - int rc2; - AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED); - - /* - * Do the array reallocations first. - * (The pages array has to be replaced behind the spinlock of course.) - */ - void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages)); - if (!pvSavedPTEs) - return VERR_NO_MEMORY; - pThis->pvSavedPTEs = pvSavedPTEs; - - void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages)); - if (!pvPages) - { - pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages); - if (pvSavedPTEs) - pThis->pvSavedPTEs = pvSavedPTEs; - return VERR_NO_MEMORY; - } - - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - - memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages); - void *pvToFree = pThis->paPages; - pThis->paPages = (PPGMR0DYNMAPENTRY)pvPages; - - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - RTMemFree(pvToFree); - - /* - * Allocate the segment structure and pages of memory, then touch all the pages (paranoia). - */ - uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2; - PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs])); - if (!pSeg) - return VERR_NO_MEMORY; - pSeg->pNext = NULL; - pSeg->cPages = cPages; - pSeg->iPage = pThis->cPages; - pSeg->cPTs = 0; - int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false); - if (RT_SUCCESS(rc)) - { - uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj); - AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage)); - memset(pbPage, 0xfe, cPages << PAGE_SHIFT); - - /* - * Walk thru the pages and set them up with a mapping of their PTE and everything. - */ - ASMIntDisable(); - PGMR0DYNMAPPGLVL PgLvl; - pgmR0DynMapPagingArrayInit(pThis, &PgLvl); - uint32_t const iEndPage = pSeg->iPage + cPages; - for (uint32_t iPage = pSeg->iPage; - iPage < iEndPage; - iPage++, pbPage += PAGE_SIZE) - { - /* Initialize the page data. */ - pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS; - pThis->paPages[iPage].pvPage = pbPage; - pThis->paPages[iPage].cRefs = 0; - pThis->paPages[iPage].uPte.pPae = 0; - RTCpuSetFill(&pThis->paPages[iPage].PendingSet); - - /* Map its page table, retry until we've got a clean run (paranoia). 
*/ - do - rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs, - &pThis->paPages[iPage].uPte.pv); - while (rc == VINF_TRY_AGAIN); - if (RT_FAILURE(rc)) - break; - - /* Save the PTE. */ - if (pThis->fLegacyMode) - ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u; - else - ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u; - -#ifdef VBOX_STRICT - /* Check that we've got the right entry. */ - RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage); - RTHCPHYS HCPhysPte = pThis->fLegacyMode - ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK - : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK; - if (HCPhysPage != HCPhysPte) - { - LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n", - iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv)); - rc = VERR_INTERNAL_ERROR; - break; - } -#endif - } /* for each page */ - ASMIntEnable(); - - /* cleanup non-PT mappings */ - for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++) - RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */); - - if (RT_SUCCESS(rc)) - { -#if PGMR0DYNMAP_GUARD_PAGES > 0 - /* - * Setup guard pages. - * (Note: TLBs will be shot down later on.) - */ - uint32_t iPage = pSeg->iPage; - while (iPage < iEndPage) - { - for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++) - pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]); - iPage++; /* the guarded page */ - } - - /* Make sure the very last page is a guard page too. */ - iPage = iEndPage - 1; - if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT) - pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]); -#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */ - - /* - * Commit it by adding the segment to the list and updating the page count. - */ - pSeg->pNext = pThis->pSegHead; - pThis->pSegHead = pSeg; - pThis->cPages += cPages; - return VINF_SUCCESS; - } - - /* - * Bail out. - */ - while (pSeg->cPTs-- > 0) - { - rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */); - AssertRC(rc2); - pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ; - } - - rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); - AssertRC(rc2); - pSeg->hMemObj = NIL_RTR0MEMOBJ; - } - RTMemFree(pSeg); - - /* Don't bother resizing the arrays, but free them if we're the only user. */ - if (!pThis->cPages) - { - RTMemFree(pThis->paPages); - pThis->paPages = NULL; - RTMemFree(pThis->pvSavedPTEs); - pThis->pvSavedPTEs = NULL; - } - return rc; -} - - -/** - * Called by PGMR0DynMapInitVM under the init lock. - * - * @returns VBox status code. - * @param pThis The dynamic mapping cache instance. - */ -static int pgmR0DynMapSetup(PPGMR0DYNMAP pThis) -{ - /* - * Calc the size and add a segment of that size. - */ - uint32_t cMinPages; - uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages); - AssertReturn(cPages, VERR_INTERNAL_ERROR); - int rc = pgmR0DynMapAddSeg(pThis, cPages); - if (rc == VERR_NO_MEMORY) - { - /* - * Try adding smaller segments. 
- */ - do - rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES); - while (RT_SUCCESS(rc) && pThis->cPages < cPages); - if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages) - rc = VINF_SUCCESS; - if (rc == VERR_NO_MEMORY) - { - if (pThis->cPages) - pgmR0DynMapTearDown(pThis); - rc = VERR_PGM_DYNMAP_SETUP_ERROR; - } - } - Assert(ASMGetFlags() & X86_EFL_IF); - -#if PGMR0DYNMAP_GUARD_PAGES > 0 - /* paranoia */ - if (RT_SUCCESS(rc)) - pgmR0DynMapTlbShootDown(pThis); -#endif - return rc; -} - - -/** - * Called by PGMR0DynMapInitVM under the init lock. - * - * @returns VBox status code. - * @param pThis The dynamic mapping cache instance. - */ -static int pgmR0DynMapExpand(PPGMR0DYNMAP pThis) -{ - /* - * Calc the new target size and add a segment of the appropriate size. - */ - uint32_t cMinPages; - uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages); - AssertReturn(cPages, VERR_INTERNAL_ERROR); - if (pThis->cPages >= cPages) - return VINF_SUCCESS; - - uint32_t cAdd = cPages - pThis->cPages; - int rc = pgmR0DynMapAddSeg(pThis, cAdd); - if (rc == VERR_NO_MEMORY) - { - /* - * Try adding smaller segments. - */ - do - rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES); - while (RT_SUCCESS(rc) && pThis->cPages < cPages); - if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages) - rc = VINF_SUCCESS; - if (rc == VERR_NO_MEMORY) - rc = VERR_PGM_DYNMAP_EXPAND_ERROR; - } - Assert(ASMGetFlags() & X86_EFL_IF); - -#if PGMR0DYNMAP_GUARD_PAGES > 0 - /* paranoia */ - if (RT_SUCCESS(rc)) - pgmR0DynMapTlbShootDown(pThis); -#endif - return rc; -} - - -/** - * Called by PGMR0DynMapTermVM under the init lock. - * - * @returns VBox status code. - * @param pThis The dynamic mapping cache instance. - */ -static void pgmR0DynMapTearDown(PPGMR0DYNMAP pThis) -{ - /* - * Restore the original page table entries - */ - PPGMR0DYNMAPENTRY paPages = pThis->paPages; - uint32_t iPage = pThis->cPages; - if (pThis->fLegacyMode) - { - X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs; - while (iPage-- > 0) - { - X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u; - X86PGUINT uOld2 = uOld; NOREF(uOld2); - X86PGUINT uNew = paSavedPTEs[iPage]; - while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld)) - AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew)); - Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]); - } - } - else - { - X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs; - while (iPage-- > 0) - { - X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u; - X86PGPAEUINT uOld2 = uOld; NOREF(uOld2); - X86PGPAEUINT uNew = paSavedPTEs[iPage]; - while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld)) - AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew)); - Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]); - } - } - - /* - * Shoot down the TLBs on all CPUs before freeing them. - */ - pgmR0DynMapTlbShootDown(pThis); - - /* - * Free the segments. 
- */ - while (pThis->pSegHead) - { - int rc; - PPGMR0DYNMAPSEG pSeg = pThis->pSegHead; - pThis->pSegHead = pSeg->pNext; - - uint32_t iPT = pSeg->cPTs; - while (iPT-- > 0) - { - rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc); - pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ; - } - rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc); - pSeg->hMemObj = NIL_RTR0MEMOBJ; - pSeg->pNext = NULL; - pSeg->iPage = UINT16_MAX; - pSeg->cPages = 0; - pSeg->cPTs = 0; - RTMemFree(pSeg); - } - - /* - * Free the arrays and restore the initial state. - * The cLoadMax value is left behind for the next setup. - */ - RTMemFree(pThis->paPages); - pThis->paPages = NULL; - RTMemFree(pThis->pvSavedPTEs); - pThis->pvSavedPTEs = NULL; - pThis->cPages = 0; - pThis->cLoad = 0; - pThis->cGuardPages = 0; -} - - -/** - * Release references to a page, caller owns the spin lock. - * - * @param pThis The dynamic mapping cache instance. - * @param iPage The page. - * @param cRefs The number of references to release. - */ -DECLINLINE(void) pgmR0DynMapReleasePageLocked(PPGMR0DYNMAP pThis, uint32_t iPage, int32_t cRefs) -{ - cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs; - AssertMsg(cRefs >= 0, ("%d\n", cRefs)); - if (!cRefs) - pThis->cLoad--; -} - - -/** - * Release references to a page, caller does not own the spin lock. - * - * @param pThis The dynamic mapping cache instance. - * @param iPage The page. - * @param cRefs The number of references to release. - */ -static void pgmR0DynMapReleasePage(PPGMR0DYNMAP pThis, uint32_t iPage, uint32_t cRefs) -{ - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs); - RTSpinlockRelease(pThis->hSpinlock, &Tmp); -} - - -/** - * pgmR0DynMapPage worker that deals with the tedious bits. - * - * @returns The page index on success, UINT32_MAX on failure. - * @param pThis The dynamic mapping cache instance. - * @param HCPhys The address of the page to be mapped. - * @param iPage The page index pgmR0DynMapPage hashed HCPhys to. - * @param pVM The shared VM structure, for statistics only. - */ -static uint32_t pgmR0DynMapPageSlow(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVM pVM) -{ -#ifdef VBOX_WITH_STATISTICS - PVMCPU pVCpu = VMMGetCpu(pVM); -#endif - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlow); - - /* - * Check if any of the first 3 pages are unreferenced since the caller - * already has made sure they aren't matching. - */ -#ifdef VBOX_WITH_STATISTICS - bool fLooped = false; -#endif - uint32_t const cPages = pThis->cPages; - PPGMR0DYNMAPENTRY paPages = pThis->paPages; - uint32_t iFreePage; - if (!paPages[iPage].cRefs) - iFreePage = iPage; - else if (!paPages[(iPage + 1) % cPages].cRefs) - iFreePage = (iPage + 1) % cPages; - else if (!paPages[(iPage + 2) % cPages].cRefs) - iFreePage = (iPage + 2) % cPages; - else - { - /* - * Search for an unused or matching entry. 
- */ - iFreePage = (iPage + 3) % cPages; - for (;;) - { - if (paPages[iFreePage].HCPhys == HCPhys) - { - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLoopHits); - return iFreePage; - } - if (!paPages[iFreePage].cRefs) - break; - - /* advance */ - iFreePage = (iFreePage + 1) % cPages; - if (RT_UNLIKELY(iFreePage == iPage)) - return UINT32_MAX; - } - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLoopMisses); -#ifdef VBOX_WITH_STATISTICS - fLooped = true; -#endif - } - Assert(iFreePage < cPages); - -#if 0 //def VBOX_WITH_STATISTICS - /* Check for lost hits. */ - if (!fLooped) - for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages) - if (paPages[iPage2].HCPhys == HCPhys) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageSlowLostHits); -#endif - - /* - * Setup the new entry. - */ - /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/ - paPages[iFreePage].HCPhys = HCPhys; - RTCpuSetFill(&paPages[iFreePage].PendingSet); - if (pThis->fLegacyMode) - { - X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u; - X86PGUINT uOld2 = uOld; NOREF(uOld2); - X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)) - | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D - | (HCPhys & X86_PTE_PG_MASK); - while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld)) - AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew)); - Assert(paPages[iFreePage].uPte.pLegacy->u == uNew); - } - else - { - X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u; - X86PGPAEUINT uOld2 = uOld; NOREF(uOld2); - X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)) - | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D - | (HCPhys & X86_PTE_PAE_PG_MASK); - while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld)) - AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew)); - Assert(paPages[iFreePage].uPte.pPae->u == uNew); - /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/ - } - return iFreePage; -} - - -/** - * Maps a page into the pool. - * - * @returns Page index on success, UINT32_MAX on failure. - * @param pThis The dynamic mapping cache instance. - * @param HCPhys The address of the page to be mapped. - * @param iRealCpu The real cpu set index. (optimization) - * @param pVM The shared VM structure, for statistics only. - * @param ppvPage Where to the page address. - */ -DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMR0DYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVM pVM, void **ppvPage) -{ -#ifdef VBOX_WITH_STATISTICS - PVMCPU pVCpu = VMMGetCpu(pVM); -#endif - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys)); - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPage); - - /* - * Find an entry, if possible a matching one. The HCPhys address is hashed - * down to a page index, collisions are handled by linear searching. - * Optimized for a hit in the first 3 pages. - * - * Field easy hits here and defer the tedious searching and inserting - * to pgmR0DynMapPageSlow(). 
- */ - uint32_t const cPages = pThis->cPages; - uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages; - PPGMR0DYNMAPENTRY paPages = pThis->paPages; - if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys)) - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits0); - else - { - uint32_t iPage2 = (iPage + 1) % cPages; - if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys)) - { - iPage = iPage2; - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits1); - } - else - { - iPage2 = (iPage + 2) % cPages; - if (paPages[iPage2].HCPhys == HCPhys) - { - iPage = iPage2; - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageHits2); - } - else - { - iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVM); - if (RT_UNLIKELY(iPage == UINT32_MAX)) - { - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - *ppvPage = NULL; - return iPage; - } - } - } - } - - /* - * Reference it, update statistics and get the return address. - */ - int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs); - if (cRefs == 1) - { - pThis->cLoad++; - if (pThis->cLoad > pThis->cMaxLoad) - pThis->cMaxLoad = pThis->cLoad; - AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages)); - } - else if (RT_UNLIKELY(cRefs <= 0)) - { - ASMAtomicDecS32(&paPages[iPage].cRefs); - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - *ppvPage = NULL; - AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%p HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX); - } - void *pvPage = paPages[iPage].pvPage; - - /* - * Invalidate the entry? - */ - bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu); - if (RT_UNLIKELY(fInvalidateIt)) - RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu); - - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - - /* - * Do the actual invalidation outside the spinlock. - */ - if (RT_UNLIKELY(fInvalidateIt)) - { - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapPageInvlPg); - ASMInvalidatePage(pvPage); - } - - *ppvPage = pvPage; - return iPage; -} - - -/** - * Assert the the integrity of the pool. - * - * @returns VBox status code. - */ -VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void) -{ - /* - * Basic pool stuff that doesn't require any lock, just assumes we're a user. - */ - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - if (!pThis) - return VINF_SUCCESS; - AssertPtrReturn(pThis, VERR_INVALID_POINTER); - AssertReturn(pThis->u32Magic == PGMR0DYNMAP_MAGIC, VERR_INVALID_MAGIC); - if (!pThis->cUsers) - return VERR_INVALID_PARAMETER; - - - int rc = VINF_SUCCESS; - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - -#define CHECK_RET(expr, a) \ - do { \ - if (RT_UNLIKELY(!(expr))) \ - { \ - RTSpinlockRelease(pThis->hSpinlock, &Tmp); \ - RTAssertMsg1Weak(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \ - RTAssertMsg2Weak a; \ - return VERR_INTERNAL_ERROR; \ - } \ - } while (0) - - /* - * Check that the PTEs are correct. 
- */ - uint32_t cGuard = 0; - uint32_t cLoad = 0; - PPGMR0DYNMAPENTRY paPages = pThis->paPages; - uint32_t iPage = pThis->cPages; - if (pThis->fLegacyMode) - { - PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs); - while (iPage-- > 0) - { - CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage)); - if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT - && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS) - { -#ifdef PGMR0DYNMAP_GUARD_NP - CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P), - ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage])); -#else - CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE, - ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u)); -#endif - cGuard++; - } - else if (paPages[iPage].HCPhys != NIL_RTHCPHYS) - { - CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys)); - X86PGUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)) - | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D - | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK); - CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte, - ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte)); - if (paPages[iPage].cRefs) - cLoad++; - } - else - CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage], - ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage])); - } - } - else - { - PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs); - while (iPage-- > 0) - { - CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage)); - if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT - && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS) - { -#ifdef PGMR0DYNMAP_GUARD_NP - CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P), - ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage])); -#else - CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE, - ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u)); -#endif - cGuard++; - } - else if (paPages[iPage].HCPhys != NIL_RTHCPHYS) - { - CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys)); - X86PGPAEUINT uPte = (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT)) - | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D - | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK); - CHECK_RET(paPages[iPage].uPte.pPae->u == uPte, - ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte)); - if (paPages[iPage].cRefs) - cLoad++; - } - else - CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage], - ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage])); - } - } - - CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad)); - CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages)); - -#undef CHECK_RET - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - return VINF_SUCCESS; -} - - -/** - * Signals the start of a new set of mappings. - * - * Mostly for strictness. PGMDynMapHCPage won't work unless this - * API is called. - * - * @param pVCpu The shared data for the current virtual CPU. 
- */ -VMMDECL(void) PGMDynMapStartAutoSet(PVMCPU pVCpu) -{ - Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED); - Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX); - pVCpu->pgm.s.AutoSet.cEntries = 0; - pVCpu->pgm.s.AutoSet.iCpu = RTMpCpuIdToSetIndex(RTMpCpuId()); -} - - -/** - * Starts or migrates the autoset of a virtual CPU. - * - * This is used by HWACCMR0Enter. When we've longjumped out of the HWACCM - * execution loop with the set open, we'll migrate it when re-entering. While - * under normal circumstances, we'll start it so VMXR0LoadGuestState can access - * guest memory. - * - * @returns @c true if started, @c false if migrated. - * @param pVCpu The shared data for the current virtual CPU. - * @thread EMT - */ -VMMDECL(bool) PGMDynMapStartOrMigrateAutoSet(PVMCPU pVCpu) -{ - bool fStartIt = pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED; - if (fStartIt) - PGMDynMapStartAutoSet(pVCpu); - else - PGMDynMapMigrateAutoSet(pVCpu); - return fStartIt; -} - - -/** - * Worker that performs the actual flushing of the set. - * - * @param pSet The set to flush. - * @param cEntries The number of entries. - */ -DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries) -{ - /* - * Release any pages it's referencing. - */ - if ( cEntries != 0 - && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries))) - { - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - - uint32_t i = cEntries; - while (i-- > 0) - { - uint32_t iPage = pSet->aEntries[i].iPage; - Assert(iPage < pThis->cPages); - int32_t cRefs = pSet->aEntries[i].cRefs; - Assert(cRefs > 0); - pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs); - - pSet->aEntries[i].iPage = UINT16_MAX; - pSet->aEntries[i].cRefs = 0; - } - - Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages); - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - } -} - - -/** - * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates - * since the PGMDynMapStartAutoSet call. - * - * @param pVCpu The shared data for the current virtual CPU. - */ -VMMDECL(void) PGMDynMapReleaseAutoSet(PVMCPU pVCpu) -{ - PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; - - /* - * Close and flush the set. - */ - uint32_t cEntries = pSet->cEntries; - AssertReturnVoid(cEntries != PGMMAPSET_CLOSED); - pSet->cEntries = PGMMAPSET_CLOSED; - pSet->iSubset = UINT32_MAX; - pSet->iCpu = -1; - - STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]); - AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries)); - if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100) - Log(("PGMDynMapReleaseAutoSet: cEntries=%d\n", pSet->cEntries)); - - pgmDynMapFlushAutoSetWorker(pSet, cEntries); -} - - -/** - * Flushes the set if it's above a certain threshold. - * - * @param pVCpu The shared data for the current virtual CPU. - */ -VMMDECL(void) PGMDynMapFlushAutoSet(PVMCPU pVCpu) -{ - PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; - AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags())); - - /* - * Only flush it if it's 45% full. 
- */ - uint32_t cEntries = pSet->cEntries; - AssertReturnVoid(cEntries != PGMMAPSET_CLOSED); - STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]); - if (cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100) - { - pSet->cEntries = 0; - - AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries)); - Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries)); - - pgmDynMapFlushAutoSetWorker(pSet, cEntries); - AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags())); - } -} - - -/** - * Migrates the automatic mapping set of the current vCPU if it's active and - * necessary. - * - * This is called when re-entering the hardware assisted execution mode after a - * nip down to ring-3. We run the risk that the CPU might have change and we - * will therefore make sure all the cache entries currently in the auto set will - * be valid on the new CPU. If the cpu didn't change nothing will happen as all - * the entries will have been flagged as invalidated. - * - * @param pVCpu The shared data for the current virtual CPU. - * @thread EMT - */ -VMMDECL(void) PGMDynMapMigrateAutoSet(PVMCPU pVCpu) -{ - PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; - int32_t iRealCpu = RTMpCpuIdToSetIndex(RTMpCpuId()); - if (pSet->iCpu != iRealCpu) - { - uint32_t i = pSet->cEntries; - if (i != PGMMAPSET_CLOSED) - { - AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i)); - if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries))) - { - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - - while (i-- > 0) - { - Assert(pSet->aEntries[i].cRefs > 0); - uint32_t iPage = pSet->aEntries[i].iPage; - Assert(iPage < pThis->cPages); - if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu)) - { - RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu); - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - - ASMInvalidatePage(pThis->paPages[iPage].pvPage); - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapMigrateInvlPg); - - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - } - } - - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - } - } - pSet->iCpu = iRealCpu; - } -} - - -/** - * Worker function that flushes the current subset. - * - * This is called when the set is popped or when the set - * hash a too high load. As also pointed out elsewhere, the - * whole subset thing is a hack for working around code that - * accesses too many pages. Like PGMPool. - * - * @param pSet The set which subset to flush. - */ -static void pgmDynMapFlushSubset(PPGMMAPSET pSet) -{ - uint32_t iSubset = pSet->iSubset; - uint32_t i = pSet->cEntries; - Assert(i <= RT_ELEMENTS(pSet->aEntries)); - if ( i > iSubset - && i <= RT_ELEMENTS(pSet->aEntries)) - { - Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset)); - pSet->cEntries = iSubset; - - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER; - RTSpinlockAcquire(pThis->hSpinlock, &Tmp); - - while (i-- > iSubset) - { - uint32_t iPage = pSet->aEntries[i].iPage; - Assert(iPage < pThis->cPages); - int32_t cRefs = pSet->aEntries[i].cRefs; - Assert(cRefs > 0); - pgmR0DynMapReleasePageLocked(pThis, iPage, cRefs); - - pSet->aEntries[i].iPage = UINT16_MAX; - pSet->aEntries[i].cRefs = 0; - } - - RTSpinlockRelease(pThis->hSpinlock, &Tmp); - } -} - - -/** - * Creates a subset. 
- * - * A subset is a hack to avoid having to rewrite code that touches a lot of - * pages. It prevents the mapping set from being overflowed by automatically - * flushing previous mappings when a certain threshold is reached. - * - * Pages mapped after calling this function are only valid until the next page - * is mapped. - * - * @returns The index of the previous subset. Pass this to - * PGMDynMapPopAutoSubset when poping it. - * @param pVCpu Pointer to the virtual cpu data. - */ -VMMDECL(uint32_t) PGMDynMapPushAutoSubset(PVMCPU pVCpu) -{ - PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; - AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX); - uint32_t iPrevSubset = pSet->iSubset; - LogFlow(("PGMDynMapPushAutoSubset: pVCpu=%p iPrevSubset=%u\n", pVCpu, iPrevSubset)); - - pSet->iSubset = pSet->cEntries; - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSubsets); - return iPrevSubset; -} - - -/** - * Pops a subset created by a previous call to PGMDynMapPushAutoSubset. - * - * @param pVCpu Pointer to the virtual cpu data. - * @param iPrevSubset What PGMDynMapPushAutoSubset returned. - */ -VMMDECL(void) PGMDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset) -{ - PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; - uint32_t cEntries = pSet->cEntries; - LogFlow(("PGMDynMapPopAutoSubset: pVCpu=%p iPrevSubset=%u iSubset=%u cEntries=%u\n", pVCpu, iPrevSubset, pSet->iSubset, cEntries)); - AssertReturnVoid(cEntries != PGMMAPSET_CLOSED); - AssertReturnVoid(pSet->iSubset >= iPrevSubset || iPrevSubset == UINT32_MAX); - STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]); - if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100 - && cEntries != pSet->iSubset) - { - AssertMsg(cEntries < PGMMAPSET_MAX_FILL, ("%u\n", cEntries)); - pgmDynMapFlushSubset(pSet); - } - pSet->iSubset = iPrevSubset; -} - - -/** - * As a final resort for a full auto set, try merge duplicate entries. - * - * @param pSet The set. - */ -static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet) -{ - for (uint32_t i = 0 ; i < pSet->cEntries; i++) - { - uint16_t const iPage = pSet->aEntries[i].iPage; - uint32_t j = i + 1; - while (j < pSet->cEntries) - { - if (pSet->aEntries[j].iPage != iPage) - j++; - else if ((uint32_t)pSet->aEntries[i].cRefs + (uint32_t)pSet->aEntries[j].cRefs < UINT16_MAX) - { - /* merge j into i removing j. */ - pSet->aEntries[i].cRefs += pSet->aEntries[j].cRefs; - pSet->cEntries--; - if (j < pSet->cEntries) - { - pSet->aEntries[j] = pSet->aEntries[pSet->cEntries]; - pSet->aEntries[pSet->cEntries].iPage = UINT16_MAX; - pSet->aEntries[pSet->cEntries].cRefs = 0; - } - else - { - pSet->aEntries[j].iPage = UINT16_MAX; - pSet->aEntries[j].cRefs = 0; - } - } - else - { - /* migrate the max number of refs from j into i and quit the inner loop. */ - uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs; - Assert(pSet->aEntries[j].cRefs > cMigrate); - pSet->aEntries[j].cRefs -= cMigrate; - pSet->aEntries[i].cRefs = UINT16_MAX - 1; - break; - } - } - } -} - - -/** - * Common worker code for PGMDynMapHCPhys, pgmR0DynMapHCPageInlined and - * pgmR0DynMapGCPageInlined. - * - * @returns VINF_SUCCESS, bails out to ring-3 on failure. - * @param pVM The shared VM structure (for statistics). - * @param pSet The set. - * @param HCPhys The physical address of the page. - * @param ppv Where to store the address of the mapping on success. - * - * @remarks This is a very hot path. 
- */ -int pgmR0DynMapHCPageCommon(PVM pVM, PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv) -{ - LogFlow(("pgmR0DynMapHCPageCommon: pVM=%p pSet=%p HCPhys=%RHp ppv=%p\n", - pVM, pSet, HCPhys, ppv)); -#ifdef VBOX_WITH_STATISTICS - PVMCPU pVCpu = VMMGetCpu(pVM); -#endif - AssertMsg(pSet->iCpu == RTMpCpuIdToSetIndex(RTMpCpuId()), ("%d %d(%d) efl=%#x\n", pSet->iCpu, RTMpCpuIdToSetIndex(RTMpCpuId()), RTMpCpuId(), ASMGetFlags())); - - /* - * Map it. - */ - void *pvPage; - uint32_t const iPage = pgmR0DynMapPage(g_pPGMR0DynMap, HCPhys, pSet->iCpu, pVM, &pvPage); - if (RT_UNLIKELY(iPage == UINT32_MAX)) - { - RTAssertMsg2Weak("PGMDynMapHCPage: cLoad=%u/%u cPages=%u cGuardPages=%u\n", - g_pPGMR0DynMap->cLoad, g_pPGMR0DynMap->cMaxLoad, g_pPGMR0DynMap->cPages, g_pPGMR0DynMap->cGuardPages); - if (!g_fPGMR0DynMapTestRunning) - VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_VM_R0_ASSERTION, 0); - *ppv = NULL; - return VERR_PGM_DYNMAP_FAILED; - } - - /* - * Add the page to the auto reference set. - * - * The typical usage pattern means that the same pages will be mapped - * several times in the same set. We can catch most of these - * remappings by looking a few pages back into the set. (The searching - * and set optimizing path will hardly ever be used when doing this.) - */ - AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8); - int32_t i = pSet->cEntries; - if (i-- < 5) - { - unsigned iEntry = pSet->cEntries++; - pSet->aEntries[iEntry].cRefs = 1; - pSet->aEntries[iEntry].iPage = iPage; - pSet->aEntries[iEntry].pvPage = pvPage; - pSet->aEntries[iEntry].HCPhys = HCPhys; - pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry; - } - /* Any of the last 5 pages? */ - else if ( pSet->aEntries[i - 0].iPage == iPage - && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1) - pSet->aEntries[i - 0].cRefs++; - else if ( pSet->aEntries[i - 1].iPage == iPage - && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1) - pSet->aEntries[i - 1].cRefs++; - else if ( pSet->aEntries[i - 2].iPage == iPage - && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1) - pSet->aEntries[i - 2].cRefs++; - else if ( pSet->aEntries[i - 3].iPage == iPage - && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1) - pSet->aEntries[i - 3].cRefs++; - else if ( pSet->aEntries[i - 4].iPage == iPage - && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1) - pSet->aEntries[i - 4].cRefs++; - /* Don't bother searching unless we're above a 60% load. */ - else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100)) - { - unsigned iEntry = pSet->cEntries++; - pSet->aEntries[iEntry].cRefs = 1; - pSet->aEntries[iEntry].iPage = iPage; - pSet->aEntries[iEntry].pvPage = pvPage; - pSet->aEntries[iEntry].HCPhys = HCPhys; - pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry; - } - else - { - /* Search the rest of the set. 
*/ - Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries)); - i -= 4; - while (i-- > 0) - if ( pSet->aEntries[i].iPage == iPage - && pSet->aEntries[i].cRefs < UINT16_MAX - 1) - { - pSet->aEntries[i].cRefs++; - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchHits); - break; - } - if (i < 0) - { - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchMisses); - if (pSet->iSubset < pSet->cEntries) - { - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetSearchFlushes); - STAM_COUNTER_INC(&pVCpu->pgm.s.aStatR0DynMapSetSize[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]); - AssertMsg(pSet->cEntries < PGMMAPSET_MAX_FILL, ("%u\n", pSet->cEntries)); - pgmDynMapFlushSubset(pSet); - } - - if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries))) - { - STAM_COUNTER_INC(&pVCpu->pgm.s.StatR0DynMapSetOptimize); - pgmDynMapOptimizeAutoSet(pSet); - } - - if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries))) - { - unsigned iEntry = pSet->cEntries++; - pSet->aEntries[iEntry].cRefs = 1; - pSet->aEntries[iEntry].iPage = iPage; - pSet->aEntries[iEntry].pvPage = pvPage; - pSet->aEntries[iEntry].HCPhys = HCPhys; - pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry; - } - else - { - /* We're screwed. */ - pgmR0DynMapReleasePage(g_pPGMR0DynMap, iPage, 1); - - RTAssertMsg2Weak("PGMDynMapHCPage: set is full!\n"); - if (!g_fPGMR0DynMapTestRunning) - VMMRZCallRing3NoCpu(pVM, VMMCALLRING3_VM_R0_ASSERTION, 0); - *ppv = NULL; - return VERR_PGM_DYNMAP_FULL_SET; - } - } - } - - *ppv = pvPage; - return VINF_SUCCESS; -} - - -#if 0 /* Not used in R0, should internalized the other PGMDynMapHC/GCPage too. */ -/* documented elsewhere - a bit of a mess. */ -VMMDECL(int) PGMDynMapHCPage(PVM pVM, RTHCPHYS HCPhys, void **ppv) -{ -#ifdef VBOX_WITH_STATISTICS - PVMCPU pVCpu = VMMGetCpu(pVM); -#endif - /* - * Validate state. - */ - STAM_PROFILE_START(&pVCpu->pgm.s.StatR0DynMapHCPage, a); - AssertPtr(ppv); - AssertMsg(pVM->pgm.s.pvR0DynMapUsed == g_pPGMR0DynMap, - ("%p != %p\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap)); - AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys)); - PVMCPU pVCpu = VMMGetCpu(pVM); - AssertPtr(pVCpu); - PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet; - AssertMsg(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries), - ("%#x (%u)\n", pSet->cEntries, pSet->cEntries)); - - /* - * Call common code. - */ - int rc = pgmR0DynMapHCPageCommon(pVM, pSet, HCPhys, ppv); - - STAM_PROFILE_STOP(&pVCpu->pgm.s.StatR0DynMapHCPage, a); - return rc; -} -#endif - - -#if 0 /*def DEBUG*/ -/** For pgmR0DynMapTest3PerCpu. */ -typedef struct PGMR0DYNMAPTEST -{ - uint32_t u32Expect; - uint32_t *pu32; - uint32_t volatile cFailures; -} PGMR0DYNMAPTEST; -typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST; - -/** - * Checks that the content of the page is the same on all CPUs, i.e. that there - * are no CPU specfic PTs or similar nasty stuff involved. - * - * @param idCpu The current CPU. - * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure. - * @param pvUser2 Unused, ignored. - */ -static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2) -{ - PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1; - ASMInvalidatePage(pTest->pu32); - if (*pTest->pu32 != pTest->u32Expect) - ASMAtomicIncU32(&pTest->cFailures); - NOREF(pvUser2); NOREF(idCpu); -} - - -/** - * Performs some basic tests in debug builds. 
- */ -static int pgmR0DynMapTest(PVM pVM) -{ - LogRel(("pgmR0DynMapTest: ****** START ******\n")); - PPGMR0DYNMAP pThis = g_pPGMR0DynMap; - PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet; - uint32_t i; - - /* - * Assert internal integrity first. - */ - LogRel(("Test #0\n")); - int rc = PGMR0DynMapAssertIntegrity(); - if (RT_FAILURE(rc)) - return rc; - - void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed; - pVM->pgm.s.pvR0DynMapUsed = pThis; - g_fPGMR0DynMapTestRunning = true; - - /* - * Simple test, map CR3 twice and check that we're getting the - * same mapping address back. - */ - LogRel(("Test #1\n")); - ASMIntDisable(); - PGMDynMapStartAutoSet(&pVM->aCpus[0]); - - uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK; - void *pv = (void *)(intptr_t)-1; - void *pv2 = (void *)(intptr_t)-2; - rc = PGMDynMapHCPage(pVM, cr3, &pv); - int rc2 = PGMDynMapHCPage(pVM, cr3, &pv2); - ASMIntEnable(); - if ( RT_SUCCESS(rc2) - && RT_SUCCESS(rc) - && pv == pv2) - { - LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries))); - rc = PGMR0DynMapAssertIntegrity(); - - /* - * Check that the simple set overflow code works by filling it - * with more CR3 mappings. - */ - LogRel(("Test #2\n")); - ASMIntDisable(); - PGMDynMapMigrateAutoSet(&pVM->aCpus[0]); - for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++) - { - pv2 = (void *)(intptr_t)-4; - rc = PGMDynMapHCPage(pVM, cr3, &pv2); - } - ASMIntEnable(); - if (RT_FAILURE(rc) || pv != pv2) - { - LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i)); - if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR; - } - else if (pSet->cEntries != 5) - { - LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2)); - rc = VERR_INTERNAL_ERROR; - } - else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1 - || pSet->aEntries[3].cRefs != UINT16_MAX - 1 - || pSet->aEntries[2].cRefs != 1 - || pSet->aEntries[1].cRefs != 1 - || pSet->aEntries[0].cRefs != 1) - { - LogRel(("failed(%d): bad set dist: ", __LINE__)); - for (i = 0; i < pSet->cEntries; i++) - LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs)); - LogRel(("\n")); - rc = VERR_INTERNAL_ERROR; - } - if (RT_SUCCESS(rc)) - rc = PGMR0DynMapAssertIntegrity(); - if (RT_SUCCESS(rc)) - { - /* - * Trigger an set optimization run (exactly). - */ - LogRel(("Test #3\n")); - ASMIntDisable(); - PGMDynMapMigrateAutoSet(&pVM->aCpus[0]); - pv2 = NULL; - for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++) - { - pv2 = (void *)(intptr_t)(-5 - i); - rc = PGMDynMapHCPage(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2); - } - ASMIntEnable(); - if (RT_FAILURE(rc) || pv == pv2) - { - LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i)); - if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR; - } - else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries)) - { - LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries))); - rc = VERR_INTERNAL_ERROR; - } - LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries))); - if (RT_SUCCESS(rc)) - rc = PGMR0DynMapAssertIntegrity(); - if (RT_SUCCESS(rc)) - { - /* - * Trigger an overflow error. 
- */ - LogRel(("Test #4\n")); - ASMIntDisable(); - PGMDynMapMigrateAutoSet(&pVM->aCpus[0]); - for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++) - { - rc = PGMDynMapHCPage(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2); - if (RT_SUCCESS(rc)) - rc = PGMR0DynMapAssertIntegrity(); - if (RT_FAILURE(rc)) - break; - } - ASMIntEnable(); - if (rc == VERR_PGM_DYNMAP_FULL_SET) - { - /* flush the set. */ - LogRel(("Test #5\n")); - ASMIntDisable(); - PGMDynMapMigrateAutoSet(&pVM->aCpus[0]); - PGMDynMapReleaseAutoSet(&pVM->aCpus[0]); - PGMDynMapStartAutoSet(&pVM->aCpus[0]); - ASMIntEnable(); - - rc = PGMR0DynMapAssertIntegrity(); - } - else - { - LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__, - rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i)); - if (RT_SUCCESS(rc)) rc = VERR_INTERNAL_ERROR; - } - } - } - } - else - { - LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2)); - if (RT_SUCCESS(rc)) - rc = rc2; - } - - /* - * Check that everyone sees the same stuff. - */ - if (RT_SUCCESS(rc)) - { - LogRel(("Test #5\n")); - ASMIntDisable(); - PGMDynMapMigrateAutoSet(&pVM->aCpus[0]); - RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0); - rc = PGMDynMapHCPage(pVM, HCPhysPT, &pv); - if (RT_SUCCESS(rc)) - { - PGMR0DYNMAPTEST Test; - uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u; - Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK)); - Test.u32Expect = *pu32Real; - ASMAtomicWriteU32(&Test.cFailures, 0); - ASMIntEnable(); - - rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL); - if (RT_FAILURE(rc)) - LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc)); - else if (Test.cFailures) - { - LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__, - Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32)); - rc = VERR_INTERNAL_ERROR; - } - else - LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", - pu32Real, Test.pu32, Test.u32Expect, *Test.pu32)); - } - else - { - ASMIntEnable(); - LogRel(("failed(%d): rc=%Rrc\n", rc)); - } - } - - /* - * Clean up. - */ - LogRel(("Cleanup.\n")); - ASMIntDisable(); - PGMDynMapMigrateAutoSet(&pVM->aCpus[0]); - PGMDynMapFlushAutoSet(&pVM->aCpus[0]); - PGMDynMapReleaseAutoSet(&pVM->aCpus[0]); - ASMIntEnable(); - - if (RT_SUCCESS(rc)) - rc = PGMR0DynMapAssertIntegrity(); - else - PGMR0DynMapAssertIntegrity(); - - g_fPGMR0DynMapTestRunning = false; - LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc, - pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries))); - pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved; - LogRel(("pgmR0DynMapTest: ****** END ******\n")); - return rc; -} -#endif /* DEBUG */ - diff --git a/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp index e2157ca78..77c0f34f8 100644 --- a/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp +++ b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp @@ -1,10 +1,10 @@ -/* $Id: PGMR0SharedPage.cpp $ */ +/* $Id: PGMR0SharedPage.cpp 31441 2010-08-06 14:13:01Z vboxsync $ */ /** @file - * PGM - Page Manager and Monitor, Ring-0. + * PGM - Page Manager and Monitor, Page Sharing, Ring-0. */ /* - * Copyright (C) 2007 Oracle Corporation + * Copyright (C) 2010 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. 
This file is free software; @@ -52,7 +52,7 @@ VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHA Log(("PGMR0SharedModuleCheck: check %s %s base=%RGv size=%x\n", pModule->szName, pModule->szVersion, pModule->Core.Key, pModule->cbModule)); - pgmLock(pVM); + Assert(PGMIsLockOwner(pVM)); /* This cannot fail as we grab the lock in pgmR3SharedModuleRegRendezvous before calling into ring-0. */ /* Check every region of the shared module. */ for (unsigned idxRegion = 0; idxRegion < cRegions; idxRegion++) @@ -69,7 +69,7 @@ VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHA RTGCPHYS GCPhys; uint64_t fFlags; - /** todo: inefficient to fetch each guest page like this... */ + /** @todo inefficient to fetch each guest page like this... */ rc = PGMGstGetPage(pVCpu, GCRegion, &fFlags, &GCPhys); if ( rc == VINF_SUCCESS && !(fFlags & X86_PTE_RW)) /* important as we make assumptions about this below! */ @@ -123,11 +123,11 @@ VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHA } else { - Assert( rc == VINF_SUCCESS + Assert( rc == VINF_SUCCESS || rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT - || rc == VERR_PAGE_TABLE_NOT_PRESENT); + || rc == VERR_PAGE_TABLE_NOT_PRESENT); rc = VINF_SUCCESS; /* ignore error */ } @@ -137,7 +137,6 @@ VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHA } } - pgmUnlock(pVM); if (fFlushTLBs) PGM_INVL_ALL_VCPU_TLBS(pVM); diff --git a/src/VBox/VMM/VMMR0/TRPMR0.cpp b/src/VBox/VMM/VMMR0/TRPMR0.cpp index 9b951e57c..78bf84a46 100644 --- a/src/VBox/VMM/VMMR0/TRPMR0.cpp +++ b/src/VBox/VMM/VMMR0/TRPMR0.cpp @@ -1,4 +1,4 @@ -/* $Id: TRPMR0.cpp $ */ +/* $Id: TRPMR0.cpp 34020 2010-11-12 09:12:09Z vboxsync $ */ /** @file * TRPM - The Trap Monitor - HC Ring 0 */ @@ -77,7 +77,7 @@ VMMR0DECL(void) TRPMR0DispatchHostInterrupt(PVM pVM) # if HC_ARCH_BITS == 32 PVBOXIDTE pIdte = &((PVBOXIDTE)Idtr.pIdt)[uActiveVector]; # else - PVBOXIDTE pIdte = &((PVBOXIDTE)Idtr.pIdt)[uActiveVector * 2]; + PVBOXIDTE64 pIdte = &((PVBOXIDTE64)Idtr.pIdt)[uActiveVector]; # endif AssertMsgReturnVoid(pIdte->Gen.u1Present, ("The IDT entry (%d) is not present!\n", uActiveVector)); AssertMsgReturnVoid( pIdte->Gen.u3Type1 == VBOX_IDTE_TYPE1 @@ -93,12 +93,11 @@ VMMR0DECL(void) TRPMR0DispatchHostInterrupt(PVM pVM) # else /* 64-bit: */ RTFAR64 pfnHandler; - pfnHandler.off = VBOXIDTE_OFFSET(*pIdte); - pfnHandler.off |= (uint64_t)(*(uint32_t *)(pIdte + 1)) << 32; //cleanup! + pfnHandler.off = VBOXIDTE64_OFFSET(*pIdte); pfnHandler.sel = pIdte->Gen.u16SegSel; - RTR0UINTREG uRSP = ~(RTR0UINTREG)0; - if (pIdte->au32[1] & 0x7 /*IST*/) + const RTR0UINTREG uRSP = ~(RTR0UINTREG)0; + if (pIdte->Gen.u3Ist) { trpmR0DispatchHostInterruptSimple(uActiveVector); return; diff --git a/src/VBox/VMM/VMMR0/TRPMR0A.asm b/src/VBox/VMM/VMMR0/TRPMR0A.asm index a19de3a5e..c5944e229 100644 --- a/src/VBox/VMM/VMMR0/TRPMR0A.asm +++ b/src/VBox/VMM/VMMR0/TRPMR0A.asm @@ -1,4 +1,4 @@ -; $Id: TRPMR0A.asm $ +; $Id: TRPMR0A.asm 28800 2010-04-27 08:22:32Z vboxsync $ ;; @file ; TRPM - Host Context Ring-0 ; diff --git a/src/VBox/VMM/VMMR0/VMMR0.cpp b/src/VBox/VMM/VMMR0/VMMR0.cpp index ca4c48fe9..b57dd0b05 100644 --- a/src/VBox/VMM/VMMR0/VMMR0.cpp +++ b/src/VBox/VMM/VMMR0/VMMR0.cpp @@ -1,4 +1,4 @@ -/* $Id: VMMR0.cpp $ */ +/* $Id: VMMR0.cpp 35298 2010-12-22 12:35:46Z vboxsync $ */ /** @file * VMM - Host Context Ring 0. 
*/ @@ -41,14 +41,13 @@ #include <iprt/asm-amd64-x86.h> #include <iprt/assert.h> -#include <iprt/crc32.h> +#include <iprt/crc.h> #include <iprt/mp.h> #include <iprt/once.h> #include <iprt/stdarg.h> #include <iprt/string.h> -#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION -# include <iprt/thread.h> -#endif +#include <iprt/thread.h> +#include <iprt/timer.h> #if defined(_MSC_VER) && defined(RT_ARCH_AMD64) /** @todo check this with with VC7! */ # pragma intrinsic(_AddressOfReturnAddress) @@ -61,6 +60,11 @@ RT_C_DECLS_BEGIN VMMR0DECL(int) ModuleInit(void); VMMR0DECL(void) ModuleTerm(void); + +#if defined(RT_ARCH_X86) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)) +extern uint64_t __udivdi3(uint64_t, uint64_t); +extern uint64_t __umoddi3(uint64_t, uint64_t); +#endif // RT_ARCH_X86 && (RT_OS_SOLARIS || RT_OS_FREEBSD) RT_C_DECLS_END @@ -72,10 +76,22 @@ RT_C_DECLS_END PFNRT g_VMMGCDeps[] = { (PFNRT)RTCrc32, - (PFNRT)RTOnce + (PFNRT)RTOnce, +#if defined(RT_ARCH_X86) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)) + (PFNRT)__udivdi3, + (PFNRT)__umoddi3, +#endif // RT_ARCH_X86 && (RT_OS_SOLARIS || RT_OS_FREEBSD) + NULL }; +#if defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64) +/* Increase the size of the image to work around the refusal of Win64 to + * load images in the 0x80000 range. + */ +static uint64_t u64BloatImage[8192] = {0}; +#endif + /** * Initialize the module. * This is called when we're first loaded. @@ -176,7 +192,7 @@ VMMR0DECL(void) ModuleTerm(void) /** - * Initaties the R0 driver for a particular VM instance. + * Initiates the R0 driver for a particular VM instance. * * @returns VBox status code. * @@ -220,14 +236,14 @@ static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev) LogCom(("vmmR0InitVM: after %p dereg\n", RTLogDefaultInstance())); pR0Logger->Logger.pfnLogger("hello ring-0 logger\n"); - LogCom(("vmmR0InitVM: returned succesfully from direct logger call.\n")); + LogCom(("vmmR0InitVM: returned successfully from direct logger call.\n")); pR0Logger->Logger.pfnFlush(&pR0Logger->Logger); - LogCom(("vmmR0InitVM: returned succesfully from direct flush call.\n")); + LogCom(("vmmR0InitVM: returned successfully from direct flush call.\n")); RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pVM->pSession); LogCom(("vmmR0InitVM: after %p reg2\n", RTLogDefaultInstance())); pR0Logger->Logger.pfnLogger("hello ring-0 logger\n"); - LogCom(("vmmR0InitVM: returned succesfully from direct logger call (2). offScratch=%d\n", pR0Logger->Logger.offScratch)); + LogCom(("vmmR0InitVM: returned successfully from direct logger call (2). offScratch=%d\n", pR0Logger->Logger.offScratch)); RTLogSetDefaultInstanceThread(NULL, pVM->pSession); LogCom(("vmmR0InitVM: after %p dereg2\n", RTLogDefaultInstance())); @@ -245,6 +261,13 @@ static int vmmR0InitVM(PVM pVM, uint32_t uSvnRev) #endif /* LOG_ENABLED */ /* + * Check if the host supports high resolution timers or not. + */ + if ( pVM->vmm.s.fUsePeriodicPreemptionTimers + && !RTTimerCanDoHighResolution()) + pVM->vmm.s.fUsePeriodicPreemptionTimers = false; + + /* * Initialize the per VM data for GVMM and GMM. 
*/ int rc = GVMMR0InitVM(pVM); @@ -419,8 +442,33 @@ static void vmmR0RecordRC(PVM pVM, PVMCPU pVCpu, int rc) STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRescheduleREM); break; case VINF_EM_RAW_TO_R3: - STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3); + if (VM_FF_ISPENDING(pVM, VM_FF_TM_VIRTUAL_SYNC)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3TMVirt); + else + if (VM_FF_ISPENDING(pVM, VM_FF_PGM_NEED_HANDY_PAGES)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3HandyPages); + else + if (VM_FF_ISPENDING(pVM, VM_FF_PDM_QUEUES)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3PDMQueues); + else + if (VM_FF_ISPENDING(pVM, VM_FF_EMT_RENDEZVOUS)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Rendezvous); + else + if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3DMA); + else + if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TIMER)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Timer); + else + if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PDM_CRITSECT)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3CritSect); + else + if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TO_R3)) + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3); + else + STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Unknown); break; + case VINF_EM_RAW_TIMER_PENDING: STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetTimerPending); break; @@ -531,72 +579,71 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati */ case VMMR0_DO_RAW_RUN: { - /* Safety precaution as hwaccm disables the switcher. */ - if (RT_LIKELY(!pVM->vmm.s.fSwitcherDisabled)) - { - RTCCUINTREG uFlags = ASMIntDisableFlags(); - int rc; - bool fVTxDisabled; - - if (RT_UNLIKELY(pVM->cCpus > 1)) - { - pVCpu->vmm.s.iLastGZRc = VERR_RAW_MODE_INVALID_SMP; - ASMSetFlags(uFlags); - return; - } - + /* Some safety precautions first. */ #ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 - if (RT_UNLIKELY(!PGMGetHyperCR3(pVCpu))) - { - pVCpu->vmm.s.iLastGZRc = VERR_PGM_NO_CR3_SHADOW_ROOT; - ASMSetFlags(uFlags); - return; - } + if (RT_LIKELY( !pVM->vmm.s.fSwitcherDisabled /* hwaccm */ + && pVM->cCpus == 1 /* !smp */ + && PGMGetHyperCR3(pVCpu))) +#else + if (RT_LIKELY( !pVM->vmm.s.fSwitcherDisabled + && pVM->cCpus == 1)) #endif - + { + /* Disable preemption and update the periodic preemption timer. */ + RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; + RTThreadPreemptDisable(&PreemptState); RTCPUID idHostCpu = RTMpCpuId(); #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI CPUMR0SetLApic(pVM, idHostCpu); #endif + ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu); + if (pVM->vmm.s.fUsePeriodicPreemptionTimers) + GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu)); /* We might need to disable VT-x if the active switcher turns off paging. 
*/ - rc = HWACCMR0EnterSwitcher(pVM, &fVTxDisabled); - if (RT_FAILURE(rc)) + bool fVTxDisabled; + int rc = HWACCMR0EnterSwitcher(pVM, &fVTxDisabled); + if (RT_SUCCESS(rc)) { - pVCpu->vmm.s.iLastGZRc = rc; - ASMSetFlags(uFlags); - return; - } - - ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); + RTCCUINTREG uFlags = ASMIntDisableFlags(); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); - TMNotifyStartOfExecution(pVCpu); - rc = pVM->vmm.s.pfnHostToGuestR0(pVM); - pVCpu->vmm.s.iLastGZRc = rc; - TMNotifyEndOfExecution(pVCpu); + TMNotifyStartOfExecution(pVCpu); + rc = pVM->vmm.s.pfnHostToGuestR0(pVM); + pVCpu->vmm.s.iLastGZRc = rc; + TMNotifyEndOfExecution(pVCpu); - VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); - ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); + VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED); - /* Re-enable VT-x if previously turned off. */ - HWACCMR0LeaveSwitcher(pVM, fVTxDisabled); + /* Re-enable VT-x if previously turned off. */ + HWACCMR0LeaveSwitcher(pVM, fVTxDisabled); - if ( rc == VINF_EM_RAW_INTERRUPT - || rc == VINF_EM_RAW_INTERRUPT_HYPER) - TRPMR0DispatchHostInterrupt(pVM); + if ( rc == VINF_EM_RAW_INTERRUPT + || rc == VINF_EM_RAW_INTERRUPT_HYPER) + TRPMR0DispatchHostInterrupt(pVM); - ASMSetFlags(uFlags); + ASMSetFlags(uFlags); #ifdef VBOX_WITH_STATISTICS - STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); - vmmR0RecordRC(pVM, pVCpu, rc); + STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); + vmmR0RecordRC(pVM, pVCpu, rc); #endif + } + else + pVCpu->vmm.s.iLastGZRc = rc; + ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID); + RTThreadPreemptRestore(&PreemptState); } else { Assert(!pVM->vmm.s.fSwitcherDisabled); pVCpu->vmm.s.iLastGZRc = VERR_NOT_SUPPORTED; + if (pVM->cCpus != 1) + pVCpu->vmm.s.iLastGZRc = VERR_RAW_MODE_INVALID_SMP; +#ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 + if (!PGMGetHyperCR3(pVCpu)) + pVCpu->vmm.s.iLastGZRc = VERR_PGM_NO_CR3_SHADOW_ROOT; +#endif } break; } @@ -610,10 +657,6 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati */ case VMMR0_DO_HWACC_RUN: { - int rc; - - STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); - #ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; RTThreadPreemptDisable(&PreemptState); @@ -621,6 +664,8 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati RTCCUINTREG uFlags = ASMIntDisableFlags(); #endif ASMAtomicWriteU32(&pVCpu->idHostCpu, RTMpCpuId()); + if (pVM->vmm.s.fUsePeriodicPreemptionTimers) + GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu)); #ifdef LOG_ENABLED if (pVCpu->idCpu > 0) @@ -635,6 +680,7 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati } } #endif + int rc; if (!HWACCMR0SuspendPending()) { rc = HWACCMR0Enter(pVM, pVCpu); @@ -644,6 +690,7 @@ VMMR0DECL(void) VMMR0EntryFast(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperati int rc2 = HWACCMR0Leave(pVM, pVCpu); AssertRC(rc2); } + STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC); } else { @@ -982,10 +1029,7 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio return VERR_INVALID_PARAMETER; PVMCPU pVCpu = &pVM->aCpus[idCpu]; - - /* Initialize the r0 native thread handle on the fly. 
*/ - if (pVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD) - pVCpu->hNativeThreadR0 = RTThreadNativeSelf(); + Assert(pVCpu->hNativeThreadR0 == RTThreadNativeSelf()); # ifdef DEBUG_sandervl /* Make sure that log flushes can jump back to ring-3; annoying to get an incomplete log (this is risky though as the code doesn't take this into account). */ @@ -1005,6 +1049,15 @@ static int vmmR0EntryExWorker(PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperatio } #endif +#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64 + case VMMR0_DO_GMM_FIND_DUPLICATE_PAGE: + { + if (u64Arg) + return VERR_INVALID_PARAMETER; + return GMMR0FindDuplicatePageReq(pVM, (PGMMFINDDUPLICATEPAGEREQ)pReqHdr); + } +#endif + /* * A quick GCFGM mock-up. */ @@ -1288,7 +1341,7 @@ VMMR0DECL(void) vmmR0LoggerFlush(PRTLOGGER pLogger) } /** - * Interal R0 logger worker: Custom prefix. + * Internal R0 logger worker: Custom prefix. * * @returns Number of chars written. * @@ -1319,8 +1372,8 @@ VMMR0DECL(size_t) vmmR0LoggerPrefix(PRTLOGGER pLogger, char *pchBuf, size_t cchB #endif } - #ifdef LOG_ENABLED + /** * Disables flushing of the ring-0 debug log. * @@ -1345,7 +1398,8 @@ VMMR0DECL(void) VMMR0LogFlushEnable(PVMCPU pVCpu) if (pVCpu->vmm.s.pR0LoggerR0) pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = false; } -#endif + +#endif /* LOG_ENABLED */ /** * Jump back to ring-3 if we're the EMT and the longjmp is armed. diff --git a/src/VBox/VMM/VMMR0/VMMR0.def b/src/VBox/VMM/VMMR0/VMMR0.def index 32b7e66fb..b249ae8ad 100644 --- a/src/VBox/VMM/VMMR0/VMMR0.def +++ b/src/VBox/VMM/VMMR0/VMMR0.def @@ -1,4 +1,4 @@ -; $Id: VMMR0.def $ +; $Id: VMMR0.def 32488 2010-09-14 14:48:30Z vboxsync $ ;; @file ; VMM Ring 0 DLL - Definition file. @@ -54,6 +54,7 @@ EXPORTS TMTimerSetMillies TMTimerSetMicro TMTimerSetNano + TMTimerSetFrequencyHint TMTimerStop VMMGetSvnRev vmmR0LoggerFlush @@ -82,6 +83,7 @@ EXPORTS RTTimeNanoTSLFenceSync RTTimeNanoTSLFenceAsync RTTimeSystemNanoTS + RTTimeNanoTS ASMMultU64ByU32DivByU32 ; not-os2 ASMAtomicXchgU8 ; not-x86 nocrt_memchr diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm index bb5ed9c1a..377fec3ff 100644 --- a/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm +++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm @@ -1,4 +1,4 @@ -; $Id: VMMR0JmpA-amd64.asm $ +; $Id: VMMR0JmpA-amd64.asm 33540 2010-10-28 09:27:05Z vboxsync $ ;; @file ; VMM - R0 SetJmp / LongJmp routines for AMD64. ; @@ -31,7 +31,7 @@ %define STACK_PADDING 0eeeeeeeeeeeeeeeeh -; For vmmR0LoggerWrapper. (The other architecture(s) use(s) C99 variadict macros.) +; For vmmR0LoggerWrapper. (The other architecture(s) use(s) C99 variadic macros.) extern NAME(RTLogLogger) diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm index efb5f2b18..e818a8ae6 100644 --- a/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm +++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm @@ -1,4 +1,4 @@ -; $Id: VMMR0JmpA-x86.asm $ +; $Id: VMMR0JmpA-x86.asm 33540 2010-10-28 09:27:05Z vboxsync $ ;; @file ; VMM - R0 SetJmp / LongJmp routines for X86. ; @@ -31,7 +31,7 @@ %define STACK_PADDING 0eeeeeeeeh -; For vmmR0LoggerWrapper. (The other architecture(s) use(s) C99 variadict macros.) +; For vmmR0LoggerWrapper. (The other architecture(s) use(s) C99 variadic macros.) extern NAME(RTLogLogger) @@ -155,7 +155,7 @@ GLOBALNAME vmmR0CallRing3SetJmpEx mov ecx, [esp + 0ch] ; pvArg1 mov edx, [esp + 10h] ; pvArg2 mov eax, [esp + 08h] ; pfn - sub esp, 12 ; align the stack on a 16-byte boundrary. + sub esp, 12 ; align the stack on a 16-byte boundary. 
mov [esp ], ecx mov [esp + 04h], edx call eax
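
The bulk of the removed dynamic-mapping (pgmR0DynMap*) code above implements a small ring-0 mapping cache: pgmR0DynMapPage hashes the host physical address down to a slot index, checks that slot and the two following it, and falls back to pgmR0DynMapPageSlow to linear-probe for a matching or unreferenced slot. The standalone C sketch below illustrates only that lookup and probing scheme under simplified assumptions; every name in it (DEMOENTRY, demoMapPage, DEMO_CACHE_SIZE) is invented for the example, and the spinlock, PTE rewriting and per-CPU TLB invalidation bookkeeping of the real code are left out.

#include <stdint.h>
#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_CACHE_SIZE 16
#define DEMO_NIL_PHYS   UINT64_MAX

typedef struct DEMOENTRY
{
    uint64_t HCPhys;    /* host physical address mapped by this slot, or DEMO_NIL_PHYS */
    int32_t  cRefs;     /* reference count; 0 means the slot may be reused */
} DEMOENTRY;

static DEMOENTRY g_aCache[DEMO_CACHE_SIZE];

/* Returns the slot index HCPhys was mapped into, or UINT32_MAX when every
   slot is still referenced (the caller in the real code then fails with
   VERR_PGM_DYNMAP_FAILED). */
static uint32_t demoMapPage(uint64_t HCPhys)
{
    uint32_t const cPages = DEMO_CACHE_SIZE;
    uint32_t const iPage  = (uint32_t)(HCPhys >> DEMO_PAGE_SHIFT) % cPages;

    /* Fast path: a hit in the hashed slot or one of the two following it. */
    for (uint32_t i = 0; i < 3; i++)
    {
        uint32_t iSlot = (iPage + i) % cPages;
        if (g_aCache[iSlot].HCPhys == HCPhys)
        {
            g_aCache[iSlot].cRefs++;
            return iSlot;
        }
    }

    /* Slow path: take the first unreferenced slot among those three, else
       linear-probe the rest of the cache for a match or a free slot. */
    uint32_t iFree = UINT32_MAX;
    for (uint32_t i = 0; i < 3; i++)
    {
        uint32_t iSlot = (iPage + i) % cPages;
        if (g_aCache[iSlot].cRefs == 0)
        {
            iFree = iSlot;
            break;
        }
    }
    if (iFree == UINT32_MAX)
    {
        for (uint32_t iSlot = (iPage + 3) % cPages; iSlot != iPage; iSlot = (iSlot + 1) % cPages)
        {
            if (g_aCache[iSlot].HCPhys == HCPhys)
            {
                g_aCache[iSlot].cRefs++;
                return iSlot;
            }
            if (g_aCache[iSlot].cRefs == 0)
            {
                iFree = iSlot;
                break;
            }
        }
        if (iFree == UINT32_MAX)
            return UINT32_MAX;      /* every slot is still referenced */
    }

    /* (Re)load the slot; the real code also rewrites the PTE here and marks
       which CPUs still have to invalidate their TLB entry for it. */
    g_aCache[iFree].HCPhys = HCPhys;
    g_aCache[iFree].cRefs  = 1;
    return iFree;
}

int main(void)
{
    for (uint32_t i = 0; i < DEMO_CACHE_SIZE; i++)
        g_aCache[i].HCPhys = DEMO_NIL_PHYS;

    uint32_t iFirst  = demoMapPage(UINT64_C(0x12345000));
    uint32_t iSecond = demoMapPage(UINT64_C(0x12345000));  /* same page maps to the same slot */
    printf("slot=%u slot=%u cRefs=%d\n", (unsigned)iFirst, (unsigned)iSecond, (int)g_aCache[iFirst].cRefs);
    return 0;
}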
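
A second technique worth isolating from the removed code is pgmDynMapOptimizeAutoSet, the last-resort pass run when a per-VCPU auto set fills up: entries that refer to the same cache page are folded together as long as the combined reference count still fits below UINT16_MAX, otherwise as many references as possible are migrated into the earlier entry. The sketch below reproduces that pass on an invented DEMOSETENTRY array; it is an illustration rather than the VirtualBox structures, and the compaction of the vacated slot is simplified.

#include <stdint.h>
#include <stdio.h>

typedef struct DEMOSETENTRY
{
    uint16_t iPage;     /* index into the mapping cache */
    uint16_t cRefs;     /* references taken through this entry */
} DEMOSETENTRY;

static void demoOptimizeSet(DEMOSETENTRY *paEntries, uint32_t *pcEntries)
{
    for (uint32_t i = 0; i < *pcEntries; i++)
    {
        uint16_t const iPage = paEntries[i].iPage;
        uint32_t j = i + 1;
        while (j < *pcEntries)
        {
            if (paEntries[j].iPage != iPage)
                j++;
            else if ((uint32_t)paEntries[i].cRefs + paEntries[j].cRefs < UINT16_MAX)
            {
                /* Fold entry j into entry i and fill the hole with the last entry. */
                paEntries[i].cRefs += paEntries[j].cRefs;
                paEntries[j] = paEntries[--*pcEntries];
            }
            else
            {
                /* Move as many references as will fit into entry i and stop scanning. */
                uint16_t cMigrate = (uint16_t)(UINT16_MAX - 1 - paEntries[i].cRefs);
                paEntries[j].cRefs = (uint16_t)(paEntries[j].cRefs - cMigrate);
                paEntries[i].cRefs = UINT16_MAX - 1;
                break;
            }
        }
    }
}

int main(void)
{
    DEMOSETENTRY aEntries[] = { { 7, 2 }, { 3, 1 }, { 7, 5 }, { 3, 4 } };
    uint32_t     cEntries   = 4;

    demoOptimizeSet(aEntries, &cEntries);
    for (uint32_t i = 0; i < cEntries; i++)
        printf("iPage=%u cRefs=%u\n", (unsigned)aEntries[i].iPage, (unsigned)aEntries[i].cRefs);
    return 0;   /* prints iPage=7 cRefs=7 and iPage=3 cRefs=5 */
}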