summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorbholler <none@none>2007-06-15 16:39:48 -0700
committerbholler <none@none>2007-06-15 16:39:48 -0700
commitf98fbcec489fdf363410d0c1cedc2baff1d60d9c (patch)
treea70dc97afd8926744fddaacb102b22d2aafa1de9 /usr/src
parent758f6e0b258f20dcb5b772642e2a18b998ee7927 (diff)
downloadillumos-joyent-f98fbcec489fdf363410d0c1cedc2baff1d60d9c.tar.gz
6495392 use monitor/mwait for halting idle CPUs where supported
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c89
-rw-r--r--usr/src/uts/i86pc/os/mp_machdep.c249
-rw-r--r--usr/src/uts/i86pc/os/mp_startup.c10
-rw-r--r--usr/src/uts/i86pc/sys/machcpuvar.h5
-rw-r--r--usr/src/uts/i86pc/sys/machsystm.h1
-rw-r--r--usr/src/uts/intel/ia32/ml/i86_subr.s73
-rw-r--r--usr/src/uts/intel/sys/cpu.h11
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h4
8 files changed, 413 insertions, 29 deletions
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 90564199f8..13c8901ab0 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -115,6 +115,15 @@ uint_t enable486;
const char CyrixInstead[] = "CyrixInstead";
/*
+ * monitor/mwait info.
+ */
+struct mwait_info {
+ size_t mon_min; /* min size to avoid missed wakeups */
+ size_t mon_max; /* size to avoid false wakeups */
+ uint32_t support; /* processor support of monitor/mwait */
+};
+
+/*
* These constants determine how many of the elements of the
* cpuid we cache in the cpuid_info data structure; the
* remaining elements are accessible via the cpuid instruction.
@@ -168,6 +177,8 @@ struct cpuid_info {
uint32_t cpi_chiprev; /* See X86_CHIPREV_* in x86_archext.h */
const char *cpi_chiprevstr; /* May be NULL if chiprev unknown */
uint32_t cpi_socket; /* Chip package/socket type */
+
+ struct mwait_info cpi_mwait; /* fn 5: monitor/mwait info */
};
@@ -291,6 +302,29 @@ static const struct amd_rev_mapent {
{ 0xf, 0x60, 0x6f, 0x0, 0xf, X86_CHIPREV_AMD_F_REV_G, "G", 1 },
};
+/*
+ * Info for monitor/mwait idle loop.
+ *
+ * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
+ * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
+ * 2006.
+ * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
+ * Documentation Updates" #33633, Rev 2.05, December 2006.
+ */
+#define MWAIT_SUPPORT (0x00000001) /* mwait supported */
+#define MWAIT_EXTENSIONS (0x00000002) /* extension supported */
+#define MWAIT_ECX_INT_ENABLE (0x00000004) /* ecx 1 extension supported */
+#define MWAIT_SUPPORTED(cpi) ((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
+#define MWAIT_INT_ENABLE(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x2)
+#define MWAIT_EXTENSION(cpi) ((cpi)->cpi_std[5].cp_ecx & 0x1)
+#define MWAIT_SIZE_MIN(cpi) BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
+#define MWAIT_SIZE_MAX(cpi) BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
+/*
+ * Number of sub-cstates for a given c-state.
+ */
+#define MWAIT_NUM_SUBC_STATES(cpi, c_state) \
+ BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
+
static void
synth_amd_info(struct cpuid_info *cpi)
{
@@ -695,6 +729,10 @@ cpuid_pass1(cpu_t *cpu)
}
if (cp->cp_edx & CPUID_INTC_EDX_DE)
feature |= X86_DE;
+ if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
+ cpi->cpi_mwait.support |= MWAIT_SUPPORT;
+ feature |= X86_MWAIT;
+ }
if (feature & X86_PAE)
cpi->cpi_pabits = 36;
@@ -1064,9 +1102,31 @@ cpuid_pass2(cpu_t *cpu)
*dp++ = p[i];
}
break;
+
case 3: /* Processor serial number, if PSN supported */
+ break;
+
case 4: /* Deterministic cache parameters */
+ break;
+
case 5: /* Monitor/Mwait parameters */
+
+ /*
+ * check cpi_mwait.support which was set in cpuid_pass1
+ */
+ if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
+ break;
+
+ cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
+ cpi->cpi_mwait.mon_max = (size_t)MWAIT_SIZE_MAX(cpi);
+ if (MWAIT_EXTENSION(cpi)) {
+ cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
+ if (MWAIT_INT_ENABLE(cpi))
+ cpi->cpi_mwait.support |=
+ MWAIT_ECX_INT_ENABLE;
+ }
+ break;
+
default:
break;
}
@@ -2779,7 +2839,7 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
* create a child node for cpu identified as 'cpu_id'
*/
cpu_devi = ddi_add_child(cpu_nex_devi, "cpu", DEVI_SID_NODEID,
- cpu_id);
+ cpu_id);
if (cpu_devi == NULL) {
mutex_exit(&cpu_node_lock);
return;
@@ -2818,7 +2878,7 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
/* vendor-id */
(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
- "vendor-id", cpi->cpi_vendorstr);
+ "vendor-id", cpi->cpi_vendorstr);
if (cpi->cpi_maxeax == 0) {
mutex_exit(&cpu_node_lock);
@@ -2829,11 +2889,11 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
* family, model, and step
*/
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "family", CPI_FAMILY(cpi));
+ "family", CPI_FAMILY(cpi));
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "cpu-model", CPI_MODEL(cpi));
+ "cpu-model", CPI_MODEL(cpi));
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "stepping-id", CPI_STEP(cpi));
+ "stepping-id", CPI_STEP(cpi));
/* type */
@@ -2847,7 +2907,7 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
}
if (create)
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "type", CPI_TYPE(cpi));
+ "type", CPI_TYPE(cpi));
/* ext-family */
@@ -2879,7 +2939,7 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
}
if (create)
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "ext-model", CPI_MODEL_XTD(cpi));
+ "ext-model", CPI_MODEL_XTD(cpi));
/* generation */
@@ -2939,9 +2999,9 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
}
if (create) {
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "chunks", CPI_CHUNKS(cpi));
+ "chunks", CPI_CHUNKS(cpi));
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "apic-id", CPI_APIC_ID(cpi));
+ "apic-id", CPI_APIC_ID(cpi));
if (cpi->cpi_chipid >= 0) {
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
"chip#", cpi->cpi_chipid);
@@ -2986,9 +3046,9 @@ add_cpunode2devtree(processorid_t cpu_id, struct cpuid_info *cpi)
}
if (create) {
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
+ "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
- "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
+ "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
}
/*
@@ -3103,3 +3163,10 @@ getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
}
return (l2i->l2i_ret);
}
+
+size_t
+cpuid_get_mwait_size(cpu_t *cpu)
+{
+ ASSERT(cpuid_checkpass(cpu, 2));
+ return (cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max);
+}
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index f7d3a802f7..06c69da052 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -38,13 +38,16 @@
#include <sys/x86_archext.h>
#include <sys/cpupart.h>
#include <sys/cpuvar.h>
+#include <sys/cpu.h>
#include <sys/pghw.h>
#include <sys/disp.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
+#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/promif.h>
#include <sys/mach_intr.h>
+#include <vm/hat_i86.h>
#define OFFSETOF(s, m) (size_t)(&(((s *)0)->m))
@@ -73,6 +76,8 @@ static hrtime_t dummy_hrtime(void);
static void dummy_scalehrtime(hrtime_t *);
static void cpu_idle(void);
static void cpu_wakeup(cpu_t *, int);
+static void cpu_idle_mwait(void);
+static void cpu_wakeup_mwait(cpu_t *, int);
/*
* External reference functions
*/
@@ -144,6 +149,11 @@ static ushort_t mach_ver[4] = {0, 0, 0, 0};
*/
int idle_cpu_use_hlt = 1;
+/*
+ * If non-zero, idle cpus will use mwait if available to halt instead of hlt.
+ */
+int idle_cpu_prefer_mwait = 1;
+
/*ARGSUSED*/
int
@@ -422,6 +432,172 @@ cpu_wakeup(cpu_t *cpu, int bound)
poke_cpu(cpu_found);
}
+/*
+ * Idle the present CPU until awoken via touching its monitored line
+ */
+static void
+cpu_idle_mwait(void)
+{
+ volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
+ cpu_t *cpup = CPU;
+ processorid_t cpun = cpup->cpu_id;
+ cpupart_t *cp = cpup->cpu_part;
+ int hset_update = 1;
+
+ /*
+ * Set our mcpu_mwait here, so we can tell if anyone tries to
+ * wake us between now and when we call mwait. No other cpu will
+ * attempt to set our mcpu_mwait until we add ourself to the haltset.
+ */
+ *mcpu_mwait = MWAIT_HALTED;
+
+ /*
+ * If this CPU is online, and there's multiple CPUs
+ * in the system, then we should notate our halting
+ * by adding ourselves to the partition's halted CPU
+ * bitmap. This allows other CPUs to find/awaken us when
+ * work becomes available.
+ */
+ if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
+ hset_update = 0;
+
+ /*
+ * Add ourselves to the partition's halted CPUs bitmask
+ * and set our HALTED flag, if necessary.
+ *
+ * When a thread becomes runnable, it is placed on the queue
+ * and then the halted cpuset is checked to determine who
+ * (if anyone) should be awoken. We therefore need to first
+ * add ourselves to the halted cpuset, and then check if there
+ * is any work available.
+ *
+ * Note that memory barriers after updating the HALTED flag
+ * are not necessary since an atomic operation (updating the bitmap)
+ * immediately follows. On x86 the atomic operation acts as a
+ * memory barrier for the update of cpu_disp_flags.
+ */
+ if (hset_update) {
+ cpup->cpu_disp_flags |= CPU_DISP_HALTED;
+ CPUSET_ATOMIC_ADD(cp->cp_mach->mc_haltset, cpun);
+ }
+
+ /*
+ * Check to make sure there's really nothing to do.
+ * Work destined for this CPU may become available after
+ * this check. We'll be notified through the clearing of our
+ * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
+ *
+ * disp_anywork() checks disp_nrunnable, so we do not have to later.
+ */
+ if (disp_anywork()) {
+ if (hset_update) {
+ cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
+ CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
+ }
+ return;
+ }
+
+ /*
+ * We're on our way to being halted.
+ * To avoid a lost wakeup, arm the monitor before checking if another
+ * cpu wrote to mcpu_mwait to wake us up.
+ */
+ i86_monitor(mcpu_mwait, 0, 0);
+ if (*mcpu_mwait == MWAIT_HALTED) {
+ tlb_going_idle();
+ i86_mwait(0, 0);
+ tlb_service();
+ }
+
+ /*
+ * We're no longer halted
+ */
+ if (hset_update) {
+ cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
+ CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
+ }
+}
+
+/*
+ * If "cpu" is halted in mwait, then wake it up clearing its halted bit in
+ * advance. Otherwise, see if other CPUs in the cpu partition are halted and
+ * need to be woken up so that they can steal the thread we placed on this CPU.
+ * This function is only used on MP systems.
+ */
+static void
+cpu_wakeup_mwait(cpu_t *cp, int bound)
+{
+ cpupart_t *cpu_part;
+ uint_t cpu_found;
+ int result;
+
+ cpu_part = cp->cpu_part;
+
+ /*
+ * Clear the halted bit for that CPU since it will be woken up
+ * in a moment.
+ */
+ if (CPU_IN_SET(cpu_part->cp_mach->mc_haltset, cp->cpu_id)) {
+ /*
+ * Clear the halted bit for that CPU since it will be
+ * poked in a moment.
+ */
+ CPUSET_ATOMIC_DEL(cpu_part->cp_mach->mc_haltset, cp->cpu_id);
+ /*
+ * We may find the current CPU present in the halted cpuset
+ * if we're in the context of an interrupt that occurred
+ * before we had a chance to clear our bit in cpu_idle().
+ * Waking ourself is obviously unnecessary, since if
+ * we're here, we're not halted.
+ *
+ * monitor/mwait wakeup via writing to our cache line is
+ * harmless and less expensive than always checking if we
+ * are waking ourself which is an uncommon case.
+ */
+ MWAIT_WAKEUP(cp); /* write to monitored line */
+ return;
+ } else {
+ /*
+ * This cpu isn't halted, but it's idle or undergoing a
+ * context switch. No need to awaken anyone else.
+ */
+ if (cp->cpu_thread == cp->cpu_idle_thread ||
+ cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
+ return;
+ }
+
+ /*
+ * No need to wake up other CPUs if the thread we just enqueued
+ * is bound.
+ */
+ if (bound)
+ return;
+
+
+ /*
+ * See if there's any other halted CPUs. If there are, then
+ * select one, and awaken it.
+ * It's possible that after we find a CPU, somebody else
+ * will awaken it before we get the chance.
+ * In that case, look again.
+ */
+ do {
+ CPUSET_FIND(cpu_part->cp_mach->mc_haltset, cpu_found);
+ if (cpu_found == CPUSET_NOTINSET)
+ return;
+
+ ASSERT(cpu_found >= 0 && cpu_found < NCPU);
+ CPUSET_ATOMIC_XDEL(cpu_part->cp_mach->mc_haltset, cpu_found,
+ result);
+ } while (result < 0);
+
+ /*
+ * Do not check if cpu_found is ourself as monitor/mwait wakeup is
+ * cheap.
+ */
+ MWAIT_WAKEUP(cpu[cpu_found]); /* write to monitored line */
+}
+
void (*cpu_pause_handler)(volatile char *) = NULL;
static int
@@ -474,7 +650,7 @@ mach_get_platform(int owner)
clt_opsp = (void **)mach_set[owner];
if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01)
total_ops = sizeof (struct psm_ops_ver01) /
- sizeof (void (*)(void));
+ sizeof (void (*)(void));
else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_1)
/* no psm_notify_func */
total_ops = OFFSETOF(struct psm_ops, psm_notify_func) /
@@ -534,16 +710,16 @@ mach_construct_info()
if (conflict_owner) {
/* remove all psm modules except uppc */
cmn_err(CE_WARN,
- "Conflicts detected on the following PSM modules:");
+ "Conflicts detected on the following PSM modules:");
mutex_enter(&psmsw_lock);
for (swp = psmsw->psw_forw; swp != psmsw; swp = swp->psw_forw) {
if (swp->psw_infop->p_owner == conflict_owner)
cmn_err(CE_WARN, "%s ",
- swp->psw_infop->p_mach_idstring);
+ swp->psw_infop->p_mach_idstring);
}
mutex_exit(&psmsw_lock);
cmn_err(CE_WARN,
- "Setting the system back to SINGLE processor mode!");
+ "Setting the system back to SINGLE processor mode!");
cmn_err(CE_WARN,
"Please edit /etc/mach to remove the invalid PSM module.");
return;
@@ -604,14 +780,58 @@ mach_init()
/*
* Initialize the dispatcher's function hooks
- * to enable CPU halting when idle
+ * to enable CPU halting when idle.
+ * Do not use monitor/mwait if idle_cpu_use_hlt is not set (spin idle).
+ * Allocate monitor/mwait buffer for cpu0.
*/
- if (idle_cpu_use_hlt)
- idle_cpu = cpu_idle;
+ if (idle_cpu_use_hlt) {
+ if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
+ CPU->cpu_m.mcpu_mwait = mach_alloc_mwait(CPU);
+ idle_cpu = cpu_idle_mwait;
+ } else {
+ idle_cpu = cpu_idle;
+ }
+ }
mach_smpinit();
}
+/*
+ * Return a pointer to memory suitable for monitor/mwait use. Memory must be
+ * aligned as specified by cpuid (a cache line size).
+ */
+uint32_t *
+mach_alloc_mwait(cpu_t *cp)
+{
+ size_t mwait_size = cpuid_get_mwait_size(cp);
+ uint32_t *ret;
+
+ if (mwait_size < sizeof (uint32_t) || !ISP2(mwait_size))
+ panic("Can't handle mwait size %ld", (long)mwait_size);
+
+ /*
+ * kmem_alloc() returns cache line size aligned data for mwait_size
+ * allocations. mwait_size is currently cache line sized. Neither
+ * of these implementation details are guaranteed to be true in the
+ * future.
+ *
+ * First try allocating mwait_size as kmem_alloc() currently returns
+ * correctly aligned memory. If kmem_alloc() does not return
+ * mwait_size aligned memory, then use mwait_size ROUNDUP.
+ */
+ ret = kmem_zalloc(mwait_size, KM_SLEEP);
+ if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
+ *ret = MWAIT_RUNNING;
+ return (ret);
+ } else {
+ kmem_free(ret, mwait_size);
+ ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
+ ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
+ *ret = MWAIT_RUNNING;
+ return (ret);
+ }
+}
+
static void
mach_smpinit(void)
{
@@ -674,7 +894,10 @@ mach_smpinit(void)
* when a thread becomes runnable.
*/
if (idle_cpu_use_hlt)
- disp_enq_thread = cpu_wakeup;
+ if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait)
+ disp_enq_thread = cpu_wakeup_mwait;
+ else
+ disp_enq_thread = cpu_wakeup;
if (pops->psm_disable_intr)
psm_disable_intr = pops->psm_disable_intr;
@@ -684,11 +907,11 @@ mach_smpinit(void)
psm_get_ipivect = pops->psm_get_ipivect;
(void) add_avintr((void *)NULL, XC_HI_PIL, xc_serv, "xc_hi_intr",
- (*pops->psm_get_ipivect)(XC_HI_PIL, PSM_INTR_IPI_HI),
- (caddr_t)X_CALL_HIPRI, NULL, NULL, NULL);
+ (*pops->psm_get_ipivect)(XC_HI_PIL, PSM_INTR_IPI_HI),
+ (caddr_t)X_CALL_HIPRI, NULL, NULL, NULL);
(void) add_avintr((void *)NULL, XC_MED_PIL, xc_serv, "xc_med_intr",
- (*pops->psm_get_ipivect)(XC_MED_PIL, PSM_INTR_IPI_LO),
- (caddr_t)X_CALL_MEDPRI, NULL, NULL, NULL);
+ (*pops->psm_get_ipivect)(XC_MED_PIL, PSM_INTR_IPI_LO),
+ (caddr_t)X_CALL_MEDPRI, NULL, NULL, NULL);
(void) (*pops->psm_get_ipivect)(XC_CPUPOKE_PIL, PSM_INTR_POKE);
}
@@ -1089,7 +1312,7 @@ mach_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
switch (intr_op) {
case PSM_INTR_OP_CHECK_MSI:
*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
- DDI_INTR_TYPE_MSIX);
+ DDI_INTR_TYPE_MSIX);
break;
case PSM_INTR_OP_ALLOC_VECTORS:
if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index 96a3a1e628..425afa7801 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -237,6 +237,9 @@ mp_startup_init(int cpun)
ASSERT(cpun < NCPU && cpu[cpun] == NULL);
cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
+ if (x86_feature & X86_MWAIT)
+ cp->cpu_m.mcpu_mwait = mach_alloc_mwait(CPU);
+
procp = curthread->t_procp;
mutex_enter(&cpu_lock);
@@ -1304,6 +1307,13 @@ mp_startup(void)
}
/*
+ * We do not support cpus with mixed monitor/mwait support if the
+ * boot cpu supports monitor/mwait.
+ */
+ if ((x86_feature & ~new_x86_feature) & X86_MWAIT)
+ panic("unsupported mixed cpu monitor/mwait support detected");
+
+ /*
* We could be more sophisticated here, and just mark the CPU
* as "faulted" but at this point we'll opt for the easier
 * answer of dying horribly. Provided the boot cpu is ok,
diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h
index 894c089aaf..07406188f7 100644
--- a/usr/src/uts/i86pc/sys/machcpuvar.h
+++ b/usr/src/uts/i86pc/sys/machcpuvar.h
@@ -103,9 +103,14 @@ struct machcpu {
uint64_t mcpu_gdtpa; /* xen: GDT in physical address */
uint16_t mcpu_intr_pending; /* xen: pending interrupt levels */
+
+ volatile uint32_t *mcpu_mwait; /* MONITOR/MWAIT buffer */
};
#define NINTR_THREADS (LOCK_LEVEL-1) /* number of interrupt threads */
+#define MWAIT_HALTED (1) /* mcpu_mwait set when halting */
+#define MWAIT_RUNNING (0) /* mcpu_mwait set to wakeup */
+#define MWAIT_WAKEUP(cpu) (*((cpu)->cpu_m.mcpu_mwait) = MWAIT_RUNNING);
#endif /* _ASM */
diff --git a/usr/src/uts/i86pc/sys/machsystm.h b/usr/src/uts/i86pc/sys/machsystm.h
index 6132815959..8ab5c1b2dc 100644
--- a/usr/src/uts/i86pc/sys/machsystm.h
+++ b/usr/src/uts/i86pc/sys/machsystm.h
@@ -56,6 +56,7 @@ extern "C" {
extern void mach_cpu_idle(void);
extern void mach_cpu_halt(char *);
extern int mach_cpu_start(cpu_t *, void *);
+extern uint32_t *mach_alloc_mwait(cpu_t *cp);
extern int Cpudelay;
extern void setcpudelay(void);
diff --git a/usr/src/uts/intel/ia32/ml/i86_subr.s b/usr/src/uts/intel/ia32/ml/i86_subr.s
index 480de7b928..5ee0675b1b 100644
--- a/usr/src/uts/intel/ia32/ml/i86_subr.s
+++ b/usr/src/uts/intel/ia32/ml/i86_subr.s
@@ -664,6 +664,79 @@ __cpuid_insn(struct cpuid_regs *regs)
#endif /* __i386 */
#endif /* __lint */
+#if defined(__lint)
+
+/*ARGSUSED*/
+void
+i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
+{ return; }
+
+#else /* __lint */
+
+#if defined(__amd64)
+
+ ENTRY_NP(i86_monitor)
+ pushq %rbp
+ movq %rsp, %rbp
+ movq %rdi, %rax /* addr */
+ movq %rsi, %rcx /* extensions */
+ /* rdx contains input arg3: hints */
+ .byte 0x0f, 0x01, 0xc8 /* monitor */
+ leave
+ ret
+ SET_SIZE(i86_monitor)
+
+#elif defined(__i386)
+
+ENTRY_NP(i86_monitor)
+ pushl %ebp
+ movl %esp, %ebp
+ movl 0x4(%esp),%eax /* addr */
+ movl 0x8(%esp),%ecx /* extensions */
+ movl 0xc(%esp),%edx /* hints */
+ .byte 0x0f, 0x01, 0xc8 /* monitor */
+ leave
+ ret
+ SET_SIZE(i86_monitor)
+
+#endif /* __i386 */
+#endif /* __lint */
+
+#if defined(__lint)
+
+/*ARGSUSED*/
+void
+i86_mwait(uint32_t data, uint32_t extensions)
+{ return; }
+
+#else /* __lint */
+
+#if defined(__amd64)
+
+ ENTRY_NP(i86_mwait)
+ pushq %rbp
+ movq %rsp, %rbp
+ movq %rdi, %rax /* data */
+ movq %rsi, %rcx /* extensions */
+ .byte 0x0f, 0x01, 0xc9 /* mwait */
+ leave
+ ret
+ SET_SIZE(i86_mwait)
+
+#elif defined(__i386)
+
+ ENTRY_NP(i86_mwait)
+ pushl %ebp
+ movl %esp, %ebp
+ movl 0x4(%esp),%eax /* data */
+ movl 0x8(%esp),%ecx /* extensions */
+ .byte 0x0f, 0x01, 0xc9 /* mwait */
+ leave
+ ret
+ SET_SIZE(i86_mwait)
+
+#endif /* __i386 */
+#endif /* __lint */
#if defined(__lint)
diff --git a/usr/src/uts/intel/sys/cpu.h b/usr/src/uts/intel/sys/cpu.h
index 07a81e38de..8f4d5af138 100644
--- a/usr/src/uts/intel/sys/cpu.h
+++ b/usr/src/uts/intel/sys/cpu.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -39,6 +38,7 @@
* Include generic bustype cookies.
*/
#include <sys/bustypes.h>
+#include <sys/inttypes.h>
#if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
#include <asm/cpu.h>
#endif
@@ -52,6 +52,9 @@ extern void ht_pause(void);
extern void cli(void);
extern void sti(void);
extern void i86_halt(void);
+extern void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
+ uint32_t hints);
+extern void i86_mwait(uint32_t data, uint32_t extensions);
/*
* Used to insert cpu-dependent instructions into spin loops
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index d637f9b284..9e0bde9041 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -365,12 +365,13 @@ typedef struct mtrrvar {
#define X86_CX16 0x00080000
#define X86_CMP 0x00100000
#define X86_TSCP 0x00200000
+#define X86_MWAIT 0x00400000
#define X86_CPUID 0x01000000
#define FMT_X86_FEATURE \
"\20" \
"\31cpuid" \
- "\26tscp\25cmp\24cx16\23sse3\22nx\21asysc" \
+ "\27mwait\26tscp\25cmp\24cx16\23sse3\22nx\21asysc" \
"\20htt\17sse2\16sse\15sep\14pat\13cx8\12pae\11mca" \
"\10mmx\7cmov\6de\5pge\4mtrr\3msr\2tsc\1lgpg"
@@ -581,6 +582,7 @@ extern void add_cpunode2devtree(processorid_t, struct cpuid_info *);
extern void cpuid_get_addrsize(struct cpu *, uint_t *, uint_t *);
extern uint_t cpuid_get_dtlb_nent(struct cpu *, size_t);
+extern size_t cpuid_get_mwait_size(struct cpu *cpu);
extern uint_t workaround_errata(struct cpu *);