Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/Makefile.files          1
-rw-r--r--  usr/src/uts/common/os/cpu.c                5
-rw-r--r--  usr/src/uts/common/os/cpu_event.c       1093
-rw-r--r--  usr/src/uts/common/sys/cpu_event.h       270
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpu_idle.c    128
-rw-r--r--  usr/src/uts/i86pc/os/intr.c               10
-rw-r--r--  usr/src/uts/i86pc/os/mp_machdep.c         87
-rw-r--r--  usr/src/uts/i86pc/os/mp_pc.c               8
-rw-r--r--  usr/src/uts/i86pc/os/mp_startup.c          8
-rw-r--r--  usr/src/uts/i86pc/os/startup.c             8
-rw-r--r--  usr/src/uts/i86pc/sys/cpu_idle.h          10
-rw-r--r--  usr/src/uts/i86pc/sys/machcpuvar.h         1
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c            25
-rw-r--r--  usr/src/uts/intel/sys/cpu.h               11
14 files changed, 1568 insertions, 97 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 374dcce56e..6f9dc7d5a0 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -43,6 +43,7 @@ COMMON_CORE_OBJS += \
cmt.o \
cmt_policy.o \
cpu.o \
+ cpu_event.o \
cpu_intr.o \
cpu_pm.o \
cpupart.o \
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c
index 6ee6c941f7..d3d49aedf3 100644
--- a/usr/src/uts/common/os/cpu.c
+++ b/usr/src/uts/common/os/cpu.c
@@ -32,6 +32,7 @@
#include <sys/var.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
+#include <sys/cpu_event.h>
#include <sys/kstat.h>
#include <sys/uadmin.h>
#include <sys/systm.h>
@@ -143,7 +144,7 @@ cpu_t *cpu_inmotion;
/*
* Can be raised to suppress further weakbinding, which are instead
* satisfied by disabling preemption. Must be raised/lowered under cpu_lock,
- * while individual thread weakbinding synchronisation is done under thread
+ * while individual thread weakbinding synchronization is done under thread
* lock.
*/
int weakbindingbarrier;
@@ -2266,7 +2267,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw)
cpuid_get_ncore_per_chip(cp);
cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
- cpu_info_template.ci_curr_cstate.value.l = cp->cpu_m.curr_cstate;
+ cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp);
kstat_named_setstr(&cpu_info_template.ci_sktstr,
cpuid_getsocketstr(cp));
#endif
diff --git a/usr/src/uts/common/os/cpu_event.c b/usr/src/uts/common/os/cpu_event.c
new file mode 100644
index 0000000000..11162ccfc9
--- /dev/null
+++ b/usr/src/uts/common/os/cpu_event.c
@@ -0,0 +1,1093 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * Introduction
+ * This file implements a CPU event notification mechanism to signal clients
+ * that are interested in CPU-related events.
+ * Currently it only supports CPU idle state change events, which are
+ * triggered just before a CPU enters the hardware idle state and just after
+ * it wakes up from the hardware idle state.
+ * Please refer to PSARC/2009/115 for detailed information.
+ *
+ * Lock Strategy
+ * 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock.
+ * 2) No protection for cpu_idle_cb_state because it's per-CPU data.
+ * 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock.
+ * 4) cpu_idle_cb_array is protected by pause_cpus/start_cpus logic.
+ * 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and
+ * pause_cpus/start_cpus logic.
+ * The algorithm is optimized for the read-side hot path: read-side access
+ * is lock-free.
+ * On the write side, we use pause_cpus() to keep all other CPUs in the
+ * pause thread, which guarantees that no other thread will access the
+ * cpu_idle_cb_max/curr/array data structures.
+ */
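+
+/*
+ * Usage sketch (illustrative only; my_enter, my_exit and my_cb are
+ * hypothetical client code, not part of this interface):
+ *
+ *	static cpu_idle_callback_t my_cb = {
+ *		CPU_IDLE_CALLBACK_VERS, my_enter, my_exit
+ *	};
+ *	cpu_idle_callback_handle_t hdl;
+ *
+ *	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DYNAMIC, &my_cb,
+ *	    NULL, &hdl) == 0) {
+ *		(void) cpu_idle_unregister_callback(hdl);
+ *	}
+ */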
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/cpu.h>
+#include <sys/kmem.h>
+#include <sys/machcpuvar.h>
+#include <sys/sdt.h>
+#include <sys/sysmacros.h>
+#include <sys/synch.h>
+#include <sys/systm.h>
+#include <sys/sunddi.h>
+#if defined(__sparc)
+#include <sys/machsystm.h>
+#elif defined(__x86)
+#include <sys/archsystm.h>
+#endif
+#include <sys/cpu_event.h>
+
+/* Define normal state for CPU on different platforms. */
+#if defined(__x86)
+#define CPU_IDLE_STATE_NORMAL IDLE_STATE_C0
+#elif defined(__sparc)
+/*
+ * At the time of this implementation IDLE_STATE_NORMAL is defined
+ * in mach_startup.c, and not in a header file. So if we find it is
+ * undefined, then we set it to the value defined in mach_startup.c.
+ * Should it eventually be defined there, we will pick it up.
+ */
+#ifndef IDLE_STATE_NORMAL
+#define IDLE_STATE_NORMAL 0
+#endif
+#define CPU_IDLE_STATE_NORMAL IDLE_STATE_NORMAL
+#endif
+
+/*
+ * To improve cache efficiency and avoid cache false sharing, CPU idle
+ * properties are grouped into cache lines as below:
+ * | CPU0 | CPU1 |.........| CPUn |
+ * | cache line 0 | cache line 1 |.........| cache line n |
+ * | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm |
+ * To access the value of property m for CPU n, use the following index:
+ * index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m.
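+ * For example, with a 64-byte coherence granule and 8-byte property values,
+ * CPU_IDLE_VALUE_GROUP_SIZE is 8, so property 2 of CPU 3 lives at index
+ * 3 * 8 + 2 = 26. (Figures are illustrative; the actual granule size is
+ * CPU_CACHE_COHERENCE_SIZE.)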
+ */
+#define CPU_IDLE_VALUE_GROUP_SIZE \
+ (CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t))
+
+/* Get callback context handle for current CPU. */
+#define CPU_IDLE_GET_CTX(cp) \
+ ((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid))
+
+/* Get CPU sequential id from ctx. */
+#define CPU_IDLE_CTX2CPUID(ctx) ((processorid_t)(intptr_t)(ctx))
+
+/* Compute index from callback context handle. */
+#define CPU_IDLE_CTX2IDX(ctx) \
+ (((int)(intptr_t)(ctx)) * CPU_IDLE_VALUE_GROUP_SIZE)
+
+#define CPU_IDLE_HDL2VALP(hdl, idx) \
+ (&((cpu_idle_prop_impl_t *)(hdl))->value[(idx)])
+
+/*
+ * When cpu_idle_cb_array is NULL or full, grow it by
+ * CPU_IDLE_ARRAY_CAPACITY_INC entries at a time. Here we prefer linear
+ * growth over exponential growth.
+ */
+#define CPU_IDLE_ARRAY_CAPACITY_INC 0x10
+
+typedef struct cpu_idle_prop_impl {
+ cpu_idle_prop_value_t *value;
+ struct cpu_idle_prop_impl *next;
+ char *name;
+ cpu_idle_prop_update_t update;
+ void *private;
+ cpu_idle_prop_type_t type;
+ uint32_t refcnt;
+} cpu_idle_prop_impl_t;
+
+typedef struct cpu_idle_prop_item {
+ cpu_idle_prop_type_t type;
+ char *name;
+ cpu_idle_prop_update_t update;
+ void *arg;
+ cpu_idle_prop_handle_t handle;
+} cpu_idle_prop_item_t;
+
+/* Structure to maintain registered callbacks in list. */
+typedef struct cpu_idle_cb_impl {
+ struct cpu_idle_cb_impl *next;
+ cpu_idle_callback_t *callback;
+ void *argument;
+ int priority;
+} cpu_idle_cb_impl_t;
+
+/*
+ * Structure to maintain registered callbacks in priority order and also
+ * optimized for cache efficiency for reading access.
+ */
+typedef struct cpu_idle_cb_item {
+ cpu_idle_enter_cbfn_t enter;
+ cpu_idle_exit_cbfn_t exit;
+ void *arg;
+ cpu_idle_cb_impl_t *impl;
+} cpu_idle_cb_item_t;
+
+/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
+typedef union cpu_idle_cb_state {
+ struct {
+ int index;
+ boolean_t ready;
+ cpu_idle_prop_value_t *idle_state;
+ cpu_idle_prop_value_t *enter_ts;
+ cpu_idle_prop_value_t *exit_ts;
+ cpu_idle_prop_value_t *last_idle;
+ cpu_idle_prop_value_t *last_busy;
+ cpu_idle_prop_value_t *total_idle;
+ cpu_idle_prop_value_t *total_busy;
+ cpu_idle_prop_value_t *intr_cnt;
+ } v;
+#ifdef _LP64
+ char align[2 * CPU_CACHE_COHERENCE_SIZE];
+#else
+ char align[CPU_CACHE_COHERENCE_SIZE];
+#endif
+} cpu_idle_cb_state_t;
+
+static kmutex_t cpu_idle_prop_lock;
+static cpu_idle_prop_impl_t *cpu_idle_prop_busy = NULL;
+static cpu_idle_prop_impl_t *cpu_idle_prop_free = NULL;
+
+static kmutex_t cpu_idle_cb_lock;
+static cpu_idle_cb_impl_t *cpu_idle_cb_busy = NULL;
+static cpu_idle_cb_item_t *cpu_idle_cb_array = NULL;
+static int cpu_idle_cb_curr = 0;
+static int cpu_idle_cb_max = 0;
+
+static cpu_idle_cb_state_t *cpu_idle_cb_state;
+
+static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
+ cpu_idle_prop_value_t *valp);
+
+static cpu_idle_prop_item_t cpu_idle_prop_array[] = {
+ {
+ CPU_IDLE_PROP_TYPE_INTPTR, CPU_IDLE_PROP_IDLE_STATE,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_ENTER_TIMESTAMP,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_EXIT_TIMESTAMP,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_IDLE_TIME,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_BUSY_TIME,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_IDLE_TIME,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_BUSY_TIME,
+ NULL, NULL, NULL
+ },
+ {
+ CPU_IDLE_PROP_TYPE_UINT64, CPU_IDLE_PROP_INTERRUPT_COUNT,
+ cpu_idle_prop_update_intr_cnt, NULL, NULL
+ },
+};
+
+#define CPU_IDLE_PROP_IDX_IDLE_STATE 0
+#define CPU_IDLE_PROP_IDX_ENTER_TS 1
+#define CPU_IDLE_PROP_IDX_EXIT_TS 2
+#define CPU_IDLE_PROP_IDX_LAST_IDLE 3
+#define CPU_IDLE_PROP_IDX_LAST_BUSY 4
+#define CPU_IDLE_PROP_IDX_TOTAL_IDLE 5
+#define CPU_IDLE_PROP_IDX_TOTAL_BUSY 6
+#define CPU_IDLE_PROP_IDX_INTR_CNT 7
+
+/*ARGSUSED*/
+static void
+cpu_idle_dtrace_enter(void *arg, cpu_idle_callback_context_t ctx,
+ cpu_idle_check_wakeup_t check_func, void *check_arg)
+{
+ int state;
+
+ state = cpu_idle_prop_get_intptr(
+ cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, ctx);
+ DTRACE_PROBE1(idle__state__transition, uint_t, state);
+}
+
+/*ARGSUSED*/
+static void
+cpu_idle_dtrace_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
+{
+ DTRACE_PROBE1(idle__state__transition, uint_t, CPU_IDLE_STATE_NORMAL);
+}
+
+static cpu_idle_callback_handle_t cpu_idle_cb_handle_dtrace;
+static cpu_idle_callback_t cpu_idle_callback_dtrace = {
+ CPU_IDLE_CALLBACK_VERS,
+ cpu_idle_dtrace_enter,
+ cpu_idle_dtrace_exit,
+};
+
+#if defined(__x86) && !defined(__xpv)
+extern void tlb_going_idle(void);
+extern void tlb_service(void);
+
+static cpu_idle_callback_handle_t cpu_idle_cb_handle_tlb;
+static cpu_idle_callback_t cpu_idle_callback_tlb = {
+ CPU_IDLE_CALLBACK_VERS,
+ (cpu_idle_enter_cbfn_t)tlb_going_idle,
+ (cpu_idle_exit_cbfn_t)tlb_service,
+};
+#endif
+
+void
+cpu_event_init(void)
+{
+ int i, idx;
+ size_t sz;
+ intptr_t buf;
+ cpu_idle_cb_state_t *sp;
+ cpu_idle_prop_item_t *ip;
+
+ mutex_init(&cpu_idle_cb_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&cpu_idle_prop_lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* Create internal properties. */
+ for (i = 0, ip = cpu_idle_prop_array;
+ i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]);
+ i++, ip++) {
+ (void) cpu_idle_prop_create_property(ip->name, ip->type,
+ ip->update, ip->arg, &ip->handle);
+ ASSERT(ip->handle != NULL);
+ }
+
+ /* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */
+ sz = sizeof (cpu_idle_cb_state_t) * max_ncpus;
+ sz += CPU_CACHE_COHERENCE_SIZE;
+ buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
+ cpu_idle_cb_state = (cpu_idle_cb_state_t *)P2ROUNDUP(buf,
+ CPU_CACHE_COHERENCE_SIZE);
+
+ /* Cache frequently used property value pointers. */
+ for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
+ idx = CPU_IDLE_CTX2IDX(i);
+#define ___INIT_P(f, i) \
+ sp->v.f = CPU_IDLE_HDL2VALP(cpu_idle_prop_array[(i)].handle, idx)
+ ___INIT_P(idle_state, CPU_IDLE_PROP_IDX_IDLE_STATE);
+ ___INIT_P(enter_ts, CPU_IDLE_PROP_IDX_ENTER_TS);
+ ___INIT_P(exit_ts, CPU_IDLE_PROP_IDX_EXIT_TS);
+ ___INIT_P(last_idle, CPU_IDLE_PROP_IDX_LAST_IDLE);
+ ___INIT_P(last_busy, CPU_IDLE_PROP_IDX_LAST_BUSY);
+ ___INIT_P(total_idle, CPU_IDLE_PROP_IDX_TOTAL_IDLE);
+ ___INIT_P(total_busy, CPU_IDLE_PROP_IDX_TOTAL_BUSY);
+ ___INIT_P(intr_cnt, CPU_IDLE_PROP_IDX_INTR_CNT);
+#undef ___INIT_P
+ }
+
+ /* Register built-in callbacks. */
+ if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DTRACE,
+ &cpu_idle_callback_dtrace, NULL, &cpu_idle_cb_handle_dtrace) != 0) {
+ cmn_err(CE_PANIC,
+ "cpu_idle: failed to register callback for dtrace.");
+ }
+#if defined(__x86) && !defined(__xpv)
+ if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_TLB,
+ &cpu_idle_callback_tlb, NULL, &cpu_idle_cb_handle_tlb) != 0) {
+ cmn_err(CE_PANIC,
+ "cpu_idle: failed to register callback for tlb_flush.");
+ }
+#endif
+}
+
+void
+cpu_event_init_cpu(cpu_t *cp)
+{
+ ASSERT(cp->cpu_seqid < max_ncpus);
+ cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
+}
+
+void
+cpu_event_fini_cpu(cpu_t *cp)
+{
+ ASSERT(cp->cpu_seqid < max_ncpus);
+ cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
+}
+
+static void
+cpu_idle_insert_callback(cpu_idle_cb_impl_t *cip)
+{
+ int unlock = 0, unpause = 0;
+ int i, cnt_new = 0, cnt_old = 0;
+ char *buf_new = NULL, *buf_old = NULL;
+
+ ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));
+
+ /*
+ * Expand array if it's full.
+ * Memory must be allocated out of pause/start_cpus() scope because
+ * kmem_zalloc() can't be called with KM_SLEEP flag within that scope.
+ */
+ if (cpu_idle_cb_curr == cpu_idle_cb_max) {
+ cnt_new = cpu_idle_cb_max + CPU_IDLE_ARRAY_CAPACITY_INC;
+ buf_new = (char *)kmem_zalloc(cnt_new *
+ sizeof (cpu_idle_cb_item_t), KM_SLEEP);
+ }
+
+ /* Try to acquire cpu_lock if not held yet. */
+ if (!MUTEX_HELD(&cpu_lock)) {
+ mutex_enter(&cpu_lock);
+ unlock = 1;
+ }
+ /*
+ * Pause all other CPUs (and let them run pause thread).
+ * It's guaranteed that no other threads will access cpu_idle_cb_array
+ * after pause_cpus().
+ */
+ if (!cpus_paused()) {
+ pause_cpus(NULL);
+ unpause = 1;
+ }
+
+ /* Copy content to new buffer if needed. */
+ if (buf_new != NULL) {
+ buf_old = (char *)cpu_idle_cb_array;
+ cnt_old = cpu_idle_cb_max;
+ if (buf_old != NULL) {
+ ASSERT(cnt_old != 0);
+ bcopy(cpu_idle_cb_array, buf_new,
+ sizeof (cpu_idle_cb_item_t) * cnt_old);
+ }
+ cpu_idle_cb_array = (cpu_idle_cb_item_t *)buf_new;
+ cpu_idle_cb_max = cnt_new;
+ }
+
+ /* Insert into array according to priority. */
+ ASSERT(cpu_idle_cb_curr < cpu_idle_cb_max);
+ for (i = cpu_idle_cb_curr; i > 0; i--) {
+ if (cpu_idle_cb_array[i - 1].impl->priority >= cip->priority) {
+ break;
+ }
+ cpu_idle_cb_array[i] = cpu_idle_cb_array[i - 1];
+ }
+ cpu_idle_cb_array[i].arg = cip->argument;
+ cpu_idle_cb_array[i].enter = cip->callback->idle_enter;
+ cpu_idle_cb_array[i].exit = cip->callback->idle_exit;
+ cpu_idle_cb_array[i].impl = cip;
+ cpu_idle_cb_curr++;
+
+ /* Resume other CPUs from paused state if needed. */
+ if (unpause) {
+ start_cpus();
+ }
+ if (unlock) {
+ mutex_exit(&cpu_lock);
+ }
+
+ /* Free old resource if needed. */
+ if (buf_old != NULL) {
+ ASSERT(cnt_old != 0);
+ kmem_free(buf_old, cnt_old * sizeof (cpu_idle_cb_item_t));
+ }
+}
+
+static void
+cpu_idle_remove_callback(cpu_idle_cb_impl_t *cip)
+{
+ int i, found = 0;
+ int unlock = 0, unpause = 0;
+ cpu_idle_cb_state_t *sp;
+
+ ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));
+
+ /* Try to acquire cpu_lock if not held yet. */
+ if (!MUTEX_HELD(&cpu_lock)) {
+ mutex_enter(&cpu_lock);
+ unlock = 1;
+ }
+ /*
+ * Pause all other CPUs.
+ * It's guaranteed that no other threads will access cpu_idle_cb_array
+ * after pause_cpus().
+ */
+ if (!cpus_paused()) {
+ pause_cpus(NULL);
+ unpause = 1;
+ }
+
+ /* Remove cip from array. */
+ for (i = 0; i < cpu_idle_cb_curr; i++) {
+ if (found == 0) {
+ if (cpu_idle_cb_array[i].impl == cip) {
+ found = 1;
+ }
+ } else {
+ cpu_idle_cb_array[i - 1] = cpu_idle_cb_array[i];
+ }
+ }
+ ASSERT(found != 0);
+ cpu_idle_cb_curr--;
+
+ /*
+ * Reset the property ready flag for all CPUs if no registered
+ * callbacks remain, because cpu_idle_enter/exit stop updating
+ * properties when no callback is registered.
+ */
+ if (cpu_idle_cb_curr == 0) {
+ for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
+ sp->v.ready = B_FALSE;
+ }
+ }
+
+ /* Resume other CPUs from paused state if needed. */
+ if (unpause) {
+ start_cpus();
+ }
+ if (unlock) {
+ mutex_exit(&cpu_lock);
+ }
+}
+
+int
+cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp,
+ void *arg, cpu_idle_callback_handle_t *hdlp)
+{
+ cpu_idle_cb_state_t *sp;
+ cpu_idle_cb_impl_t *cip = NULL;
+
+ /* First validate parameters. */
+ ASSERT(!CPU_ON_INTR(CPU));
+ ASSERT(CPU->cpu_seqid < max_ncpus);
+ sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+ if (sp->v.index != 0) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: register_callback called from callback.");
+ return (EBUSY);
+ } else if (cbp == NULL || hdlp == NULL) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: NULL parameters in register_callback.");
+ return (EINVAL);
+ } else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE ||
+ prio >= CPU_IDLE_CB_PRIO_RESV_BASE) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: priority 0x%x out of range.", prio);
+ return (EINVAL);
+ } else if (cbp->version != CPU_IDLE_CALLBACK_VERS) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: callback version %d is not supported.",
+ cbp->version);
+ return (EINVAL);
+ }
+
+ mutex_enter(&cpu_idle_cb_lock);
+ /* Check whether callback with priority exists if not dynamic. */
+ if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) {
+ for (cip = cpu_idle_cb_busy; cip != NULL;
+ cip = cip->next) {
+ if (cip->priority == prio) {
+ mutex_exit(&cpu_idle_cb_lock);
+ cmn_err(CE_NOTE, "!cpu_event: callback with "
+ "priority 0x%x already exists.", prio);
+ return (EEXIST);
+ }
+ }
+ }
+
+ cip = kmem_zalloc(sizeof (*cip), KM_SLEEP);
+ cip->callback = cbp;
+ cip->argument = arg;
+ cip->priority = prio;
+ cip->next = cpu_idle_cb_busy;
+ cpu_idle_cb_busy = cip;
+ cpu_idle_insert_callback(cip);
+ mutex_exit(&cpu_idle_cb_lock);
+
+ *hdlp = (cpu_idle_callback_handle_t)cip;
+
+ return (0);
+}
+
+int
+cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl)
+{
+ int rc = ENODEV;
+ cpu_idle_cb_state_t *sp;
+ cpu_idle_cb_impl_t *ip, **ipp;
+
+ ASSERT(!CPU_ON_INTR(CPU));
+ ASSERT(CPU->cpu_seqid < max_ncpus);
+ sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+ if (sp->v.index != 0) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: unregister_callback called from callback.");
+ return (EBUSY);
+ } else if (hdl == NULL) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: hdl is NULL in unregister_callback.");
+ return (EINVAL);
+ }
+
+ ip = (cpu_idle_cb_impl_t *)hdl;
+ mutex_enter(&cpu_idle_cb_lock);
+ for (ipp = &cpu_idle_cb_busy; *ipp != NULL; ipp = &(*ipp)->next) {
+ if (*ipp == ip) {
+ *ipp = ip->next;
+ cpu_idle_remove_callback(ip);
+ rc = 0;
+ break;
+ }
+ }
+ mutex_exit(&cpu_idle_cb_lock);
+
+ if (rc == 0) {
+ kmem_free(ip, sizeof (*ip));
+ } else {
+ cmn_err(CE_NOTE,
+ "!cpu_event: callback handle %p not found.", (void *)hdl);
+ }
+
+ return (rc);
+}
+
+static int
+cpu_idle_enter_state(cpu_idle_cb_state_t *sp, intptr_t state)
+{
+ sp->v.idle_state->cipv_intptr = state;
+ sp->v.enter_ts->cipv_hrtime = gethrtime_unscaled();
+ sp->v.last_busy->cipv_hrtime = sp->v.enter_ts->cipv_hrtime -
+ sp->v.exit_ts->cipv_hrtime;
+ sp->v.total_busy->cipv_hrtime += sp->v.last_busy->cipv_hrtime;
+ if (sp->v.ready == B_FALSE) {
+ sp->v.ready = B_TRUE;
+ return (0);
+ }
+
+ return (1);
+}
+
+static void
+cpu_idle_exit_state(cpu_idle_cb_state_t *sp)
+{
+ sp->v.idle_state->cipv_intptr = CPU_IDLE_STATE_NORMAL;
+ sp->v.exit_ts->cipv_hrtime = gethrtime_unscaled();
+ sp->v.last_idle->cipv_hrtime = sp->v.exit_ts->cipv_hrtime -
+ sp->v.enter_ts->cipv_hrtime;
+ sp->v.total_idle->cipv_hrtime += sp->v.last_idle->cipv_hrtime;
+}
+
+/*ARGSUSED*/
+int
+cpu_idle_enter(int state, int flag,
+ cpu_idle_check_wakeup_t check_func, void *check_arg)
+{
+ int i;
+ cpu_idle_cb_item_t *cip;
+ cpu_idle_cb_state_t *sp;
+ cpu_idle_callback_context_t ctx;
+#if defined(__x86)
+ ulong_t iflags;
+#endif
+
+ ctx = CPU_IDLE_GET_CTX(CPU);
+ ASSERT(CPU->cpu_seqid < max_ncpus);
+ sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+ ASSERT(sp->v.index == 0);
+
+ /*
+ * On x86, cpu_idle_enter can be called from idle thread with either
+ * interrupts enabled or disabled, so we need to make sure interrupts
+ * are disabled here.
+ * On SPARC, cpu_idle_enter will be called from idle thread with
+ * interrupt disabled, so no special handling necessary.
+ */
+#if defined(__x86)
+ iflags = intr_clear();
+#endif
+
+ /* Skip calling callback if state is not ready for current CPU. */
+ if (cpu_idle_enter_state(sp, state) == 0) {
+#if defined(__x86)
+ intr_restore(iflags);
+#endif
+ return (0);
+ }
+
+ for (i = 0, cip = cpu_idle_cb_array; i < cpu_idle_cb_curr; i++, cip++) {
+ /*
+ * Increase index so corresponding idle_exit callback
+ * will be invoked should interrupt happen during
+ * idle_enter callback.
+ */
+ sp->v.index++;
+
+ /* Call idle_enter callback function if it's not NULL. */
+ if (cip->enter != NULL) {
+ cip->enter(cip->arg, ctx, check_func, check_arg);
+
+ /*
+ * cpu_idle_enter runs with interrupts
+ * disabled, so the idle_enter callbacks will
+ * also be called with interrupts disabled.
+ * It is permissible for the callbacks to
+ * enable the interrupts, if they can also
+ * handle the condition if the interrupt
+ * occurs.
+ *
+ * However, if an interrupt occurs and we
+ * return here without dealing with it, we
+ * return to the cpu_idle_enter() caller
+ * with an EBUSY, and the caller will not
+ * enter the idle state.
+ *
+ * We detect the interrupt, by checking the
+ * index value of the state pointer. If it
+ * is not the index we incremented above,
+ * then it was cleared while processing
+ * the interrupt.
+ *
+ * Also note, that at this point of the code
+ * the normal index value will be one greater
+ * than the variable 'i' in the loop, as it
+ * hasn't yet been incremented.
+ */
+ if (sp->v.index != i + 1) {
+#if defined(__x86)
+ intr_restore(iflags);
+#endif
+ return (EBUSY);
+ }
+ }
+ }
+#if defined(__x86)
+ intr_restore(iflags);
+#endif
+
+ return (0);
+}
+
+void
+cpu_idle_exit(int flag)
+{
+ int i;
+ cpu_idle_cb_item_t *cip;
+ cpu_idle_cb_state_t *sp;
+ cpu_idle_callback_context_t ctx;
+#if defined(__x86)
+ ulong_t iflags;
+#endif
+
+ ASSERT(CPU->cpu_seqid < max_ncpus);
+ sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+
+#if defined(__sparc)
+ /*
+ * On SPARC, cpu_idle_exit will only be called from idle thread
+ * with interrupt disabled.
+ */
+
+ if (sp->v.index != 0) {
+ ctx = CPU_IDLE_GET_CTX(CPU);
+ cpu_idle_exit_state(sp);
+ for (i = sp->v.index - 1; i >= 0; i--) {
+ cip = &cpu_idle_cb_array[i];
+ if (cip->exit != NULL) {
+ cip->exit(cip->arg, ctx, flag);
+ }
+ }
+ sp->v.index = 0;
+ }
+#elif defined(__x86)
+ /*
+ * On x86, cpu_idle_exit will be called from idle thread or interrupt
+ * handler. When called from interrupt handler, interrupts will be
+ * disabled. When called from idle thread, interrupts may be disabled
+ * or enabled.
+ */
+
+ /* Called from interrupt, interrupts are already disabled. */
+ if (flag & CPU_IDLE_CB_FLAG_INTR) {
+ /*
+ * return if cpu_idle_exit already called or
+ * there is no registered callback.
+ */
+ if (sp->v.index == 0) {
+ return;
+ }
+ ctx = CPU_IDLE_GET_CTX(CPU);
+ cpu_idle_exit_state(sp);
+ for (i = sp->v.index - 1; i >= 0; i--) {
+ cip = &cpu_idle_cb_array[i];
+ if (cip->exit != NULL) {
+ cip->exit(cip->arg, ctx, flag);
+ }
+ }
+ sp->v.index = 0;
+
+ /* Called from idle thread, need to disable interrupt. */
+ } else {
+ iflags = intr_clear();
+ if (sp->v.index != 0) {
+ ctx = CPU_IDLE_GET_CTX(CPU);
+ cpu_idle_exit_state(sp);
+ for (i = sp->v.index - 1; i >= 0; i--) {
+ cip = &cpu_idle_cb_array[i];
+ if (cip->exit != NULL) {
+ cip->exit(cip->arg, ctx, flag);
+ }
+ }
+ sp->v.index = 0;
+ }
+ intr_restore(iflags);
+ }
+#endif
+}
+
+cpu_idle_callback_context_t
+cpu_idle_get_context(void)
+{
+ return (CPU_IDLE_GET_CTX(CPU));
+}
+
+/*
+ * Allocate property structures in groups of CPU_IDLE_VALUE_GROUP_SIZE to
+ * improve cache efficiency. To simplify the implementation, memory allocated
+ * for property structures is never freed.
+ */
+static void
+cpu_idle_prop_allocate_impl(void)
+{
+ int i;
+ size_t sz;
+ intptr_t buf;
+ cpu_idle_prop_impl_t *prop;
+ cpu_idle_prop_value_t *valp;
+
+ ASSERT(!CPU_ON_INTR(CPU));
+ prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE,
+ KM_SLEEP);
+ sz = sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE * max_ncpus;
+ sz += CPU_CACHE_COHERENCE_SIZE;
+ buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
+ valp = (cpu_idle_prop_value_t *)P2ROUNDUP(buf,
+ CPU_CACHE_COHERENCE_SIZE);
+
+ for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) {
+ prop->value = valp;
+ prop->next = cpu_idle_prop_free;
+ cpu_idle_prop_free = prop;
+ }
+}
+
+int
+cpu_idle_prop_create_property(const char *name, cpu_idle_prop_type_t type,
+ cpu_idle_prop_update_t update, void *arg, cpu_idle_prop_handle_t *hdlp)
+{
+ int rc = EEXIST;
+ cpu_idle_prop_impl_t *prop;
+
+ ASSERT(!CPU_ON_INTR(CPU));
+ if (name == NULL || hdlp == NULL) {
+ cmn_err(CE_WARN,
+ "!cpu_event: NULL parameters in create_property.");
+ return (EINVAL);
+ }
+
+ mutex_enter(&cpu_idle_prop_lock);
+ for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
+ if (strcmp(prop->name, name) == 0) {
+ cmn_err(CE_NOTE,
+ "!cpu_event: property %s already exists.", name);
+ break;
+ }
+ }
+ if (prop == NULL) {
+ if (cpu_idle_prop_free == NULL) {
+ cpu_idle_prop_allocate_impl();
+ }
+ ASSERT(cpu_idle_prop_free != NULL);
+ prop = cpu_idle_prop_free;
+ cpu_idle_prop_free = prop->next;
+ prop->next = cpu_idle_prop_busy;
+ cpu_idle_prop_busy = prop;
+
+ ASSERT(prop->value != NULL);
+ prop->name = strdup(name);
+ prop->type = type;
+ prop->update = update;
+ prop->private = arg;
+ prop->refcnt = 1;
+ *hdlp = prop;
+ rc = 0;
+ }
+ mutex_exit(&cpu_idle_prop_lock);
+
+ return (rc);
+}
+
+int
+cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl)
+{
+ int rc = ENODEV;
+ cpu_idle_prop_impl_t *prop, **propp;
+ cpu_idle_prop_value_t *valp;
+
+ ASSERT(!CPU_ON_INTR(CPU));
+ if (hdl == NULL) {
+ cmn_err(CE_WARN,
+ "!cpu_event: hdl is NULL in destroy_property.");
+ return (EINVAL);
+ }
+
+ prop = (cpu_idle_prop_impl_t *)hdl;
+ mutex_enter(&cpu_idle_prop_lock);
+ for (propp = &cpu_idle_prop_busy; *propp != NULL;
+ propp = &(*propp)->next) {
+ if (*propp == prop) {
+ ASSERT(prop->refcnt > 0);
+ if (atomic_cas_32(&prop->refcnt, 1, 0) == 1) {
+ *propp = prop->next;
+ strfree(prop->name);
+ valp = prop->value;
+ bzero(prop, sizeof (*prop));
+ prop->value = valp;
+ prop->next = cpu_idle_prop_free;
+ cpu_idle_prop_free = prop;
+ rc = 0;
+ } else {
+ rc = EBUSY;
+ }
+ break;
+ }
+ }
+ mutex_exit(&cpu_idle_prop_lock);
+
+ return (rc);
+}
+
+int
+cpu_idle_prop_create_handle(const char *name, cpu_idle_prop_handle_t *hdlp)
+{
+ int rc = ENODEV;
+ cpu_idle_prop_impl_t *prop;
+
+ ASSERT(!CPU_ON_INTR(CPU));
+ if (name == NULL || hdlp == NULL) {
+ cmn_err(CE_WARN,
+ "!cpu_event: NULL parameters in create_handle.");
+ return (EINVAL);
+ }
+
+ mutex_enter(&cpu_idle_prop_lock);
+ for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
+ if (strcmp(prop->name, name) == 0) {
+ /* Hold one refcount on object. */
+ ASSERT(prop->refcnt > 0);
+ atomic_inc_32(&prop->refcnt);
+ *hdlp = (cpu_idle_prop_handle_t)prop;
+ rc = 0;
+ break;
+ }
+ }
+ mutex_exit(&cpu_idle_prop_lock);
+
+ return (rc);
+}
+
+int
+cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl)
+{
+ int rc = ENODEV;
+ cpu_idle_prop_impl_t *prop;
+
+ ASSERT(!CPU_ON_INTR(CPU));
+ if (hdl == NULL) {
+ cmn_err(CE_WARN,
+ "!cpu_event: hdl is NULL in destroy_handle.");
+ return (EINVAL);
+ }
+
+ mutex_enter(&cpu_idle_prop_lock);
+ for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
+ if (prop == hdl) {
+ /* Release refcnt held in create_handle. */
+ ASSERT(prop->refcnt > 1);
+ atomic_dec_32(&prop->refcnt);
+ rc = 0;
+ break;
+ }
+ }
+ mutex_exit(&cpu_idle_prop_lock);
+
+ return (rc);
+}
+
+cpu_idle_prop_type_t
+cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (((cpu_idle_prop_impl_t *)hdl)->type);
+}
+
+const char *
+cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (((cpu_idle_prop_impl_t *)hdl)->name);
+}
+
+int
+cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp)
+{
+ int idx, rc = 0;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+ if (hdl == NULL || valp == NULL) {
+ cmn_err(CE_NOTE, "!cpu_event: NULL parameters in prop_get.");
+ return (EINVAL);
+ }
+ idx = CPU_IDLE_CTX2IDX(ctx);
+ if (prop->update != NULL) {
+ cpu_idle_cb_state_t *sp;
+
+ ASSERT(CPU->cpu_seqid < max_ncpus);
+ sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+ /* CPU's idle enter timestamp as sequence number. */
+ rc = prop->update(prop->private,
+ (uint64_t)sp->v.enter_ts->cipv_hrtime, &prop->value[idx]);
+ }
+ if (rc == 0) {
+ *valp = prop->value[idx];
+ }
+
+ return (rc);
+}
+
+uint32_t
+cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx)
+{
+ int idx;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+ idx = CPU_IDLE_CTX2IDX(ctx);
+ return (prop->value[idx].cipv_uint32);
+}
+
+uint64_t
+cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx)
+{
+ int idx;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+ idx = CPU_IDLE_CTX2IDX(ctx);
+ return (prop->value[idx].cipv_uint64);
+}
+
+intptr_t
+cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx)
+{
+ int idx;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+ idx = CPU_IDLE_CTX2IDX(ctx);
+ return (prop->value[idx].cipv_intptr);
+}
+
+hrtime_t
+cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx)
+{
+ int idx;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+ idx = CPU_IDLE_CTX2IDX(ctx);
+ return (prop->value[idx].cipv_hrtime);
+}
+
+void
+cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val)
+{
+ int idx;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+ idx = CPU_IDLE_CTX2IDX(ctx);
+ prop->value[idx] = val;
+}
+
+void
+cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, cpu_idle_prop_value_t val)
+{
+ int i, idx;
+ cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+ ASSERT(hdl != NULL);
+ for (i = 0; i < max_ncpus; i++) {
+ idx = CPU_IDLE_CTX2IDX(i);
+ prop->value[idx] = val;
+ }
+}
+
+/*ARGSUSED*/
+static int
+cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
+    cpu_idle_prop_value_t *valp)
+{
+ int i;
+ uint64_t val;
+
+ for (val = 0, i = 0; i < PIL_MAX; i++) {
+ val += CPU->cpu_stats.sys.intr[i];
+ }
+ valp->cipv_uint64 = val;
+
+ return (0);
+}
+
+uint_t
+cpu_idle_get_cpu_state(cpu_t *cp)
+{
+ ASSERT(cp != NULL && cp->cpu_seqid < max_ncpus);
+ return ((uint_t)cpu_idle_prop_get_intptr(
+ cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
+ CPU_IDLE_GET_CTX(cp)));
+}
diff --git a/usr/src/uts/common/sys/cpu_event.h b/usr/src/uts/common/sys/cpu_event.h
new file mode 100644
index 0000000000..a636fd9a41
--- /dev/null
+++ b/usr/src/uts/common/sys/cpu_event.h
@@ -0,0 +1,270 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_CPU_EVENT_H
+#define _SYS_CPU_EVENT_H
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * CPU idle notification callbacks are divided into three priority classes:
+ * 1. Statically assigned high priority callbacks.
+ * 2. Dynamically allocated normal priority callbacks.
+ * 3. Statically assigned low priority callbacks.
+ *
+ * All registered callbacks will be called in priority order from high
+ * to low just before CPU enters hardware idle state and from low to
+ * high just after CPU wakes from idle state.
+ *
+ * The high and low priority classes are designed to support hardware
+ * ordering requirements. A dynamically assigned priority allows the
+ * framework to choose the order in which the callback is processed.
+ * If a callback has no dependency on other callbacks, it should use
+ * dynamic priority to avoid priority conflicts.
+ *
+ * Note that the priority doesn't describe how important a callback
+ * is, but just the order in which they are processed. If a callback
+ * needs processing early in the idle notification cycle, it should
+ * have a higher priority. If it needs to be at the end, or early on
+ * the exit, then it should have a lower priority.
+ */
+
+#define CPU_IDLE_CB_PRIO_LOW_BASE 0x20000000U
+#define CPU_IDLE_CB_PRIO_DYN_BASE 0x40000000U
+#define CPU_IDLE_CB_PRIO_HIGH_BASE 0x40000001U
+#define CPU_IDLE_CB_PRIO_RESV_BASE 0x80000000U
+
+/*
+ * Indicates a dynamically assigned priority to cpu_idle_register_callback().
+ */
+#define CPU_IDLE_CB_PRIO_DYNAMIC CPU_IDLE_CB_PRIO_DYN_BASE
+/* Priority assigned to dtrace probe callback. */
+#define CPU_IDLE_CB_PRIO_DTRACE (CPU_IDLE_CB_PRIO_LOW_BASE + 0xC000000)
+
+
+#ifdef __x86
+/* Priority assigned to TLB flush callback. */
+#define CPU_IDLE_CB_PRIO_TLB (CPU_IDLE_CB_PRIO_LOW_BASE + 0x100000)
+#endif
+
+/* Name of properties supported by CPU idle notification. */
+#define CPU_IDLE_PROP_IDLE_STATE "idle-state"
+#define CPU_IDLE_PROP_ENTER_TIMESTAMP "enter-ts"
+#define CPU_IDLE_PROP_EXIT_TIMESTAMP "exit-ts"
+#define CPU_IDLE_PROP_LAST_IDLE_TIME "last-idle-time"
+#define CPU_IDLE_PROP_LAST_BUSY_TIME "last-busy-time"
+#define CPU_IDLE_PROP_TOTAL_IDLE_TIME "total-idle-time"
+#define CPU_IDLE_PROP_TOTAL_BUSY_TIME "total-busy-time"
+#define CPU_IDLE_PROP_INTERRUPT_COUNT "interrupt-count"
+
+/*
+ * sizeof(cpu_idle_prop_value_t) should be power of 2 to align on cache line.
+ */
+typedef union cpu_idle_prop_value {
+ intptr_t cipv_intptr;
+ uint32_t cipv_uint32;
+ uint64_t cipv_uint64;
+ hrtime_t cipv_hrtime;
+} cpu_idle_prop_value_t;
+
+typedef enum cpu_idle_prop_type {
+ CPU_IDLE_PROP_TYPE_INTPTR,
+ CPU_IDLE_PROP_TYPE_UINT32,
+ CPU_IDLE_PROP_TYPE_UINT64,
+ CPU_IDLE_PROP_TYPE_HRTIME,
+} cpu_idle_prop_type_t;
+
+typedef void *cpu_idle_callback_handle_t;
+typedef void *cpu_idle_callback_context_t;
+typedef void *cpu_idle_prop_handle_t;
+
+/*
+ * Function prototype for checking CPU wakeup events.
+ * If the CPU has already been awakened, the check_wakeup callback should
+ * call cpu_idle_exit() to notify the CPU idle framework, if it hasn't
+ * already been called.
+ */
+typedef void (* cpu_idle_check_wakeup_t)(void *arg);
+
+/*
+ * Function prototype for entering idle state notification callback.
+ * The idle_enter callback must obey all constraints which apply to the idle
+ * thread because it is called in idle thread context.
+ * The callback is called with interrupts disabled. It may enable interrupts
+ * if it can cooperate with the corresponding idle_exit callback to handle
+ * interrupts that arrive after interrupts are enabled. If the idle_enter
+ * callback enables interrupts, the corresponding idle_exit callback may be
+ * called before the idle_enter callback returns.
+ */
+typedef void (* cpu_idle_enter_cbfn_t)(void *arg,
+ cpu_idle_callback_context_t ctx,
+ cpu_idle_check_wakeup_t check_func, void *check_arg);
+
+/*
+ * Function prototype for exiting idle state notification callback.
+ * The idle_exit callback is called in idle thread context or in interrupt
+ * context, with interrupts disabled; the flag argument distinguishes the
+ * calling context.
+ * The callback must not try to enable interrupts.
+ */
+typedef void (* cpu_idle_exit_cbfn_t)(void *arg,
+ cpu_idle_callback_context_t ctx, int flag);
+
+#define CPU_IDLE_CB_FLAG_INTR 0x1 /* Called in interrupt context. */
+#define CPU_IDLE_CB_FLAG_IDLE 0x2 /* Called in idle thread context. */
+
+typedef struct cpu_idle_callback {
+ int version;
+ cpu_idle_enter_cbfn_t idle_enter;
+ cpu_idle_exit_cbfn_t idle_exit;
+} cpu_idle_callback_t;
+
+#define CPU_IDLE_CALLBACK_VER0 0
+#define CPU_IDLE_CALLBACK_VERS CPU_IDLE_CALLBACK_VER0
+
+/*
+ * Register a callback to be called when CPU idle state changes.
+ * All registered callbacks will be called in priority order from high to low
+ * when CPU enters idle state and from low to high when CPU leaves idle state.
+ * If the CPU is predicted to sleep for only a short time, or is under heavy
+ * load, the framework may skip calling the registered callbacks on an idle
+ * state change to avoid overhead and reduce performance penalties.
+ * It's guaranteed that each entering notification will be paired with a
+ * corresponding exiting notification.
+ * Returns zero on success and an error code on failure.
+ * N.B.: this interface must not be called from within a callback.
+ */
+extern int cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp,
+ void *arg, cpu_idle_callback_handle_t *hdlp);
+
+/*
+ * Un-register a registered callback.
+ * Returns zero on success and an error code on failure.
+ * N.B.: this interface must not be called from within a callback.
+ */
+extern int cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl);
+
+/*
+ * Called by the CPU idle handler to notify that the CPU is entering an
+ * idle state. It should be called with interrupts disabled.
+ * state: platform-specific identifier of the idle state to enter;
+ * on x86, it's the CPU C-state.
+ * The idle thread should cancel entering the hardware idle state if
+ * cpu_idle_enter returns a non-zero value.
+ */
+extern int cpu_idle_enter(int state, int flag,
+ cpu_idle_check_wakeup_t check_func, void *check_arg);
+
+/*
+ * Called by the CPU idle handler to notify that the CPU is exiting an
+ * idle state. It should be called with interrupts disabled.
+ */
+extern void cpu_idle_exit(int flag);
+
+/*
+ * Get CPU idle notification context corresponding to current CPU.
+ */
+extern cpu_idle_callback_context_t cpu_idle_get_context(void);
+
+/*
+ * Prototype of the function called to update a property value on demand.
+ * The callback should update the property value corresponding to the
+ * current CPU.
+ */
+typedef int (* cpu_idle_prop_update_t)(void *arg, uint64_t seqnum,
+ cpu_idle_prop_value_t *valp);
+
+/*
+ * Create a property with the given name and type.
+ * If parameter update is not NULL, it will be called on demand to update
+ * the property value corresponding to the current CPU.
+ * If parameter update is NULL, the provider should call
+ * cpu_idle_prop_set_value() to update the property value for each CPU.
+ * Returns zero on success with the handle stored in hdlp, otherwise an
+ * error code.
+ */
+extern int cpu_idle_prop_create_property(const char *name,
+ cpu_idle_prop_type_t type, cpu_idle_prop_update_t update, void *arg,
+ cpu_idle_prop_handle_t *hdlp);
+
+/*
+ * Destroy property corresponding to hdl.
+ * Return zero on success, otherwise error code.
+ */
+extern int cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl);
+
+/*
+ * Create handle for property with name 'name'.
+ * Return zero on success with handle stored in hdlp, otherwise error code.
+ */
+extern int cpu_idle_prop_create_handle(const char *name,
+ cpu_idle_prop_handle_t *hdlp);
+
+/*
+ * Destroy property handle.
+ * Return zero on success, otherwise error code.
+ */
+extern int cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl);
+
+/*
+ * CPU idle property manipulation functions.
+ * All cpu_idle_prop_get/set_xxx functions with argument ctx should only be used
+ * to manipulate properties associated with current CPU.
+ * Context ctx shouldn't be passed to other CPUs to manipulate properties.
+ */
+extern cpu_idle_prop_type_t cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl);
+extern const char *cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl);
+extern int cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp);
+extern uint32_t cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx);
+extern uint64_t cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx);
+extern intptr_t cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx);
+extern hrtime_t cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx);
+extern void cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl,
+ cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val);
+extern void cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl,
+ cpu_idle_prop_value_t val);
+
+extern uint_t cpu_idle_get_cpu_state(cpu_t *cp);
+
+extern void cpu_event_init(void);
+extern void cpu_event_init_cpu(cpu_t *cp);
+extern void cpu_event_fini_cpu(cpu_t *cp);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CPU_EVENT_H */
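A minimal consumer sketch of the property interfaces declared above (hypothetical client code: my_client_init, my_idle_exit, idle_state_hdl and the probe name are illustrative assumptions; error handling is elided):

	#include <sys/sdt.h>
	#include <sys/cpu_event.h>

	static cpu_idle_prop_handle_t idle_state_hdl;

	/* Resolve the built-in "idle-state" property once at startup. */
	static int
	my_client_init(void)
	{
		return (cpu_idle_prop_create_handle(CPU_IDLE_PROP_IDLE_STATE,
		    &idle_state_hdl));
	}

	/* Idle-exit callback: ctx must refer to the current CPU. */
	/*ARGSUSED*/
	static void
	my_idle_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
	{
		intptr_t state;

		state = cpu_idle_prop_get_intptr(idle_state_hdl, ctx);
		DTRACE_PROBE1(my__idle__state, intptr_t, state);
	}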
diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
index 3cb7c3fac1..e8ff2ad634 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
@@ -36,6 +36,7 @@
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
+#include <sys/cpu_event.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
@@ -253,6 +254,74 @@ cstate_wakeup(cpu_t *cp, int bound)
}
/*
+ * Function called by the CPU idle notification framework to check whether
+ * the CPU has been awakened. It is called with interrupts disabled.
+ * If the CPU has been awakened, it calls cpu_idle_exit() to notify the
+ * CPU idle notification framework.
+ */
+static void
+acpi_cpu_mwait_check_wakeup(void *arg)
+{
+ volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
+
+ ASSERT(arg != NULL);
+ if (*mcpu_mwait != MWAIT_HALTED) {
+ /*
+ * CPU has been awakened, notify CPU idle notification system.
+ */
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ } else {
+ /*
+ * Toggle interrupt flag to detect pending interrupts.
+ * If interrupt happened, do_interrupt() will notify CPU idle
+ * notification framework so no need to call cpu_idle_exit()
+ * here.
+ */
+ sti();
+ SMT_PAUSE();
+ cli();
+ }
+}
+
+static void
+acpi_cpu_mwait_ipi_check_wakeup(void *arg)
+{
+ volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
+
+ ASSERT(arg != NULL);
+ if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
+ /*
+ * CPU has been awakened, notify CPU idle notification system.
+ */
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ } else {
+ /*
+ * Toggle interrupt flag to detect pending interrupts.
+ * If interrupt happened, do_interrupt() will notify CPU idle
+ * notification framework so no need to call cpu_idle_exit()
+ * here.
+ */
+ sti();
+ SMT_PAUSE();
+ cli();
+ }
+}
+
+/*ARGSUSED*/
+static void
+acpi_cpu_check_wakeup(void *arg)
+{
+ /*
+ * Toggle interrupt flag to detect pending interrupts.
+ * If interrupt happened, do_interrupt() will notify CPU idle
+ * notification framework so no need to call cpu_idle_exit() here.
+ */
+ sti();
+ SMT_PAUSE();
+ cli();
+}
+
+/*
* enter deep c-state handler
*/
static void
@@ -267,6 +336,7 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
uint32_t cs_type = cstate->cs_type;
int hset_update = 1;
boolean_t using_timer;
+ cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;
/*
* Set our mcpu_mwait here, so we can tell if anyone tries to
@@ -274,10 +344,13 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
* attempt to set our mcpu_mwait until we add ourself to the haltset.
*/
if (mcpu_mwait) {
- if (type == ACPI_ADR_SPACE_SYSTEM_IO)
+ if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
*mcpu_mwait = MWAIT_WAKEUP_IPI;
- else
+ check_func = &acpi_cpu_mwait_ipi_check_wakeup;
+ } else {
*mcpu_mwait = MWAIT_HALTED;
+ check_func = &acpi_cpu_mwait_check_wakeup;
+ }
}
/*
@@ -397,13 +470,14 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
*/
i86_monitor(mcpu_mwait, 0, 0);
if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
- cpu_dtrace_idle_probe(CPU_ACPI_C1);
-
- tlb_going_idle();
- i86_mwait(0, 0);
- tlb_service();
-
- cpu_dtrace_idle_probe(CPU_ACPI_C0);
+ if (cpu_idle_enter(IDLE_STATE_C1, 0,
+ check_func, (void *)mcpu_mwait) == 0) {
+ if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
+ MWAIT_HALTED) {
+ i86_mwait(0, 0);
+ }
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ }
}
/*
@@ -416,8 +490,6 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
return;
}
- cpu_dtrace_idle_probe((uint_t)cs_type);
-
if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
/*
* We're on our way to being halted.
@@ -426,25 +498,31 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
*/
i86_monitor(mcpu_mwait, 0, 0);
if (*mcpu_mwait == MWAIT_HALTED) {
- uint32_t eax = cstate->cs_address;
- uint32_t ecx = 1;
-
- tlb_going_idle();
- i86_mwait(eax, ecx);
- tlb_service();
+ if (cpu_idle_enter((uint_t)cs_type, 0,
+ check_func, (void *)mcpu_mwait) == 0) {
+ if (*mcpu_mwait == MWAIT_HALTED) {
+ i86_mwait(cstate->cs_address, 1);
+ }
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ }
}
} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
uint32_t value;
ACPI_TABLE_FADT *gbl_FADT;
if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
- tlb_going_idle();
- (void) cpu_acpi_read_port(cstate->cs_address,
- &value, 8);
- acpica_get_global_FADT(&gbl_FADT);
- (void) cpu_acpi_read_port(
- gbl_FADT->XPmTimerBlock.Address, &value, 32);
- tlb_service();
+ if (cpu_idle_enter((uint_t)cs_type, 0,
+ check_func, (void *)mcpu_mwait) == 0) {
+ if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
+ (void) cpu_acpi_read_port(
+ cstate->cs_address, &value, 8);
+ acpica_get_global_FADT(&gbl_FADT);
+ (void) cpu_acpi_read_port(
+ gbl_FADT->XPmTimerBlock.Address,
+ &value, 32);
+ }
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ }
}
}
@@ -455,8 +533,6 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
sti();
- cpu_dtrace_idle_probe(CPU_ACPI_C0);
-
/*
* We're no longer halted
*/
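Each converted call site above follows the same protocol: arm the monitor, ask the framework for permission to idle, re-check the wakeup condition (an idle-enter callback may have briefly enabled interrupts), then notify exit. A distilled sketch of that pattern, using the C1/mwait case from this diff:

	i86_monitor(mcpu_mwait, 0, 0);
	if (*mcpu_mwait == MWAIT_HALTED) {
		if (cpu_idle_enter(IDLE_STATE_C1, 0,
		    check_func, (void *)mcpu_mwait) == 0) {
			/* Re-check: a wakeup may have slipped in. */
			if (*mcpu_mwait == MWAIT_HALTED)
				i86_mwait(0, 0);
			cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
		}
	}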
diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c
index 2f4b66ddf2..18968c0721 100644
--- a/usr/src/uts/i86pc/os/intr.c
+++ b/usr/src/uts/i86pc/os/intr.c
@@ -25,6 +25,7 @@
*/
#include <sys/cpuvar.h>
+#include <sys/cpu_event.h>
#include <sys/regset.h>
#include <sys/psw.h>
#include <sys/types.h>
@@ -76,7 +77,7 @@ ulong_t laststi[NCPU];
/*
* This variable tracks the last place events were disabled on each cpu
- * it assists in debugging when asserts that interupts are enabled trip.
+ * it assists in debugging when asserts that interrupts are enabled trip.
*/
ulong_t lastcli[NCPU];
@@ -931,12 +932,7 @@ do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
ttp->ttr_vector = 0xff;
#endif /* TRAPTRACE */
-#if !defined(__xpv)
- /*
- * Handle any pending TLB flushing
- */
- tlb_service();
-#endif
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);
/*
* If it's a softint go do it now.
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index ced26fb6a5..6c9cc3aec3 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -36,6 +36,7 @@
#include <sys/x86_archext.h>
#include <sys/cpupart.h>
#include <sys/cpuvar.h>
+#include <sys/cpu_event.h>
#include <sys/cmt.h>
#include <sys/cpu.h>
#include <sys/disp.h>
@@ -370,18 +371,28 @@ cpu_idle_adaptive(void)
(*CPU->cpu_m.mcpu_idle_cpu)();
}
-void
-cpu_dtrace_idle_probe(uint_t cstate)
+/*
+ * Function called by the CPU idle notification framework to check whether
+ * the CPU has been awakened. It is called with interrupts disabled.
+ * If the CPU has been awakened, it calls cpu_idle_exit() to notify the
+ * CPU idle notification framework.
+ */
+/*ARGSUSED*/
+static void
+cpu_idle_check_wakeup(void *arg)
{
- cpu_t *cpup = CPU;
- struct machcpu *mcpu = &(cpup->cpu_m);
-
- mcpu->curr_cstate = cstate;
- DTRACE_PROBE1(idle__state__transition, uint_t, cstate);
+ /*
+ * Toggle interrupt flag to detect pending interrupts.
+ * If interrupt happened, do_interrupt() will notify CPU idle
+ * notification framework so no need to call cpu_idle_exit() here.
+ */
+ sti();
+ SMT_PAUSE();
+ cli();
}
/*
- * Idle the present CPU until awoken via an interrupt
+ * Idle the present CPU until awakened via an interrupt
*/
void
cpu_idle(void)
@@ -407,7 +418,7 @@ cpu_idle(void)
*
* When a thread becomes runnable, it is placed on the queue
* and then the halted CPU bitmap is checked to determine who
- * (if anyone) should be awoken. We therefore need to first
+ * (if anyone) should be awakened. We therefore need to first
* add ourselves to the bitmap, and then check if there
* is any work available. The order is important to prevent a race
* that can lead to work languishing on a run queue somewhere while
@@ -479,11 +490,11 @@ cpu_idle(void)
return;
}
- cpu_dtrace_idle_probe(IDLE_STATE_C1);
-
- mach_cpu_idle();
-
- cpu_dtrace_idle_probe(IDLE_STATE_C0);
+ if (cpu_idle_enter(IDLE_STATE_C1, 0,
+ cpu_idle_check_wakeup, NULL) == 0) {
+ mach_cpu_idle();
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ }
/*
* We're no longer halted
@@ -560,7 +571,37 @@ cpu_wakeup(cpu_t *cpu, int bound)
#ifndef __xpv
/*
- * Idle the present CPU until awoken via touching its monitored line
+ * Function called by the CPU idle notification framework to check whether
+ * the CPU has been awakened. It is called with interrupts disabled.
+ * If the CPU has been awakened, it calls cpu_idle_exit() to notify the
+ * CPU idle notification framework.
+ */
+static void
+cpu_idle_mwait_check_wakeup(void *arg)
+{
+ volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
+
+ ASSERT(arg != NULL);
+ if (*mcpu_mwait != MWAIT_HALTED) {
+ /*
+ * CPU has been awakened, notify CPU idle notification system.
+ */
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ } else {
+ /*
+ * Toggle interrupt flag to detect pending interrupts.
+ * If interrupt happened, do_interrupt() will notify CPU idle
+ * notification framework so no need to call cpu_idle_exit()
+ * here.
+ */
+ sti();
+ SMT_PAUSE();
+ cli();
+ }
+}
+
+/*
+ * Idle the present CPU until awakened via touching its monitored line
*/
void
cpu_idle_mwait(void)
@@ -632,13 +673,13 @@ cpu_idle_mwait(void)
*/
i86_monitor(mcpu_mwait, 0, 0);
if (*mcpu_mwait == MWAIT_HALTED) {
- cpu_dtrace_idle_probe(IDLE_STATE_C1);
-
- tlb_going_idle();
- i86_mwait(0, 0);
- tlb_service();
-
- cpu_dtrace_idle_probe(IDLE_STATE_C0);
+ if (cpu_idle_enter(IDLE_STATE_C1, 0,
+ cpu_idle_mwait_check_wakeup, (void *)mcpu_mwait) == 0) {
+ if (*mcpu_mwait == MWAIT_HALTED) {
+ i86_mwait(0, 0);
+ }
+ cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
+ }
}
/*
@@ -803,7 +844,7 @@ mach_get_platform(int owner)
/*
* Save the version of the PSM module, in case we need to
- * bahave differently based on version.
+ * behave differently based on version.
*/
mach_ver[0] = mach_ver[owner];
diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c
index ff880fa515..6fc571b445 100644
--- a/usr/src/uts/i86pc/os/mp_pc.c
+++ b/usr/src/uts/i86pc/os/mp_pc.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Welcome to the world of the "real mode platter".
* See also startup.c, mpcore.s and apic.c for related routines.
@@ -106,7 +104,7 @@ mach_cpucontext_alloc(struct cpu *cp)
/*
* Allocate space for stack, tss, gdt and idt. We round the size
- * alloated for cpu_tables up, so that the TSS is on a unique page.
+ * allotted for cpu_tables up, so that the TSS is on a unique page.
* This is more efficient when running in virtual machines.
*/
ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
@@ -257,9 +255,7 @@ mach_cpu_halt(char *msg)
void
mach_cpu_idle(void)
{
- tlb_going_idle();
i86_halt();
- tlb_service();
}
void
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index e8e2c24053..683f8942e4 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -854,7 +854,7 @@ workaround_errata(struct cpu *cpu)
/*LINTED*/
if (cpuid_opteron_erratum(cpu, 109) > 0) do {
/*
- * Certain Reverse REP MOVS May Produce Unpredictable Behaviour
+ * Certain Reverse REP MOVS May Produce Unpredictable Behavior
*/
#if defined(OPTERON_ERRATUM_109)
/*
@@ -1470,6 +1470,7 @@ mp_startup(void)
{
struct cpu *cp = CPU;
uint_t new_x86_feature;
+ extern void cpu_event_init_cpu(cpu_t *);
#ifndef __xpv
extern void cpupm_init(cpu_t *);
#endif
@@ -1556,7 +1557,7 @@ mp_startup(void)
/*
* We could be more sophisticated here, and just mark the CPU
* as "faulted" but at this point we'll opt for the easier
- * answer of dieing horribly. Provided the boot cpu is ok,
+ * answer of dying horribly. Provided the boot cpu is ok,
* the system can be recovered by booting with use_mp set to zero.
*/
if (workaround_errata(cp) != 0)
@@ -1591,7 +1592,7 @@ mp_startup(void)
/*
* Enable preemption here so that contention for any locks acquired
* later in mp_startup may be preempted if the thread owning those
- * locks is continously executing on other CPUs (for example, this
+ * locks is continuously executing on other CPUs (for example, this
* CPU must be preemptible to allow other CPUs to pause it during their
* startup phases). It's safe to enable preemption here because the
* CPU state is pretty-much fully constructed.
@@ -1602,6 +1603,7 @@ mp_startup(void)
ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
set_base_spl(); /* Restore the spl to its proper value */
+ cpu_event_init_cpu(cp);
#ifndef __xpv
cpupm_init(cp);
#endif
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index 22a66482b2..ef6b28fdbc 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -2016,6 +2016,7 @@ startup_end(void)
{
int i;
extern void setx86isalist(void);
+ extern void cpu_event_init(void);
PRM_POINT("startup_end() starting...");
@@ -2031,6 +2032,11 @@ startup_end(void)
*/
kcpc_hw_init(CPU);
+ /*
+ * Initialize cpu event framework.
+ */
+ cpu_event_init();
+
#if defined(OPTERON_WORKAROUND_6323525)
if (opteron_workaround_6323525)
patch_workaround_6323525();
@@ -2125,6 +2131,7 @@ void
post_startup(void)
{
extern void cpupm_init(cpu_t *);
+ extern void cpu_event_init_cpu(cpu_t *);
/*
* Set the system wide, processor-specific flags to be passed
@@ -2184,6 +2191,7 @@ post_startup(void)
maxmem = freemem;
+ cpu_event_init_cpu(CPU);
cpupm_init(CPU);
add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi);
diff --git a/usr/src/uts/i86pc/sys/cpu_idle.h b/usr/src/uts/i86pc/sys/cpu_idle.h
index f60a6e9d3c..1d922a1f8c 100644
--- a/usr/src/uts/i86pc/sys/cpu_idle.h
+++ b/usr/src/uts/i86pc/sys/cpu_idle.h
@@ -31,16 +31,17 @@
#define _CPUIDLE_H
#include <sys/cpupm.h>
+#include <sys/cpu.h>
#ifdef __cplusplus
extern "C" {
#endif
#define CPU_MAX_CSTATES 8
-#define CPU_ACPI_C0 0
-#define CPU_ACPI_C1 1
-#define CPU_ACPI_C2 2
-#define CPU_ACPI_C3 3
+#define CPU_ACPI_C0 IDLE_STATE_C0
+#define CPU_ACPI_C1 IDLE_STATE_C1
+#define CPU_ACPI_C2 IDLE_STATE_C2
+#define CPU_ACPI_C3 IDLE_STATE_C3
#define BM_CTL 0x1
#define BM_RLD 0x2
@@ -64,7 +65,6 @@ extern void cstate_wakeup(cpu_t *, int);
extern boolean_t cpu_deep_cstates_supported(void);
extern void cpu_wakeup(cpu_t *, int);
extern void cpu_wakeup_mwait(cpu_t *, int);
-extern void cpu_dtrace_idle_probe(uint_t);
extern void cpuidle_manage_cstates(void *);
extern boolean_t cstate_timer_callback(int code);
diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h
index 50a5b98432..28b72f0a04 100644
--- a/usr/src/uts/i86pc/sys/machcpuvar.h
+++ b/usr/src/uts/i86pc/sys/machcpuvar.h
@@ -131,7 +131,6 @@ struct machcpu {
void (*mcpu_idle_cpu)(void); /* idle function */
uint16_t mcpu_idle_type; /* CPU next idle type */
uint16_t max_cstates; /* supported max cstates */
- uint32_t curr_cstate; /* current cstate */
struct cpu_ucode_info *mcpu_ucode_info;
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index 028518c894..732fe496a9 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -138,7 +138,7 @@ int enable_1gpg = 1;
/*
* AMD shanghai processors provide better management of 1gb ptes in its tlb.
- * By default, 1g page suppport will be disabled for pre-shanghai AMD
+ * By default, 1g page support will be disabled for pre-shanghai AMD
* processors that don't have optimal tlb support for the 1g page size.
* chk_optimal_1gtlb can be set to 0 to force 1g page support on sub-optimal
* processors.
@@ -1299,7 +1299,7 @@ hati_pte_map(
int rv = 0;
/*
- * Is this a consistant (ie. need mapping list lock) mapping?
+ * Is this a consistent (i.e. needs mapping list lock) mapping?
*/
is_consist = (pp != NULL && (flags & HAT_LOAD_NOCONSIST) == 0);
@@ -1991,22 +1991,15 @@ tlb_going_idle(void)
/*
* Service a delayed TLB flush if coming out of being idle.
+ * It is called from the CPU idle notification framework with interrupts
+ * disabled.
*/
void
tlb_service(void)
{
- ulong_t flags = getflags();
ulong_t tlb_info;
ulong_t found;
/*
- * Be sure interrupts are off while doing this so that
- * higher level interrupts correctly wait for flushes to finish.
- */
- if (flags & PS_IE)
- flags = intr_clear();
-
- /*
* We only have to do something if coming out of being idle.
*/
tlb_info = CPU->cpu_m.mcpu_tlb_info;
@@ -2024,12 +2017,6 @@ tlb_service(void)
if (tlb_info & TLB_INVAL_ALL)
flush_all_tlb_entries();
}
-
- /*
- * Restore interrupt enable control bit.
- */
- if (flags & PS_IE)
- sti();
}
#endif /* !__xpv */
@@ -3178,7 +3165,7 @@ hat_reserve(struct as *as, caddr_t addr, size_t len)
/*
* Called when all mappings to a page should have write permission removed.
- * Mostly stolem from hat_pagesync()
+ * Mostly stolen from hat_pagesync()
*/
static void
hati_page_clrwrt(struct page *pp)
@@ -3311,8 +3298,8 @@ hat_page_clrattr(struct page *pp, uint_t flag)
/*
* If flag is specified, returns 0 if attribute is disabled
- * and non zero if enabled. If flag specifes multiple attributs
- * then returns 0 if ALL atriibutes are disabled. This is an advisory
+ * and non-zero if enabled. If flag specifies multiple attributes
+ * then returns 0 if ALL attributes are disabled. This is an advisory
* call.
*/
uint_t
diff --git a/usr/src/uts/intel/sys/cpu.h b/usr/src/uts/intel/sys/cpu.h
index d62cb7692f..20f9e0290e 100644
--- a/usr/src/uts/intel/sys/cpu.h
+++ b/usr/src/uts/intel/sys/cpu.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_CPU_H
#define _SYS_CPU_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* WARNING:
* This header file is Obsolete and may be deleted in a
@@ -72,12 +70,15 @@ extern void i86_mwait(uint32_t data, uint32_t extensions);
* C-state defines for the idle_state_transition DTrace probe
*
* The probe fires when the CPU undergoes an idle state change (e.g. C-state)
- * The agument passed is the C-state to which the CPU is transitioning.
+ * The argument passed is the C-state to which the CPU is transitioning.
*
- * The states are defined here.
+ * These states are shared with the cpupm subsystem, so they should be kept
+ * consistent with the ACPI-defined C-states.
*/
#define IDLE_STATE_C0 0
#define IDLE_STATE_C1 1
+#define IDLE_STATE_C2 2
+#define IDLE_STATE_C3 3
#endif /* _KERNEL */