Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/uts/common/Makefile.files     |    1
-rw-r--r-- | usr/src/uts/common/os/cpu.c           |    5
-rw-r--r-- | usr/src/uts/common/os/cpu_event.c     | 1093
-rw-r--r-- | usr/src/uts/common/sys/cpu_event.h    |  270
-rw-r--r-- | usr/src/uts/i86pc/os/cpupm/cpu_idle.c |  128
-rw-r--r-- | usr/src/uts/i86pc/os/intr.c           |   10
-rw-r--r-- | usr/src/uts/i86pc/os/mp_machdep.c     |   87
-rw-r--r-- | usr/src/uts/i86pc/os/mp_pc.c          |    8
-rw-r--r-- | usr/src/uts/i86pc/os/mp_startup.c     |    8
-rw-r--r-- | usr/src/uts/i86pc/os/startup.c        |    8
-rw-r--r-- | usr/src/uts/i86pc/sys/cpu_idle.h      |   10
-rw-r--r-- | usr/src/uts/i86pc/sys/machcpuvar.h    |    1
-rw-r--r-- | usr/src/uts/i86pc/vm/hat_i86.c        |   25
-rw-r--r-- | usr/src/uts/intel/sys/cpu.h           |   11
14 files changed, 1568 insertions(+), 97 deletions(-)
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 374dcce56e..6f9dc7d5a0 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -43,6 +43,7 @@ COMMON_CORE_OBJS += \ cmt.o \ cmt_policy.o \ cpu.o \ + cpu_event.o \ cpu_intr.o \ cpu_pm.o \ cpupart.o \ diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 6ee6c941f7..d3d49aedf3 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -32,6 +32,7 @@ #include <sys/var.h> #include <sys/thread.h> #include <sys/cpuvar.h> +#include <sys/cpu_event.h> #include <sys/kstat.h> #include <sys/uadmin.h> #include <sys/systm.h> @@ -143,7 +144,7 @@ cpu_t *cpu_inmotion; /* * Can be raised to suppress further weakbinding, which are instead * satisfied by disabling preemption. Must be raised/lowered under cpu_lock, - * while individual thread weakbinding synchronisation is done under thread + * while individual thread weakbinding synchronization is done under thread * lock. */ int weakbindingbarrier; @@ -2266,7 +2267,7 @@ cpu_info_kstat_update(kstat_t *ksp, int rw) cpuid_get_ncore_per_chip(cp); cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp); cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates; - cpu_info_template.ci_curr_cstate.value.l = cp->cpu_m.curr_cstate; + cpu_info_template.ci_curr_cstate.value.l = cpu_idle_get_cpu_state(cp); kstat_named_setstr(&cpu_info_template.ci_sktstr, cpuid_getsocketstr(cp)); #endif diff --git a/usr/src/uts/common/os/cpu_event.c b/usr/src/uts/common/os/cpu_event.c new file mode 100644 index 0000000000..11162ccfc9 --- /dev/null +++ b/usr/src/uts/common/os/cpu_event.c @@ -0,0 +1,1093 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +/* + * Introduction + * This file implements a CPU event notification mechanism to signal clients + * which are interested in CPU related events. + * Currently it only supports CPU idle state change events which will be + * triggered just before CPU entering hardware idle state and just after CPU + * wakes up from hardware idle state. + * Please refer to PSARC/2009/115 for detail information. + * + * Lock Strategy + * 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock. + * 2) No protection for cpu_idle_cb_state because it's per-CPU data. + * 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock. + * 4) cpu_idle_cb_array is protected by pause_cpus/start_cpus logic. + * 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and + * pause_cpus/start_cpus logic. + * We have optimized the algorithm for hot path on read side access. 
+ * In the current algorithm, it's lock free on read side access. + * On write side, we use pause_cpus() to keep other CPUs in the pause thread, + * which will guarantee that no other threads will access + * cpu_idle_cb_max/curr/array data structure. + */ + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/cpuvar.h> +#include <sys/cpu.h> +#include <sys/kmem.h> +#include <sys/machcpuvar.h> +#include <sys/sdt.h> +#include <sys/sysmacros.h> +#include <sys/synch.h> +#include <sys/systm.h> +#include <sys/sunddi.h> +#if defined(__sparc) +#include <sys/machsystm.h> +#elif defined(__x86) +#include <sys/archsystm.h> +#endif +#include <sys/cpu_event.h> + +/* Define normal state for CPU on different platforms. */ +#if defined(__x86) +#define CPU_IDLE_STATE_NORMAL IDLE_STATE_C0 +#elif defined(__sparc) +/* + * At the time of this implementation IDLE_STATE_NORMAL is defined + * in mach_startup.c, and not in a header file. So if we find it is + * undefined, then we set it to the value as defined in mach_startup.c + * Should it eventually be defined, we will pick it up. + */ +#ifndef IDLE_STATE_NORMAL +#define IDLE_STATE_NORMAL 0 +#endif +#define CPU_IDLE_STATE_NORMAL IDLE_STATE_NORMAL +#endif + +/* + * To improve cache efficiency and avoid cache false sharing, CPU idle + * properties are grouped into cache lines as below: + * | CPU0 | CPU1 |.........| CPUn | + * | cache line 0 | cache line 1 |.........| cache line n | + * | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm | + * To access value of property m for CPU n, using following value as index: + * index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m. + */ +#define CPU_IDLE_VALUE_GROUP_SIZE \ + (CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t)) + +/* Get callback context handle for current CPU. */ +#define CPU_IDLE_GET_CTX(cp) \ + ((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid)) + +/* Get CPU sequential id from ctx. */ +#define CPU_IDLE_CTX2CPUID(ctx) ((processorid_t)(intptr_t)(ctx)) + +/* Compute index from callback context handle. */ +#define CPU_IDLE_CTX2IDX(ctx) \ + (((int)(intptr_t)(ctx)) * CPU_IDLE_VALUE_GROUP_SIZE) + +#define CPU_IDLE_HDL2VALP(hdl, idx) \ + (&((cpu_idle_prop_impl_t *)(hdl))->value[(idx)]) + +/* + * When cpu_idle_cb_array is NULL or full, increase CPU_IDLE_ARRAY_CAPACITY_INC + * entries every time. Here we prefer linear growth instead of exponential. + */ +#define CPU_IDLE_ARRAY_CAPACITY_INC 0x10 + +typedef struct cpu_idle_prop_impl { + cpu_idle_prop_value_t *value; + struct cpu_idle_prop_impl *next; + char *name; + cpu_idle_prop_update_t update; + void *private; + cpu_idle_prop_type_t type; + uint32_t refcnt; +} cpu_idle_prop_impl_t; + +typedef struct cpu_idle_prop_item { + cpu_idle_prop_type_t type; + char *name; + cpu_idle_prop_update_t update; + void *arg; + cpu_idle_prop_handle_t handle; +} cpu_idle_prop_item_t; + +/* Structure to maintain registered callbacks in list. */ +typedef struct cpu_idle_cb_impl { + struct cpu_idle_cb_impl *next; + cpu_idle_callback_t *callback; + void *argument; + int priority; +} cpu_idle_cb_impl_t; + +/* + * Structure to maintain registered callbacks in priority order and also + * optimized for cache efficiency for reading access. + */ +typedef struct cpu_idle_cb_item { + cpu_idle_enter_cbfn_t enter; + cpu_idle_exit_cbfn_t exit; + void *arg; + cpu_idle_cb_impl_t *impl; +} cpu_idle_cb_item_t; + +/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. 
*/ +typedef union cpu_idle_cb_state { + struct { + int index; + boolean_t ready; + cpu_idle_prop_value_t *idle_state; + cpu_idle_prop_value_t *enter_ts; + cpu_idle_prop_value_t *exit_ts; + cpu_idle_prop_value_t *last_idle; + cpu_idle_prop_value_t *last_busy; + cpu_idle_prop_value_t *total_idle; + cpu_idle_prop_value_t *total_busy; + cpu_idle_prop_value_t *intr_cnt; + } v; +#ifdef _LP64 + char align[2 * CPU_CACHE_COHERENCE_SIZE]; +#else + char align[CPU_CACHE_COHERENCE_SIZE]; +#endif +} cpu_idle_cb_state_t; + +static kmutex_t cpu_idle_prop_lock; +static cpu_idle_prop_impl_t *cpu_idle_prop_busy = NULL; +static cpu_idle_prop_impl_t *cpu_idle_prop_free = NULL; + +static kmutex_t cpu_idle_cb_lock; +static cpu_idle_cb_impl_t *cpu_idle_cb_busy = NULL; +static cpu_idle_cb_item_t *cpu_idle_cb_array = NULL; +static int cpu_idle_cb_curr = 0; +static int cpu_idle_cb_max = 0; + +static cpu_idle_cb_state_t *cpu_idle_cb_state; + +static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum, + cpu_idle_prop_value_t *valp); + +static cpu_idle_prop_item_t cpu_idle_prop_array[] = { + { + CPU_IDLE_PROP_TYPE_INTPTR, CPU_IDLE_PROP_IDLE_STATE, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_ENTER_TIMESTAMP, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_EXIT_TIMESTAMP, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_IDLE_TIME, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_BUSY_TIME, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_IDLE_TIME, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_BUSY_TIME, + NULL, NULL, NULL + }, + { + CPU_IDLE_PROP_TYPE_UINT64, CPU_IDLE_PROP_INTERRUPT_COUNT, + cpu_idle_prop_update_intr_cnt, NULL, NULL + }, +}; + +#define CPU_IDLE_PROP_IDX_IDLE_STATE 0 +#define CPU_IDLE_PROP_IDX_ENTER_TS 1 +#define CPU_IDLE_PROP_IDX_EXIT_TS 2 +#define CPU_IDLE_PROP_IDX_LAST_IDLE 3 +#define CPU_IDLE_PROP_IDX_LAST_BUSY 4 +#define CPU_IDLE_PROP_IDX_TOTAL_IDLE 5 +#define CPU_IDLE_PROP_IDX_TOTAL_BUSY 6 +#define CPU_IDLE_PROP_IDX_INTR_CNT 7 + +/*ARGSUSED*/ +static void +cpu_idle_dtrace_enter(void *arg, cpu_idle_callback_context_t ctx, + cpu_idle_check_wakeup_t check_func, void *check_arg) +{ + int state; + + state = cpu_idle_prop_get_intptr( + cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, ctx); + DTRACE_PROBE1(idle__state__transition, uint_t, state); +} + +/*ARGSUSED*/ +static void +cpu_idle_dtrace_exit(void *arg, cpu_idle_callback_context_t ctx, int flag) +{ + DTRACE_PROBE1(idle__state__transition, uint_t, CPU_IDLE_STATE_NORMAL); +} + +static cpu_idle_callback_handle_t cpu_idle_cb_handle_dtrace; +static cpu_idle_callback_t cpu_idle_callback_dtrace = { + CPU_IDLE_CALLBACK_VERS, + cpu_idle_dtrace_enter, + cpu_idle_dtrace_exit, +}; + +#if defined(__x86) && !defined(__xpv) +extern void tlb_going_idle(void); +extern void tlb_service(void); + +static cpu_idle_callback_handle_t cpu_idle_cb_handle_tlb; +static cpu_idle_callback_t cpu_idle_callback_tlb = { + CPU_IDLE_CALLBACK_VERS, + (cpu_idle_enter_cbfn_t)tlb_going_idle, + (cpu_idle_exit_cbfn_t)tlb_service, +}; +#endif + +void +cpu_event_init(void) +{ + int i, idx; + size_t sz; + intptr_t buf; + cpu_idle_cb_state_t *sp; + cpu_idle_prop_item_t *ip; + + mutex_init(&cpu_idle_cb_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&cpu_idle_prop_lock, NULL, MUTEX_DRIVER, NULL); + + /* Create internal properties. 
*/ + for (i = 0, ip = cpu_idle_prop_array; + i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]); + i++, ip++) { + (void) cpu_idle_prop_create_property(ip->name, ip->type, + ip->update, ip->arg, &ip->handle); + ASSERT(ip->handle != NULL); + } + + /* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */ + sz = sizeof (cpu_idle_cb_state_t) * max_ncpus; + sz += CPU_CACHE_COHERENCE_SIZE; + buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP); + cpu_idle_cb_state = (cpu_idle_cb_state_t *)P2ROUNDUP(buf, + CPU_CACHE_COHERENCE_SIZE); + + /* Cache frequently used property value pointers. */ + for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) { + idx = CPU_IDLE_CTX2IDX(i); +#define ___INIT_P(f, i) \ + sp->v.f = CPU_IDLE_HDL2VALP(cpu_idle_prop_array[(i)].handle, idx) + ___INIT_P(idle_state, CPU_IDLE_PROP_IDX_IDLE_STATE); + ___INIT_P(enter_ts, CPU_IDLE_PROP_IDX_ENTER_TS); + ___INIT_P(exit_ts, CPU_IDLE_PROP_IDX_EXIT_TS); + ___INIT_P(last_idle, CPU_IDLE_PROP_IDX_LAST_IDLE); + ___INIT_P(last_busy, CPU_IDLE_PROP_IDX_LAST_BUSY); + ___INIT_P(total_idle, CPU_IDLE_PROP_IDX_TOTAL_IDLE); + ___INIT_P(total_busy, CPU_IDLE_PROP_IDX_TOTAL_BUSY); + ___INIT_P(last_idle, CPU_IDLE_PROP_IDX_INTR_CNT); +#undef ___INIT_P + } + + /* Register built-in callbacks. */ + if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DTRACE, + &cpu_idle_callback_dtrace, NULL, &cpu_idle_cb_handle_dtrace) != 0) { + cmn_err(CE_PANIC, + "cpu_idle: failed to register callback for dtrace."); + } +#if defined(__x86) && !defined(__xpv) + if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_TLB, + &cpu_idle_callback_tlb, NULL, &cpu_idle_cb_handle_tlb) != 0) { + cmn_err(CE_PANIC, + "cpu_idle: failed to register callback for tlb_flush."); + } +#endif +} + +void +cpu_event_init_cpu(cpu_t *cp) +{ + ASSERT(cp->cpu_seqid < max_ncpus); + cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE; +} + +void +cpu_event_fini_cpu(cpu_t *cp) +{ + ASSERT(cp->cpu_seqid < max_ncpus); + cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE; +} + +static void +cpu_idle_insert_callback(cpu_idle_cb_impl_t *cip) +{ + int unlock = 0, unpause = 0; + int i, cnt_new = 0, cnt_old = 0; + char *buf_new = NULL, *buf_old = NULL; + + ASSERT(MUTEX_HELD(&cpu_idle_cb_lock)); + + /* + * Expand array if it's full. + * Memory must be allocated out of pause/start_cpus() scope because + * kmem_zalloc() can't be called with KM_SLEEP flag within that scope. + */ + if (cpu_idle_cb_curr == cpu_idle_cb_max) { + cnt_new = cpu_idle_cb_max + CPU_IDLE_ARRAY_CAPACITY_INC; + buf_new = (char *)kmem_zalloc(cnt_new * + sizeof (cpu_idle_cb_item_t), KM_SLEEP); + } + + /* Try to acquire cpu_lock if not held yet. */ + if (!MUTEX_HELD(&cpu_lock)) { + mutex_enter(&cpu_lock); + unlock = 1; + } + /* + * Pause all other CPUs (and let them run pause thread). + * It's guaranteed that no other threads will access cpu_idle_cb_array + * after pause_cpus(). + */ + if (!cpus_paused()) { + pause_cpus(NULL); + unpause = 1; + } + + /* Copy content to new buffer if needed. */ + if (buf_new != NULL) { + buf_old = (char *)cpu_idle_cb_array; + cnt_old = cpu_idle_cb_max; + if (buf_old != NULL) { + ASSERT(cnt_old != 0); + bcopy(cpu_idle_cb_array, buf_new, + sizeof (cpu_idle_cb_item_t) * cnt_old); + } + cpu_idle_cb_array = (cpu_idle_cb_item_t *)buf_new; + cpu_idle_cb_max = cnt_new; + } + + /* Insert into array according to priority. 
*/ + ASSERT(cpu_idle_cb_curr < cpu_idle_cb_max); + for (i = cpu_idle_cb_curr; i > 0; i--) { + if (cpu_idle_cb_array[i - 1].impl->priority >= cip->priority) { + break; + } + cpu_idle_cb_array[i] = cpu_idle_cb_array[i - 1]; + } + cpu_idle_cb_array[i].arg = cip->argument; + cpu_idle_cb_array[i].enter = cip->callback->idle_enter; + cpu_idle_cb_array[i].exit = cip->callback->idle_exit; + cpu_idle_cb_array[i].impl = cip; + cpu_idle_cb_curr++; + + /* Resume other CPUs from paused state if needed. */ + if (unpause) { + start_cpus(); + } + if (unlock) { + mutex_exit(&cpu_lock); + } + + /* Free old resource if needed. */ + if (buf_old != NULL) { + ASSERT(cnt_old != 0); + kmem_free(buf_old, cnt_old * sizeof (cpu_idle_cb_item_t)); + } +} + +static void +cpu_idle_remove_callback(cpu_idle_cb_impl_t *cip) +{ + int i, found = 0; + int unlock = 0, unpause = 0; + cpu_idle_cb_state_t *sp; + + ASSERT(MUTEX_HELD(&cpu_idle_cb_lock)); + + /* Try to acquire cpu_lock if not held yet. */ + if (!MUTEX_HELD(&cpu_lock)) { + mutex_enter(&cpu_lock); + unlock = 1; + } + /* + * Pause all other CPUs. + * It's guaranteed that no other threads will access cpu_idle_cb_array + * after pause_cpus(). + */ + if (!cpus_paused()) { + pause_cpus(NULL); + unpause = 1; + } + + /* Remove cip from array. */ + for (i = 0; i < cpu_idle_cb_curr; i++) { + if (found == 0) { + if (cpu_idle_cb_array[i].impl == cip) { + found = 1; + } + } else { + cpu_idle_cb_array[i - 1] = cpu_idle_cb_array[i]; + } + } + ASSERT(found != 0); + cpu_idle_cb_curr--; + + /* + * Reset property ready flag for all CPUs if no registered callback + * left because cpu_idle_enter/exit will stop updating property if + * there's no callback registered. + */ + if (cpu_idle_cb_curr == 0) { + for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) { + sp->v.ready = B_FALSE; + } + } + + /* Resume other CPUs from paused state if needed. */ + if (unpause) { + start_cpus(); + } + if (unlock) { + mutex_exit(&cpu_lock); + } +} + +int +cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp, + void *arg, cpu_idle_callback_handle_t *hdlp) +{ + cpu_idle_cb_state_t *sp; + cpu_idle_cb_impl_t *cip = NULL; + + /* First validate parameters. */ + ASSERT(!CPU_ON_INTR(CPU)); + ASSERT(CPU->cpu_seqid < max_ncpus); + sp = &cpu_idle_cb_state[CPU->cpu_seqid]; + if (sp->v.index != 0) { + cmn_err(CE_NOTE, + "!cpu_event: register_callback called from callback."); + return (EBUSY); + } else if (cbp == NULL || hdlp == NULL) { + cmn_err(CE_NOTE, + "!cpu_event: NULL parameters in register_callback."); + return (EINVAL); + } else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE || + prio >= CPU_IDLE_CB_PRIO_RESV_BASE) { + cmn_err(CE_NOTE, + "!cpu_event: priority 0x%x out of range.", prio); + return (EINVAL); + } else if (cbp->version != CPU_IDLE_CALLBACK_VERS) { + cmn_err(CE_NOTE, + "!cpu_event: callback version %d is not supported.", + cbp->version); + return (EINVAL); + } + + mutex_enter(&cpu_idle_cb_lock); + /* Check whether callback with priority exists if not dynamic. 
*/ + if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) { + for (cip = cpu_idle_cb_busy; cip != NULL; + cip = cip->next) { + if (cip->priority == prio) { + mutex_exit(&cpu_idle_cb_lock); + cmn_err(CE_NOTE, "!cpu_event: callback with " + "priority 0x%x already exists.", prio); + return (EEXIST); + } + } + } + + cip = kmem_zalloc(sizeof (*cip), KM_SLEEP); + cip->callback = cbp; + cip->argument = arg; + cip->priority = prio; + cip->next = cpu_idle_cb_busy; + cpu_idle_cb_busy = cip; + cpu_idle_insert_callback(cip); + mutex_exit(&cpu_idle_cb_lock); + + *hdlp = (cpu_idle_callback_handle_t)cip; + + return (0); +} + +int +cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl) +{ + int rc = ENODEV; + cpu_idle_cb_state_t *sp; + cpu_idle_cb_impl_t *ip, **ipp; + + ASSERT(!CPU_ON_INTR(CPU)); + ASSERT(CPU->cpu_seqid < max_ncpus); + sp = &cpu_idle_cb_state[CPU->cpu_seqid]; + if (sp->v.index != 0) { + cmn_err(CE_NOTE, + "!cpu_event: unregister_callback called from callback."); + return (EBUSY); + } else if (hdl == NULL) { + cmn_err(CE_NOTE, + "!cpu_event: hdl is NULL in unregister_callback."); + return (EINVAL); + } + + ip = (cpu_idle_cb_impl_t *)hdl; + mutex_enter(&cpu_idle_cb_lock); + for (ipp = &cpu_idle_cb_busy; *ipp != NULL; ipp = &(*ipp)->next) { + if (*ipp == ip) { + *ipp = ip->next; + cpu_idle_remove_callback(ip); + rc = 0; + break; + } + } + mutex_exit(&cpu_idle_cb_lock); + + if (rc == 0) { + kmem_free(ip, sizeof (*ip)); + } else { + cmn_err(CE_NOTE, + "!cpu_event: callback handle %p not found.", (void *)hdl); + } + + return (rc); +} + +static int +cpu_idle_enter_state(cpu_idle_cb_state_t *sp, intptr_t state) +{ + sp->v.idle_state->cipv_intptr = state; + sp->v.enter_ts->cipv_hrtime = gethrtime_unscaled(); + sp->v.last_busy->cipv_hrtime = sp->v.enter_ts->cipv_hrtime - + sp->v.exit_ts->cipv_hrtime; + sp->v.total_busy->cipv_hrtime += sp->v.last_busy->cipv_hrtime; + if (sp->v.ready == B_FALSE) { + sp->v.ready = B_TRUE; + return (0); + } + + return (1); +} + +static void +cpu_idle_exit_state(cpu_idle_cb_state_t *sp) +{ + sp->v.idle_state->cipv_intptr = CPU_IDLE_STATE_NORMAL; + sp->v.exit_ts->cipv_hrtime = gethrtime_unscaled(); + sp->v.last_idle->cipv_hrtime = sp->v.exit_ts->cipv_hrtime - + sp->v.enter_ts->cipv_hrtime; + sp->v.total_idle->cipv_hrtime += sp->v.last_idle->cipv_hrtime; +} + +/*ARGSUSED*/ +int +cpu_idle_enter(int state, int flag, + cpu_idle_check_wakeup_t check_func, void *check_arg) +{ + int i; + cpu_idle_cb_item_t *cip; + cpu_idle_cb_state_t *sp; + cpu_idle_callback_context_t ctx; +#if defined(__x86) + ulong_t iflags; +#endif + + ctx = CPU_IDLE_GET_CTX(CPU); + ASSERT(CPU->cpu_seqid < max_ncpus); + sp = &cpu_idle_cb_state[CPU->cpu_seqid]; + ASSERT(sp->v.index == 0); + + /* + * On x86, cpu_idle_enter can be called from idle thread with either + * interrupts enabled or disabled, so we need to make sure interrupts + * are disabled here. + * On SPARC, cpu_idle_enter will be called from idle thread with + * interrupt disabled, so no special handling necessary. + */ +#if defined(__x86) + iflags = intr_clear(); +#endif + + /* Skip calling callback if state is not ready for current CPU. */ + if (cpu_idle_enter_state(sp, state) == 0) { +#if defined(__x86) + intr_restore(iflags); +#endif + return (0); + } + + for (i = 0, cip = cpu_idle_cb_array; i < cpu_idle_cb_curr; i++, cip++) { + /* + * Increase index so corresponding idle_exit callback + * will be invoked should interrupt happen during + * idle_enter callback. + */ + sp->v.index++; + + /* Call idle_enter callback function if it's not NULL. 
*/ + if (cip->enter != NULL) { + cip->enter(cip->arg, ctx, check_func, check_arg); + + /* + * cpu_idle_enter runs with interrupts + * disabled, so the idle_enter callbacks will + * also be called with interrupts disabled. + * It is permissible for the callbacks to + * enable the interrupts, if they can also + * handle the condition if the interrupt + * occurs. + * + * However, if an interrupt occurs and we + * return here without dealing with it, we + * return to the cpu_idle_enter() caller + * with an EBUSY, and the caller will not + * enter the idle state. + * + * We detect the interrupt, by checking the + * index value of the state pointer. If it + * is not the index we incremented above, + * then it was cleared while processing + * the interrupt. + * + * Also note, that at this point of the code + * the normal index value will be one greater + * than the variable 'i' in the loop, as it + * hasn't yet been incremented. + */ + if (sp->v.index != i + 1) { +#if defined(__x86) + intr_restore(iflags); +#endif + return (EBUSY); + } + } + } +#if defined(__x86) + intr_restore(iflags); +#endif + + return (0); +} + +void +cpu_idle_exit(int flag) +{ + int i; + cpu_idle_cb_item_t *cip; + cpu_idle_cb_state_t *sp; + cpu_idle_callback_context_t ctx; +#if defined(__x86) + ulong_t iflags; +#endif + + ASSERT(CPU->cpu_seqid < max_ncpus); + sp = &cpu_idle_cb_state[CPU->cpu_seqid]; + +#if defined(__sparc) + /* + * On SPARC, cpu_idle_exit will only be called from idle thread + * with interrupt disabled. + */ + + if (sp->v.index != 0) { + ctx = CPU_IDLE_GET_CTX(CPU); + cpu_idle_exit_state(sp); + for (i = sp->v.index - 1; i >= 0; i--) { + cip = &cpu_idle_cb_array[i]; + if (cip->exit != NULL) { + cip->exit(cip->arg, ctx, flag); + } + } + sp->v.index = 0; + } +#elif defined(__x86) + /* + * On x86, cpu_idle_exit will be called from idle thread or interrupt + * handler. When called from interrupt handler, interrupts will be + * disabled. When called from idle thread, interrupts may be disabled + * or enabled. + */ + + /* Called from interrupt, interrupts are already disabled. */ + if (flag & CPU_IDLE_CB_FLAG_INTR) { + /* + * return if cpu_idle_exit already called or + * there is no registered callback. + */ + if (sp->v.index == 0) { + return; + } + ctx = CPU_IDLE_GET_CTX(CPU); + cpu_idle_exit_state(sp); + for (i = sp->v.index - 1; i >= 0; i--) { + cip = &cpu_idle_cb_array[i]; + if (cip->exit != NULL) { + cip->exit(cip->arg, ctx, flag); + } + } + sp->v.index = 0; + + /* Called from idle thread, need to disable interrupt. */ + } else { + iflags = intr_clear(); + if (sp->v.index != 0) { + ctx = CPU_IDLE_GET_CTX(CPU); + cpu_idle_exit_state(sp); + for (i = sp->v.index - 1; i >= 0; i--) { + cip = &cpu_idle_cb_array[i]; + if (cip->exit != NULL) { + cip->exit(cip->arg, ctx, flag); + } + } + sp->v.index = 0; + } + intr_restore(iflags); + } +#endif +} + +cpu_idle_callback_context_t +cpu_idle_get_context(void) +{ + return (CPU_IDLE_GET_CTX(CPU)); +} + +/* + * Allocate property structure in group of CPU_IDLE_VALUE_GROUP_SIZE to improve + * cache efficiency. To simplify implementation, allocated memory for property + * structure won't be freed. 
+ */ +static void +cpu_idle_prop_allocate_impl(void) +{ + int i; + size_t sz; + intptr_t buf; + cpu_idle_prop_impl_t *prop; + cpu_idle_prop_value_t *valp; + + ASSERT(!CPU_ON_INTR(CPU)); + prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE, + KM_SLEEP); + sz = sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE * max_ncpus; + sz += CPU_CACHE_COHERENCE_SIZE; + buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP); + valp = (cpu_idle_prop_value_t *)P2ROUNDUP(buf, + CPU_CACHE_COHERENCE_SIZE); + + for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) { + prop->value = valp; + prop->next = cpu_idle_prop_free; + cpu_idle_prop_free = prop; + } +} + +int +cpu_idle_prop_create_property(const char *name, cpu_idle_prop_type_t type, + cpu_idle_prop_update_t update, void *arg, cpu_idle_prop_handle_t *hdlp) +{ + int rc = EEXIST; + cpu_idle_prop_impl_t *prop; + + ASSERT(!CPU_ON_INTR(CPU)); + if (name == NULL || hdlp == NULL) { + cmn_err(CE_WARN, + "!cpu_event: NULL parameters in create_property."); + return (EINVAL); + } + + mutex_enter(&cpu_idle_prop_lock); + for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) { + if (strcmp(prop->name, name) == 0) { + cmn_err(CE_NOTE, + "!cpu_event: property %s already exists.", name); + break; + } + } + if (prop == NULL) { + if (cpu_idle_prop_free == NULL) { + cpu_idle_prop_allocate_impl(); + } + ASSERT(cpu_idle_prop_free != NULL); + prop = cpu_idle_prop_free; + cpu_idle_prop_free = prop->next; + prop->next = cpu_idle_prop_busy; + cpu_idle_prop_busy = prop; + + ASSERT(prop->value != NULL); + prop->name = strdup(name); + prop->type = type; + prop->update = update; + prop->private = arg; + prop->refcnt = 1; + *hdlp = prop; + rc = 0; + } + mutex_exit(&cpu_idle_prop_lock); + + return (rc); +} + +int +cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl) +{ + int rc = ENODEV; + cpu_idle_prop_impl_t *prop, **propp; + cpu_idle_prop_value_t *valp; + + ASSERT(!CPU_ON_INTR(CPU)); + if (hdl == NULL) { + cmn_err(CE_WARN, + "!cpu_event: hdl is NULL in destroy_property."); + return (EINVAL); + } + + prop = (cpu_idle_prop_impl_t *)hdl; + mutex_enter(&cpu_idle_prop_lock); + for (propp = &cpu_idle_prop_busy; *propp != NULL; + propp = &(*propp)->next) { + if (*propp == prop) { + ASSERT(prop->refcnt > 0); + if (atomic_cas_32(&prop->refcnt, 1, 0) == 1) { + *propp = prop->next; + strfree(prop->name); + valp = prop->value; + bzero(prop, sizeof (*prop)); + prop->value = valp; + prop->next = cpu_idle_prop_free; + cpu_idle_prop_free = prop; + rc = 0; + } else { + rc = EBUSY; + } + break; + } + } + mutex_exit(&cpu_idle_prop_lock); + + return (rc); +} + +int +cpu_idle_prop_create_handle(const char *name, cpu_idle_prop_handle_t *hdlp) +{ + int rc = ENODEV; + cpu_idle_prop_impl_t *prop; + + ASSERT(!CPU_ON_INTR(CPU)); + if (name == NULL || hdlp == NULL) { + cmn_err(CE_WARN, + "!cpu_event: NULL parameters in create_handle."); + return (EINVAL); + } + + mutex_enter(&cpu_idle_prop_lock); + for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) { + if (strcmp(prop->name, name) == 0) { + /* Hold one refcount on object. 
*/ + ASSERT(prop->refcnt > 0); + atomic_inc_32(&prop->refcnt); + *hdlp = (cpu_idle_prop_handle_t)prop; + rc = 0; + break; + } + } + mutex_exit(&cpu_idle_prop_lock); + + return (rc); +} + +int +cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl) +{ + int rc = ENODEV; + cpu_idle_prop_impl_t *prop; + + ASSERT(!CPU_ON_INTR(CPU)); + if (hdl == NULL) { + cmn_err(CE_WARN, + "!cpu_event: hdl is NULL in destroy_handle."); + return (EINVAL); + } + + mutex_enter(&cpu_idle_prop_lock); + for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) { + if (prop == hdl) { + /* Release refcnt held in create_handle. */ + ASSERT(prop->refcnt > 1); + atomic_dec_32(&prop->refcnt); + rc = 0; + break; + } + } + mutex_exit(&cpu_idle_prop_lock); + + return (rc); +} + +cpu_idle_prop_type_t +cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl) +{ + ASSERT(hdl != NULL); + return (((cpu_idle_prop_impl_t *)hdl)->type); +} + +const char * +cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl) +{ + ASSERT(hdl != NULL); + return (((cpu_idle_prop_impl_t *)hdl)->name); +} + +int +cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp) +{ + int idx, rc = 0; + cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus); + if (hdl == NULL || valp == NULL) { + cmn_err(CE_NOTE, "!cpu_event: NULL parameters in prop_get."); + return (EINVAL); + } + idx = CPU_IDLE_CTX2IDX(ctx); + if (prop->update != NULL) { + cpu_idle_cb_state_t *sp; + + ASSERT(CPU->cpu_seqid < max_ncpus); + sp = &cpu_idle_cb_state[CPU->cpu_seqid]; + /* CPU's idle enter timestamp as sequence number. */ + rc = prop->update(prop->private, + (uint64_t)sp->v.enter_ts->cipv_hrtime, &prop->value[idx]); + } + if (rc == 0) { + *valp = prop->value[idx]; + } + + return (rc); +} + +uint32_t +cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx) +{ + int idx; + cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(hdl != NULL); + ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus); + idx = CPU_IDLE_CTX2IDX(ctx); + return (prop->value[idx].cipv_uint32); +} + +uint64_t +cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx) +{ + int idx; + cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(hdl != NULL); + ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus); + idx = CPU_IDLE_CTX2IDX(ctx); + return (prop->value[idx].cipv_uint64); +} + +intptr_t +cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx) +{ + int idx; + cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(hdl != NULL); + ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus); + idx = CPU_IDLE_CTX2IDX(ctx); + return (prop->value[idx].cipv_intptr); +} + +hrtime_t +cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx) +{ + int idx; + cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(hdl != NULL); + ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus); + idx = CPU_IDLE_CTX2IDX(ctx); + return (prop->value[idx].cipv_hrtime); +} + +void +cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val) +{ + int idx; + cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(hdl != NULL); + ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus); + idx = CPU_IDLE_CTX2IDX(ctx); + prop->value[idx] = val; +} + +void +cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, cpu_idle_prop_value_t val) +{ + int i, idx; + 
cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl; + + ASSERT(hdl != NULL); + for (i = 0; i < max_ncpus; i++) { + idx = CPU_IDLE_CTX2IDX(i); + prop->value[idx] = val; + } +} + +/*ARGSUSED*/ +static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum, + cpu_idle_prop_value_t *valp) +{ + int i; + uint64_t val; + + for (val = 0, i = 0; i < PIL_MAX; i++) { + val += CPU->cpu_stats.sys.intr[i]; + } + valp->cipv_uint64 = val; + + return (0); +} + +uint_t +cpu_idle_get_cpu_state(cpu_t *cp) +{ + ASSERT(cp != NULL && cp->cpu_seqid < max_ncpus); + return ((uint_t)cpu_idle_prop_get_uint32( + cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, + CPU_IDLE_GET_CTX(cp))); +} diff --git a/usr/src/uts/common/sys/cpu_event.h b/usr/src/uts/common/sys/cpu_event.h new file mode 100644 index 0000000000..a636fd9a41 --- /dev/null +++ b/usr/src/uts/common/sys/cpu_event.h @@ -0,0 +1,270 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2009, Intel Corporation. + * All rights reserved. + */ + +#ifndef _SYS_CPU_EVENT_H +#define _SYS_CPU_EVENT_H +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +/* + * CPU idle notification callbacks are divided into three priority classes: + * 1. Statically assigned high priority callbacks. + * 2. Dynamically allocated normal priority callbacks. + * 3. Statically assigned low priority callbacks. + * + * All registered callbacks will be called in priority order from high + * to low just before CPU enters hardware idle state and from low to + * high just after CPU wakes from idle state. + * + * The high and low priority classes are designed to support hardware + * ordering requirements. A dynamically assigned priority allows the + * framework to choose the order in which the callback is processed. + * If a callback has no dependency on other callbacks, it should use + * dynamic priority to avoid priority conflicts. + * + * Note that the priority doesn't describe how important a callback + * is, but just the order in which they are processed. If a callback + * needs processing early in the idle notification cycle, it should + * have a higher priority. If it needs to be at the end, or early on + * the exit, then it should have a lower priority. + */ + +#define CPU_IDLE_CB_PRIO_LOW_BASE 0x20000000U +#define CPU_IDLE_CB_PRIO_DYN_BASE 0x40000000U +#define CPU_IDLE_CB_PRIO_HIGH_BASE 0x40000001U +#define CPU_IDLE_CB_PRIO_RESV_BASE 0x80000000U + +/* + * Indicating dynamic priority to cpu_idle_{un}register_callback(). + */ +#define CPU_IDLE_CB_PRIO_DYNAMIC CPU_IDLE_CB_PRIO_DYN_BASE +/* Priority assigned to dtrace probe callback. 
*/ +#define CPU_IDLE_CB_PRIO_DTRACE (CPU_IDLE_CB_PRIO_LOW_BASE + 0xC000000) + + +#ifdef __x86 +/* Priority assigned to TLB flush callback. */ +#define CPU_IDLE_CB_PRIO_TLB (CPU_IDLE_CB_PRIO_LOW_BASE + 0x100000) +#endif + +/* Name of properties supported by CPU idle notification. */ +#define CPU_IDLE_PROP_IDLE_STATE "idle-state" +#define CPU_IDLE_PROP_ENTER_TIMESTAMP "enter-ts" +#define CPU_IDLE_PROP_EXIT_TIMESTAMP "exit-ts" +#define CPU_IDLE_PROP_LAST_IDLE_TIME "last-idle-time" +#define CPU_IDLE_PROP_LAST_BUSY_TIME "last-busy-time" +#define CPU_IDLE_PROP_TOTAL_IDLE_TIME "total-idle-time" +#define CPU_IDLE_PROP_TOTAL_BUSY_TIME "total-busy-time" +#define CPU_IDLE_PROP_INTERRUPT_COUNT "interupt-count" + +/* + * sizeof(cpu_idle_prop_value_t) should be power of 2 to align on cache line. + */ +typedef union cpu_idle_prop_value { + intptr_t cipv_intptr; + uint32_t cipv_uint32; + uint64_t cipv_uint64; + hrtime_t cipv_hrtime; +} cpu_idle_prop_value_t; + +typedef enum cpu_idle_prop_type { + CPU_IDLE_PROP_TYPE_INTPTR, + CPU_IDLE_PROP_TYPE_UINT32, + CPU_IDLE_PROP_TYPE_UINT64, + CPU_IDLE_PROP_TYPE_HRTIME, +} cpu_idle_prop_type_t; + +typedef void *cpu_idle_callback_handle_t; +typedef void *cpu_idle_callback_context_t; +typedef void *cpu_idle_prop_handle_t; + +/* + * Function prototype for checking CPU wakeup events. + * If CPU has already been awakened, check_wakeup callback should call + * cpu_idle_exit() to notify CPU idle framework if it has been called yet. + */ +typedef void (* cpu_idle_check_wakeup_t)(void *arg); + +/* + * Function prototype for entering idle state notification callback. + * Callback for entering idle state notification must obey all constraints + * which apply to idle thread because it will be called in idle thread context. + * The callback will be called with interrupt disabled. The callback may enable + * interrupt if it can cooperate with corresponding idle_exit callback to + * handle interrupt happening after enabling interrupt. If idle_enter callback + * enables interrupt, the corresponding idle_exit callback may be called before + * returning from idle_enter callback. + */ +typedef void (* cpu_idle_enter_cbfn_t)(void *arg, + cpu_idle_callback_context_t ctx, + cpu_idle_check_wakeup_t check_func, void *check_arg); + +/* + * Function prototype for exiting idle state notification callback. + * Callback for exiting idle state notification will be called in idle thread + * context or interrupt context with interrupt disabled. + * There is a flag to distinguish the calling context. + * The callback must not try to enable interrupts. + */ +typedef void (* cpu_idle_exit_cbfn_t)(void *arg, + cpu_idle_callback_context_t ctx, int flag); + +#define CPU_IDLE_CB_FLAG_INTR 0x1 /* Called in interrupt context. */ +#define CPU_IDLE_CB_FLAG_IDLE 0x2 /* Called in idle thread context. */ + +typedef struct cpu_idle_callback { + int version; + cpu_idle_enter_cbfn_t idle_enter; + cpu_idle_exit_cbfn_t idle_exit; +} cpu_idle_callback_t; + +#define CPU_IDLE_CALLBACK_VER0 0 +#define CPU_IDLE_CALLBACK_VERS CPU_IDLE_CALLBACK_VER0 + +/* + * Register a callback to be called when CPU idle state changes. + * All registered callbacks will be called in priority order from high to low + * when CPU enters idle state and from low to high when CPU leaves idle state. + * If CPU is predicted to sleep for a short time or be under heavy load, + * framework may skip calling registered callbacks when idle state changes to + * avoid overhead and reduce performance penalties. 
+ * It's guaranteed that each exiting notification will be paired with each + * entering notification. + * Return zero on success and error code on failure. + * N.B.: this interface shouldn't be called from following conditions: + * 1) from callback. + */ +extern int cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp, + void *arg, cpu_idle_callback_handle_t *hdlp); + +/* + * Un-register a registered callback. + * Return zero on success and error code on failure. + * N.B.: this interface shouldn't be called from following cases: + * 1) from callback. + */ +extern int cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl); + +/* + * Called by CPU idle handler to notify entering idle state. + * It should be called with interrupt disabled. + * state: platform specific information of idle state to enter. + * On x86, it's CPU C state. + * Idle thread should cancel entering hardware idle state if cpu_idle_enter + * returns non-zero value. + */ +extern int cpu_idle_enter(int state, int flag, + cpu_idle_check_wakeup_t check_func, void *check_arg); + +/* + * Called by CPU idle handler to notify exiting idle state. + * It should be called with interrupt disabled. + */ +extern void cpu_idle_exit(int flag); + +/* + * Get CPU idle notification context corresponding to current CPU. + */ +extern cpu_idle_callback_context_t cpu_idle_get_context(void); + +/* + * Prototype of function called to update property value on demand. + * The callback should update property value corresponding to current CPU. + */ +typedef int (* cpu_idle_prop_update_t)(void *arg, uint64_t seqnum, + cpu_idle_prop_value_t *valp); + +/* + * Create a property with name and type. + * If parameter update is not NULL, it will be called on demand to update + * value of property corresponding to current CPU. + * If parameter update is NULL, provider should call cpu_idle_property_set + * to update property value for each CPU. + * Return zero on success with handle stored in hdlp, otherwise error code. + */ +extern int cpu_idle_prop_create_property(const char *name, + cpu_idle_prop_type_t type, cpu_idle_prop_update_t update, void *arg, + cpu_idle_prop_handle_t *hdlp); + +/* + * Destroy property corresponding to hdl. + * Return zero on success, otherwise error code. + */ +extern int cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl); + +/* + * Create handle for property with name 'name'. + * Return zero on success with handle stored in hdlp, otherwise error code. + */ +extern int cpu_idle_prop_create_handle(const char *name, + cpu_idle_prop_handle_t *hdlp); + +/* + * Destroy property handle. + * Return zero on success, otherwise error code. + */ +extern int cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl); + +/* + * CPU idle property manipulation functions. + * All cpu_idle_prop_get/set_xxx functions with argument ctx should only be used + * to manipulate properties associated with current CPU. + * Context ctx shouldn't be passed to other CPUs to manipulate properties. 
+ */ +extern cpu_idle_prop_type_t cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl); +extern const char *cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl); +extern int cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp); +extern uint32_t cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx); +extern uint64_t cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx); +extern intptr_t cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx); +extern hrtime_t cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx); +extern void cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl, + cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val); +extern void cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, + cpu_idle_prop_value_t val); + +extern uint_t cpu_idle_get_cpu_state(cpu_t *cp); + +extern void cpu_event_init(void); +extern void cpu_event_init_cpu(cpu_t *cp); +extern void cpu_event_fini_cpu(cpu_t *cp); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CPU_EVENT_H */ diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c index 3cb7c3fac1..e8ff2ad634 100644 --- a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c +++ b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c @@ -36,6 +36,7 @@ #include <sys/cpu_acpi.h> #include <sys/cpu_idle.h> #include <sys/cpupm.h> +#include <sys/cpu_event.h> #include <sys/hpet.h> #include <sys/archsystm.h> #include <vm/hat_i86.h> @@ -253,6 +254,74 @@ cstate_wakeup(cpu_t *cp, int bound) } /* + * Function called by CPU idle notification framework to check whether CPU + * has been awakened. It will be called with interrupt disabled. + * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle + * notification framework. + */ +static void +acpi_cpu_mwait_check_wakeup(void *arg) +{ + volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; + + ASSERT(arg != NULL); + if (*mcpu_mwait != MWAIT_HALTED) { + /* + * CPU has been awakened, notify CPU idle notification system. + */ + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } else { + /* + * Toggle interrupt flag to detect pending interrupts. + * If interrupt happened, do_interrupt() will notify CPU idle + * notification framework so no need to call cpu_idle_exit() + * here. + */ + sti(); + SMT_PAUSE(); + cli(); + } +} + +static void +acpi_cpu_mwait_ipi_check_wakeup(void *arg) +{ + volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; + + ASSERT(arg != NULL); + if (*mcpu_mwait != MWAIT_WAKEUP_IPI) { + /* + * CPU has been awakened, notify CPU idle notification system. + */ + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } else { + /* + * Toggle interrupt flag to detect pending interrupts. + * If interrupt happened, do_interrupt() will notify CPU idle + * notification framework so no need to call cpu_idle_exit() + * here. + */ + sti(); + SMT_PAUSE(); + cli(); + } +} + +/*ARGSUSED*/ +static void +acpi_cpu_check_wakeup(void *arg) +{ + /* + * Toggle interrupt flag to detect pending interrupts. + * If interrupt happened, do_interrupt() will notify CPU idle + * notification framework so no need to call cpu_idle_exit() here. 
+ */ + sti(); + SMT_PAUSE(); + cli(); +} + +/* * enter deep c-state handler */ static void @@ -267,6 +336,7 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) uint32_t cs_type = cstate->cs_type; int hset_update = 1; boolean_t using_timer; + cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup; /* * Set our mcpu_mwait here, so we can tell if anyone tries to @@ -274,10 +344,13 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) * attempt to set our mcpu_mwait until we add ourself to the haltset. */ if (mcpu_mwait) { - if (type == ACPI_ADR_SPACE_SYSTEM_IO) + if (type == ACPI_ADR_SPACE_SYSTEM_IO) { *mcpu_mwait = MWAIT_WAKEUP_IPI; - else + check_func = &acpi_cpu_mwait_ipi_check_wakeup; + } else { *mcpu_mwait = MWAIT_HALTED; + check_func = &acpi_cpu_mwait_check_wakeup; + } } /* @@ -397,13 +470,14 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) */ i86_monitor(mcpu_mwait, 0, 0); if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) { - cpu_dtrace_idle_probe(CPU_ACPI_C1); - - tlb_going_idle(); - i86_mwait(0, 0); - tlb_service(); - - cpu_dtrace_idle_probe(CPU_ACPI_C0); + if (cpu_idle_enter(IDLE_STATE_C1, 0, + check_func, (void *)mcpu_mwait) == 0) { + if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == + MWAIT_HALTED) { + i86_mwait(0, 0); + } + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } } /* @@ -416,8 +490,6 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) return; } - cpu_dtrace_idle_probe((uint_t)cs_type); - if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) { /* * We're on our way to being halted. @@ -426,25 +498,31 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) */ i86_monitor(mcpu_mwait, 0, 0); if (*mcpu_mwait == MWAIT_HALTED) { - uint32_t eax = cstate->cs_address; - uint32_t ecx = 1; - - tlb_going_idle(); - i86_mwait(eax, ecx); - tlb_service(); + if (cpu_idle_enter((uint_t)cs_type, 0, + check_func, (void *)mcpu_mwait) == 0) { + if (*mcpu_mwait == MWAIT_HALTED) { + i86_mwait(cstate->cs_address, 1); + } + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } } } else if (type == ACPI_ADR_SPACE_SYSTEM_IO) { uint32_t value; ACPI_TABLE_FADT *gbl_FADT; if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { - tlb_going_idle(); - (void) cpu_acpi_read_port(cstate->cs_address, - &value, 8); - acpica_get_global_FADT(&gbl_FADT); - (void) cpu_acpi_read_port( - gbl_FADT->XPmTimerBlock.Address, &value, 32); - tlb_service(); + if (cpu_idle_enter((uint_t)cs_type, 0, + check_func, (void *)mcpu_mwait) == 0) { + if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { + (void) cpu_acpi_read_port( + cstate->cs_address, &value, 8); + acpica_get_global_FADT(&gbl_FADT); + (void) cpu_acpi_read_port( + gbl_FADT->XPmTimerBlock.Address, + &value, 32); + } + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } } } @@ -455,8 +533,6 @@ acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) (void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT); sti(); - cpu_dtrace_idle_probe(CPU_ACPI_C0); - /* * We're no longer halted */ diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c index 2f4b66ddf2..18968c0721 100644 --- a/usr/src/uts/i86pc/os/intr.c +++ b/usr/src/uts/i86pc/os/intr.c @@ -25,6 +25,7 @@ */ #include <sys/cpuvar.h> +#include <sys/cpu_event.h> #include <sys/regset.h> #include <sys/psw.h> #include <sys/types.h> @@ -76,7 +77,7 @@ ulong_t laststi[NCPU]; /* * This variable tracks the last place events were disabled on each cpu - * it assists in debugging when asserts that interupts are enabled trip. + * it assists in debugging when asserts that interrupts are enabled trip. 
*/ ulong_t lastcli[NCPU]; @@ -931,12 +932,7 @@ do_interrupt(struct regs *rp, trap_trace_rec_t *ttp) ttp->ttr_vector = 0xff; #endif /* TRAPTRACE */ -#if !defined(__xpv) - /* - * Handle any pending TLB flushing - */ - tlb_service(); -#endif + cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR); /* * If it's a softint go do it now. diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c index ced26fb6a5..6c9cc3aec3 100644 --- a/usr/src/uts/i86pc/os/mp_machdep.c +++ b/usr/src/uts/i86pc/os/mp_machdep.c @@ -36,6 +36,7 @@ #include <sys/x86_archext.h> #include <sys/cpupart.h> #include <sys/cpuvar.h> +#include <sys/cpu_event.h> #include <sys/cmt.h> #include <sys/cpu.h> #include <sys/disp.h> @@ -370,18 +371,28 @@ cpu_idle_adaptive(void) (*CPU->cpu_m.mcpu_idle_cpu)(); } -void -cpu_dtrace_idle_probe(uint_t cstate) +/* + * Function called by CPU idle notification framework to check whether CPU + * has been awakened. It will be called with interrupt disabled. + * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle + * notification framework. + */ +/*ARGSUSED*/ +static void +cpu_idle_check_wakeup(void *arg) { - cpu_t *cpup = CPU; - struct machcpu *mcpu = &(cpup->cpu_m); - - mcpu->curr_cstate = cstate; - DTRACE_PROBE1(idle__state__transition, uint_t, cstate); + /* + * Toggle interrupt flag to detect pending interrupts. + * If interrupt happened, do_interrupt() will notify CPU idle + * notification framework so no need to call cpu_idle_exit() here. + */ + sti(); + SMT_PAUSE(); + cli(); } /* - * Idle the present CPU until awoken via an interrupt + * Idle the present CPU until wakened via an interrupt */ void cpu_idle(void) @@ -407,7 +418,7 @@ cpu_idle(void) * * When a thread becomes runnable, it is placed on the queue * and then the halted CPU bitmap is checked to determine who - * (if anyone) should be awoken. We therefore need to first + * (if anyone) should be awakened. We therefore need to first * add ourselves to the bitmap, and and then check if there * is any work available. The order is important to prevent a race * that can lead to work languishing on a run queue somewhere while @@ -479,11 +490,11 @@ cpu_idle(void) return; } - cpu_dtrace_idle_probe(IDLE_STATE_C1); - - mach_cpu_idle(); - - cpu_dtrace_idle_probe(IDLE_STATE_C0); + if (cpu_idle_enter(IDLE_STATE_C1, 0, + cpu_idle_check_wakeup, NULL) == 0) { + mach_cpu_idle(); + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } /* * We're no longer halted @@ -560,7 +571,37 @@ cpu_wakeup(cpu_t *cpu, int bound) #ifndef __xpv /* - * Idle the present CPU until awoken via touching its monitored line + * Function called by CPU idle notification framework to check whether CPU + * has been awakened. It will be called with interrupt disabled. + * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle + * notification framework. + */ +static void +cpu_idle_mwait_check_wakeup(void *arg) +{ + volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg; + + ASSERT(arg != NULL); + if (*mcpu_mwait != MWAIT_HALTED) { + /* + * CPU has been awakened, notify CPU idle notification system. + */ + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } else { + /* + * Toggle interrupt flag to detect pending interrupts. + * If interrupt happened, do_interrupt() will notify CPU idle + * notification framework so no need to call cpu_idle_exit() + * here. 
+ */ + sti(); + SMT_PAUSE(); + cli(); + } +} + +/* + * Idle the present CPU until awakened via touching its monitored line */ void cpu_idle_mwait(void) @@ -632,13 +673,13 @@ cpu_idle_mwait(void) */ i86_monitor(mcpu_mwait, 0, 0); if (*mcpu_mwait == MWAIT_HALTED) { - cpu_dtrace_idle_probe(IDLE_STATE_C1); - - tlb_going_idle(); - i86_mwait(0, 0); - tlb_service(); - - cpu_dtrace_idle_probe(IDLE_STATE_C0); + if (cpu_idle_enter(IDLE_STATE_C1, 0, + cpu_idle_mwait_check_wakeup, (void *)mcpu_mwait) == 0) { + if (*mcpu_mwait == MWAIT_HALTED) { + i86_mwait(0, 0); + } + cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE); + } } /* @@ -803,7 +844,7 @@ mach_get_platform(int owner) /* * Save the version of the PSM module, in case we need to - * bahave differently based on version. + * behave differently based on version. */ mach_ver[0] = mach_ver[owner]; diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c index ff880fa515..6fc571b445 100644 --- a/usr/src/uts/i86pc/os/mp_pc.c +++ b/usr/src/uts/i86pc/os/mp_pc.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Welcome to the world of the "real mode platter". * See also startup.c, mpcore.s and apic.c for related routines. @@ -106,7 +104,7 @@ mach_cpucontext_alloc(struct cpu *cp) /* * Allocate space for stack, tss, gdt and idt. We round the size - * alloated for cpu_tables up, so that the TSS is on a unique page. + * allotted for cpu_tables up, so that the TSS is on a unique page. * This is more efficient when running in virtual machines. */ ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP); @@ -257,9 +255,7 @@ mach_cpu_halt(char *msg) void mach_cpu_idle(void) { - tlb_going_idle(); i86_halt(); - tlb_service(); } void diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index e8e2c24053..683f8942e4 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -854,7 +854,7 @@ workaround_errata(struct cpu *cpu) /*LINTED*/ if (cpuid_opteron_erratum(cpu, 109) > 0) do { /* - * Certain Reverse REP MOVS May Produce Unpredictable Behaviour + * Certain Reverse REP MOVS May Produce Unpredictable Behavior */ #if defined(OPTERON_ERRATUM_109) /* @@ -1470,6 +1470,7 @@ mp_startup(void) { struct cpu *cp = CPU; uint_t new_x86_feature; + extern void cpu_event_init_cpu(cpu_t *); #ifndef __xpv extern void cpupm_init(cpu_t *); #endif @@ -1556,7 +1557,7 @@ mp_startup(void) /* * We could be more sophisticated here, and just mark the CPU * as "faulted" but at this point we'll opt for the easier - * answer of dieing horribly. Provided the boot cpu is ok, + * answer of dying horribly. Provided the boot cpu is ok, * the system can be recovered by booting with use_mp set to zero. */ if (workaround_errata(cp) != 0) @@ -1591,7 +1592,7 @@ mp_startup(void) /* * Enable preemption here so that contention for any locks acquired * later in mp_startup may be preempted if the thread owning those - * locks is continously executing on other CPUs (for example, this + * locks is continuously executing on other CPUs (for example, this * CPU must be preemptible to allow other CPUs to pause it during their * startup phases). It's safe to enable preemption here because the * CPU state is pretty-much fully constructed. 
@@ -1602,6 +1603,7 @@ mp_startup(void) ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL)); set_base_spl(); /* Restore the spl to its proper value */ + cpu_event_init_cpu(cp); #ifndef __xpv cpupm_init(cp); #endif diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index 22a66482b2..ef6b28fdbc 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -2016,6 +2016,7 @@ startup_end(void) { int i; extern void setx86isalist(void); + extern void cpu_event_init(void); PRM_POINT("startup_end() starting..."); @@ -2031,6 +2032,11 @@ startup_end(void) */ kcpc_hw_init(CPU); + /* + * Initialize cpu event framework. + */ + cpu_event_init(); + #if defined(OPTERON_WORKAROUND_6323525) if (opteron_workaround_6323525) patch_workaround_6323525(); @@ -2125,6 +2131,7 @@ void post_startup(void) { extern void cpupm_init(cpu_t *); + extern void cpu_event_init_cpu(cpu_t *); /* * Set the system wide, processor-specific flags to be passed @@ -2184,6 +2191,7 @@ post_startup(void) maxmem = freemem; + cpu_event_init_cpu(CPU); cpupm_init(CPU); add_cpunode2devtree(CPU->cpu_id, CPU->cpu_m.mcpu_cpi); diff --git a/usr/src/uts/i86pc/sys/cpu_idle.h b/usr/src/uts/i86pc/sys/cpu_idle.h index f60a6e9d3c..1d922a1f8c 100644 --- a/usr/src/uts/i86pc/sys/cpu_idle.h +++ b/usr/src/uts/i86pc/sys/cpu_idle.h @@ -31,16 +31,17 @@ #define _CPUIDLE_H #include <sys/cpupm.h> +#include <sys/cpu.h> #ifdef __cplusplus extern "C" { #endif #define CPU_MAX_CSTATES 8 -#define CPU_ACPI_C0 0 -#define CPU_ACPI_C1 1 -#define CPU_ACPI_C2 2 -#define CPU_ACPI_C3 3 +#define CPU_ACPI_C0 IDLE_STATE_C0 +#define CPU_ACPI_C1 IDLE_STATE_C1 +#define CPU_ACPI_C2 IDLE_STATE_C2 +#define CPU_ACPI_C3 IDLE_STATE_C3 #define BM_CTL 0x1 #define BM_RLD 0x2 @@ -64,7 +65,6 @@ extern void cstate_wakeup(cpu_t *, int); extern boolean_t cpu_deep_cstates_supported(void); extern void cpu_wakeup(cpu_t *, int); extern void cpu_wakeup_mwait(cpu_t *, int); -extern void cpu_dtrace_idle_probe(uint_t); extern void cpuidle_manage_cstates(void *); extern boolean_t cstate_timer_callback(int code); diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h index 50a5b98432..28b72f0a04 100644 --- a/usr/src/uts/i86pc/sys/machcpuvar.h +++ b/usr/src/uts/i86pc/sys/machcpuvar.h @@ -131,7 +131,6 @@ struct machcpu { void (*mcpu_idle_cpu)(void); /* idle function */ uint16_t mcpu_idle_type; /* CPU next idle type */ uint16_t max_cstates; /* supported max cstates */ - uint32_t curr_cstate; /* current cstate */ struct cpu_ucode_info *mcpu_ucode_info; diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c index 028518c894..732fe496a9 100644 --- a/usr/src/uts/i86pc/vm/hat_i86.c +++ b/usr/src/uts/i86pc/vm/hat_i86.c @@ -138,7 +138,7 @@ int enable_1gpg = 1; /* * AMD shanghai processors provide better management of 1gb ptes in its tlb. - * By default, 1g page suppport will be disabled for pre-shanghai AMD + * By default, 1g page support will be disabled for pre-shanghai AMD * processors that don't have optimal tlb support for the 1g page size. * chk_optimal_1gtlb can be set to 0 to force 1g page support on sub-optimal * processors. @@ -1299,7 +1299,7 @@ hati_pte_map( int rv = 0; /* - * Is this a consistant (ie. need mapping list lock) mapping? + * Is this a consistent (ie. need mapping list lock) mapping? */ is_consist = (pp != NULL && (flags & HAT_LOAD_NOCONSIST) == 0); @@ -1991,22 +1991,15 @@ tlb_going_idle(void) /* * Service a delayed TLB flush if coming out of being idle. 
+ * It will be called from cpu idle notification with interrupt disabled. */ void tlb_service(void) { - ulong_t flags = getflags(); ulong_t tlb_info; ulong_t found; /* - * Be sure interrupts are off while doing this so that - * higher level interrupts correctly wait for flushes to finish. - */ - if (flags & PS_IE) - flags = intr_clear(); - - /* * We only have to do something if coming out of being idle. */ tlb_info = CPU->cpu_m.mcpu_tlb_info; @@ -2024,12 +2017,6 @@ tlb_service(void) if (tlb_info & TLB_INVAL_ALL) flush_all_tlb_entries(); } - - /* - * Restore interrupt enable control bit. - */ - if (flags & PS_IE) - sti(); } #endif /* !__xpv */ @@ -3178,7 +3165,7 @@ hat_reserve(struct as *as, caddr_t addr, size_t len) /* * Called when all mappings to a page should have write permission removed. - * Mostly stolem from hat_pagesync() + * Mostly stolen from hat_pagesync() */ static void hati_page_clrwrt(struct page *pp) @@ -3311,8 +3298,8 @@ hat_page_clrattr(struct page *pp, uint_t flag) /* * If flag is specified, returns 0 if attribute is disabled - * and non zero if enabled. If flag specifes multiple attributs - * then returns 0 if ALL atriibutes are disabled. This is an advisory + * and non zero if enabled. If flag specifes multiple attributes + * then returns 0 if ALL attributes are disabled. This is an advisory * call. */ uint_t diff --git a/usr/src/uts/intel/sys/cpu.h b/usr/src/uts/intel/sys/cpu.h index d62cb7692f..20f9e0290e 100644 --- a/usr/src/uts/intel/sys/cpu.h +++ b/usr/src/uts/intel/sys/cpu.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_CPU_H #define _SYS_CPU_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * WARNING: * This header file is Obsolete and may be deleted in a @@ -72,12 +70,15 @@ extern void i86_mwait(uint32_t data, uint32_t extensions); * C-state defines for the idle_state_transition DTrace probe * * The probe fires when the CPU undergoes an idle state change (e.g. C-state) - * The agument passed is the C-state to which the CPU is transitioning. + * The argument passed is the C-state to which the CPU is transitioning. * - * The states are defined here. + * These states will be shared by cpupm subsystem, so they should be kept in + * consistence with ACPI defined C states. */ #define IDLE_STATE_C0 0 #define IDLE_STATE_C1 1 +#define IDLE_STATE_C2 2 +#define IDLE_STATE_C3 3 #endif /* _KERNEL */ |
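The header added above (usr/src/uts/common/sys/cpu_event.h) defines the client-facing interface: a versioned cpu_idle_callback_t registered with cpu_idle_register_callback(), and named per-CPU properties read through handles created by cpu_idle_prop_create_handle(). The idle-handler side (cpu_idle_enter()/cpu_idle_exit()) is already exercised in the diff itself in mp_machdep.c and cpu_idle.c. What follows is a minimal sketch, not part of the change, of how a kernel client might consume the new interfaces; the my_* names and the init/fini context are hypothetical, and only calls declared in the new header are used.

/*
 * Hypothetical client of the CPU idle notification framework.  The my_*
 * names are illustrative; only interfaces declared in sys/cpu_event.h
 * (as added by this change) are called.
 */
#include <sys/types.h>
#include <sys/cpu_event.h>

static cpu_idle_callback_handle_t	my_cb_hdl;
static cpu_idle_prop_handle_t		my_last_idle_hdl;
static hrtime_t				my_last_seen_idle;	/* real code: per-CPU data */

/*ARGSUSED*/
static void
my_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	/*
	 * Called in idle thread context with interrupts disabled, just
	 * before the CPU enters its hardware idle state.  Must obey the
	 * same constraints as the idle thread itself.
	 */
}

/*ARGSUSED*/
static void
my_idle_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
{
	/*
	 * Called with interrupts disabled, either from the idle thread or
	 * from interrupt context (flag & CPU_IDLE_CB_FLAG_INTR).  The
	 * property context ctx is only valid for the current CPU.
	 */
	my_last_seen_idle = cpu_idle_prop_get_hrtime(my_last_idle_hdl, ctx);
}

static cpu_idle_callback_t my_idle_cb = {
	CPU_IDLE_CALLBACK_VERS,
	my_idle_enter,
	my_idle_exit,
};

static int
my_client_init(void)
{
	int err;

	err = cpu_idle_prop_create_handle(CPU_IDLE_PROP_LAST_IDLE_TIME,
	    &my_last_idle_hdl);
	if (err != 0)
		return (err);

	/* Dynamic priority: no ordering dependency on other callbacks. */
	err = cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DYNAMIC,
	    &my_idle_cb, NULL, &my_cb_hdl);
	if (err != 0)
		(void) cpu_idle_prop_destroy_handle(my_last_idle_hdl);

	return (err);
}

static void
my_client_fini(void)
{
	/* Neither call may be made from within a callback. */
	(void) cpu_idle_unregister_callback(my_cb_hdl);
	(void) cpu_idle_prop_destroy_handle(my_last_idle_hdl);
}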