Diffstat (limited to 'usr/src/uts/common/os/cpu_event.c')
-rw-r--r-- | usr/src/uts/common/os/cpu_event.c | 1093 |
1 file changed, 1093 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/cpu_event.c b/usr/src/uts/common/os/cpu_event.c
new file mode 100644
index 0000000000..11162ccfc9
--- /dev/null
+++ b/usr/src/uts/common/os/cpu_event.c
@@ -0,0 +1,1093 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2009, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * Introduction
+ * This file implements a CPU event notification mechanism to signal clients
+ * that are interested in CPU-related events.
+ * Currently it only supports CPU idle state change events, which are
+ * triggered just before a CPU enters a hardware idle state and just after
+ * it wakes up from that state.
+ * Please refer to PSARC/2009/115 for detailed information.
+ *
+ * Lock Strategy
+ * 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock.
+ * 2) No protection for cpu_idle_cb_state because it's per-CPU data.
+ * 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock.
+ * 4) cpu_idle_cb_array is protected by the pause_cpus/start_cpus logic.
+ * 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and the
+ *    pause_cpus/start_cpus logic.
+ * The algorithm is optimized for the hot path: read-side access is
+ * lock-free. On the write side, pause_cpus() keeps all other CPUs in their
+ * pause threads, which guarantees that no other thread will access the
+ * cpu_idle_cb_max/curr/array data structures.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/cpu.h>
+#include <sys/kmem.h>
+#include <sys/machcpuvar.h>
+#include <sys/sdt.h>
+#include <sys/sysmacros.h>
+#include <sys/synch.h>
+#include <sys/systm.h>
+#include <sys/sunddi.h>
+#if defined(__sparc)
+#include <sys/machsystm.h>
+#elif defined(__x86)
+#include <sys/archsystm.h>
+#endif
+#include <sys/cpu_event.h>
+
+/* Define the normal (non-idle) CPU state for each platform. */
+#if defined(__x86)
+#define	CPU_IDLE_STATE_NORMAL	IDLE_STATE_C0
+#elif defined(__sparc)
+/*
+ * At the time of this implementation, IDLE_STATE_NORMAL is defined
+ * in mach_startup.c and not in a header file. So if we find it is
+ * undefined, we set it to the value used in mach_startup.c.
+ * Should it eventually be defined in a header, we will pick it up.
+ */
+#ifndef IDLE_STATE_NORMAL
+#define	IDLE_STATE_NORMAL	0
+#endif
+#define	CPU_IDLE_STATE_NORMAL	IDLE_STATE_NORMAL
+#endif
+
+/*
+ * To improve cache efficiency and avoid cache false sharing, CPU idle
+ * properties are grouped into cache lines as below:
+ * |     CPU0      |     CPU1      |.........|     CPUn      |
+ * | cache line 0  | cache line 1  |.........| cache line n  |
+ * | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm |
+ * To access the value of property m for CPU n, use the following index:
+ *	index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m
+ */
+#define	CPU_IDLE_VALUE_GROUP_SIZE	\
+	(CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t))
+
+/* Get the callback context handle for the current CPU. */
+#define	CPU_IDLE_GET_CTX(cp)		\
+	((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid))
+
+/* Get the CPU sequential id from ctx. */
+#define	CPU_IDLE_CTX2CPUID(ctx)		((processorid_t)(intptr_t)(ctx))
+
+/* Compute the property array index from a callback context handle. */
+#define	CPU_IDLE_CTX2IDX(ctx)		\
+	(((int)(intptr_t)(ctx)) * CPU_IDLE_VALUE_GROUP_SIZE)
+
+#define	CPU_IDLE_HDL2VALP(hdl, idx)	\
+	(&((cpu_idle_prop_impl_t *)(hdl))->value[(idx)])
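To make the index formula concrete, here is a minimal user-level sketch of the same arithmetic, assuming a hypothetical 64-byte coherence granule and an 8-byte property value (the kernel derives both from CPU_CACHE_COHERENCE_SIZE and sizeof (cpu_idle_prop_value_t)):

#include <stdio.h>

#define	CACHE_COHERENCE_SIZE	64	/* assumed granule size */
#define	PROP_VALUE_SIZE		8	/* assumed sizeof (value) */
#define	VALUE_GROUP_SIZE	(CACHE_COHERENCE_SIZE / PROP_VALUE_SIZE)

int
main(void)
{
	int cpu_seqid = 3, prop_m = 2;
	/* Property m of CPU n lands inside CPU n's own cache line. */
	int index = cpu_seqid * VALUE_GROUP_SIZE + prop_m;

	(void) printf("index = %d\n", index);	/* 3 * 8 + 2 = 26 */
	return (0);
}

With this layout, all of one CPU's property values share that CPU's cache line, so no CPU ever writes to a line that another CPU is reading on the hot path.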
+
+/*
+ * When cpu_idle_cb_array is NULL or full, grow it by
+ * CPU_IDLE_ARRAY_CAPACITY_INC entries at a time. Linear growth is
+ * preferred over exponential growth here.
+ */
+#define	CPU_IDLE_ARRAY_CAPACITY_INC	0x10
+
+typedef struct cpu_idle_prop_impl {
+	cpu_idle_prop_value_t		*value;
+	struct cpu_idle_prop_impl	*next;
+	char				*name;
+	cpu_idle_prop_update_t		update;
+	void				*private;
+	cpu_idle_prop_type_t		type;
+	uint32_t			refcnt;
+} cpu_idle_prop_impl_t;
+
+typedef struct cpu_idle_prop_item {
+	cpu_idle_prop_type_t		type;
+	char				*name;
+	cpu_idle_prop_update_t		update;
+	void				*arg;
+	cpu_idle_prop_handle_t		handle;
+} cpu_idle_prop_item_t;
+
+/* Structure to maintain registered callbacks in a list. */
+typedef struct cpu_idle_cb_impl {
+	struct cpu_idle_cb_impl		*next;
+	cpu_idle_callback_t		*callback;
+	void				*argument;
+	int				priority;
+} cpu_idle_cb_impl_t;
+
+/*
+ * Structure to maintain registered callbacks in priority order, laid out
+ * for cache-efficient read access.
+ */
+typedef struct cpu_idle_cb_item {
+	cpu_idle_enter_cbfn_t		enter;
+	cpu_idle_exit_cbfn_t		exit;
+	void				*arg;
+	cpu_idle_cb_impl_t		*impl;
+} cpu_idle_cb_item_t;
+
+/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
+typedef union cpu_idle_cb_state {
+	struct {
+		int			index;
+		boolean_t		ready;
+		cpu_idle_prop_value_t	*idle_state;
+		cpu_idle_prop_value_t	*enter_ts;
+		cpu_idle_prop_value_t	*exit_ts;
+		cpu_idle_prop_value_t	*last_idle;
+		cpu_idle_prop_value_t	*last_busy;
+		cpu_idle_prop_value_t	*total_idle;
+		cpu_idle_prop_value_t	*total_busy;
+		cpu_idle_prop_value_t	*intr_cnt;
+	} v;
+#ifdef _LP64
+	char align[2 * CPU_CACHE_COHERENCE_SIZE];
+#else
+	char align[CPU_CACHE_COHERENCE_SIZE];
+#endif
+} cpu_idle_cb_state_t;
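The align member exists only to pad the union out to a whole number of coherence granules, so two CPUs' state never share a line (it is doubled on _LP64 because the struct of pointers is larger there). A standalone sketch of the same trick, assuming a hypothetical 64-byte line in place of CPU_CACHE_COHERENCE_SIZE:

#include <stdio.h>

#define	CACHE_LINE	64	/* assumed coherence granule */

typedef union percpu_state {
	struct {
		int	index;
		long	enter_ts;
	} v;
	char	align[CACHE_LINE];	/* pads sizeof to a full line */
} percpu_state_t;

_Static_assert(sizeof (percpu_state_t) % CACHE_LINE == 0,
	"per-CPU state must occupy whole cache lines");

int
main(void)
{
	percpu_state_t state[4];	/* one padded slot per CPU */

	(void) printf("%zu bytes per CPU\n", sizeof (state[0]));
	return (0);
}

The padding only helps if the array base is itself line-aligned, which cpu_event_init() below achieves by over-allocating and rounding the start address up.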
+
+static kmutex_t			cpu_idle_prop_lock;
+static cpu_idle_prop_impl_t	*cpu_idle_prop_busy = NULL;
+static cpu_idle_prop_impl_t	*cpu_idle_prop_free = NULL;
+
+static kmutex_t			cpu_idle_cb_lock;
+static cpu_idle_cb_impl_t	*cpu_idle_cb_busy = NULL;
+static cpu_idle_cb_item_t	*cpu_idle_cb_array = NULL;
+static int			cpu_idle_cb_curr = 0;
+static int			cpu_idle_cb_max = 0;
+
+static cpu_idle_cb_state_t	*cpu_idle_cb_state;
+
+static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
+    cpu_idle_prop_value_t *valp);
+
+static cpu_idle_prop_item_t cpu_idle_prop_array[] = {
+	{
+	    CPU_IDLE_PROP_TYPE_INTPTR, CPU_IDLE_PROP_IDLE_STATE,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_ENTER_TIMESTAMP,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_EXIT_TIMESTAMP,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_IDLE_TIME,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_BUSY_TIME,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_IDLE_TIME,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_BUSY_TIME,
+	    NULL, NULL, NULL
+	},
+	{
+	    CPU_IDLE_PROP_TYPE_UINT64, CPU_IDLE_PROP_INTERRUPT_COUNT,
+	    cpu_idle_prop_update_intr_cnt, NULL, NULL
+	},
+};
+
+#define	CPU_IDLE_PROP_IDX_IDLE_STATE	0
+#define	CPU_IDLE_PROP_IDX_ENTER_TS	1
+#define	CPU_IDLE_PROP_IDX_EXIT_TS	2
+#define	CPU_IDLE_PROP_IDX_LAST_IDLE	3
+#define	CPU_IDLE_PROP_IDX_LAST_BUSY	4
+#define	CPU_IDLE_PROP_IDX_TOTAL_IDLE	5
+#define	CPU_IDLE_PROP_IDX_TOTAL_BUSY	6
+#define	CPU_IDLE_PROP_IDX_INTR_CNT	7
+
+/*ARGSUSED*/
+static void
+cpu_idle_dtrace_enter(void *arg, cpu_idle_callback_context_t ctx,
+    cpu_idle_check_wakeup_t check_func, void *check_arg)
+{
+	int state;
+
+	state = cpu_idle_prop_get_intptr(
+	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, ctx);
+	DTRACE_PROBE1(idle__state__transition, uint_t, state);
+}
+
+/*ARGSUSED*/
+static void
+cpu_idle_dtrace_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
+{
+	DTRACE_PROBE1(idle__state__transition, uint_t, CPU_IDLE_STATE_NORMAL);
+}
+
+static cpu_idle_callback_handle_t cpu_idle_cb_handle_dtrace;
+static cpu_idle_callback_t cpu_idle_callback_dtrace = {
+	CPU_IDLE_CALLBACK_VERS,
+	cpu_idle_dtrace_enter,
+	cpu_idle_dtrace_exit,
+};
+
+#if defined(__x86) && !defined(__xpv)
+extern void tlb_going_idle(void);
+extern void tlb_service(void);
+
+static cpu_idle_callback_handle_t cpu_idle_cb_handle_tlb;
+static cpu_idle_callback_t cpu_idle_callback_tlb = {
+	CPU_IDLE_CALLBACK_VERS,
+	(cpu_idle_enter_cbfn_t)tlb_going_idle,
+	(cpu_idle_exit_cbfn_t)tlb_service,
+};
+#endif
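The two built-in callbacks above show the client pattern: a versioned cpu_idle_callback_t holding enter/exit hooks, exchanged at registration for an opaque handle. A user-level mock of that shape (all names here are invented stand-ins, not the kernel API):

#include <stdio.h>

#define	MY_CB_VERS	0	/* stand-in for CPU_IDLE_CALLBACK_VERS */

typedef struct my_idle_cb {
	int	version;
	void	(*idle_enter)(void *arg);
	void	(*idle_exit)(void *arg);
} my_idle_cb_t;

typedef my_idle_cb_t *my_cb_handle_t;

/* Mirrors the kernel's checks: reject NULLs and version mismatches. */
static int
my_register_cb(my_idle_cb_t *cbp, my_cb_handle_t *hdlp)
{
	if (cbp == NULL || hdlp == NULL || cbp->version != MY_CB_VERS)
		return (-1);
	*hdlp = cbp;
	return (0);
}

static void
enter_hook(void *arg)
{
	(void) arg;
	(void) puts("idle enter");
}

static void
exit_hook(void *arg)
{
	(void) arg;
	(void) puts("idle exit");
}

int
main(void)
{
	my_idle_cb_t cb = { MY_CB_VERS, enter_hook, exit_hook };
	my_cb_handle_t hdl;

	if (my_register_cb(&cb, &hdl) != 0)
		return (1);
	hdl->idle_enter(NULL);
	hdl->idle_exit(NULL);
	return (0);
}

Versioning the struct lets the kernel refuse callbacks built against an incompatible layout instead of dereferencing garbage.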
+
+void
+cpu_event_init(void)
+{
+	int i, idx;
+	size_t sz;
+	intptr_t buf;
+	cpu_idle_cb_state_t *sp;
+	cpu_idle_prop_item_t *ip;
+
+	mutex_init(&cpu_idle_cb_lock, NULL, MUTEX_DRIVER, NULL);
+	mutex_init(&cpu_idle_prop_lock, NULL, MUTEX_DRIVER, NULL);
+
+	/* Create internal properties. */
+	for (i = 0, ip = cpu_idle_prop_array;
+	    i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]);
+	    i++, ip++) {
+		(void) cpu_idle_prop_create_property(ip->name, ip->type,
+		    ip->update, ip->arg, &ip->handle);
+		ASSERT(ip->handle != NULL);
+	}
+
+	/* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */
+	sz = sizeof (cpu_idle_cb_state_t) * max_ncpus;
+	sz += CPU_CACHE_COHERENCE_SIZE;
+	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
+	cpu_idle_cb_state = (cpu_idle_cb_state_t *)P2ROUNDUP(buf,
+	    CPU_CACHE_COHERENCE_SIZE);
+
+	/* Cache frequently used property value pointers. */
+	for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
+		idx = CPU_IDLE_CTX2IDX(i);
+#define	___INIT_P(f, i)	\
+	sp->v.f = CPU_IDLE_HDL2VALP(cpu_idle_prop_array[(i)].handle, idx)
+		___INIT_P(idle_state, CPU_IDLE_PROP_IDX_IDLE_STATE);
+		___INIT_P(enter_ts, CPU_IDLE_PROP_IDX_ENTER_TS);
+		___INIT_P(exit_ts, CPU_IDLE_PROP_IDX_EXIT_TS);
+		___INIT_P(last_idle, CPU_IDLE_PROP_IDX_LAST_IDLE);
+		___INIT_P(last_busy, CPU_IDLE_PROP_IDX_LAST_BUSY);
+		___INIT_P(total_idle, CPU_IDLE_PROP_IDX_TOTAL_IDLE);
+		___INIT_P(total_busy, CPU_IDLE_PROP_IDX_TOTAL_BUSY);
+		___INIT_P(intr_cnt, CPU_IDLE_PROP_IDX_INTR_CNT);
+#undef	___INIT_P
+	}
+
+	/* Register built-in callbacks. */
+	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DTRACE,
+	    &cpu_idle_callback_dtrace, NULL, &cpu_idle_cb_handle_dtrace) != 0) {
+		cmn_err(CE_PANIC,
+		    "cpu_idle: failed to register callback for dtrace.");
+	}
+#if defined(__x86) && !defined(__xpv)
+	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_TLB,
+	    &cpu_idle_callback_tlb, NULL, &cpu_idle_cb_handle_tlb) != 0) {
+		cmn_err(CE_PANIC,
+		    "cpu_idle: failed to register callback for tlb_flush.");
+	}
+#endif
+}
+
+void
+cpu_event_init_cpu(cpu_t *cp)
+{
+	ASSERT(cp->cpu_seqid < max_ncpus);
+	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
+}
+
+void
+cpu_event_fini_cpu(cpu_t *cp)
+{
+	ASSERT(cp->cpu_seqid < max_ncpus);
+	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
+}
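cpu_event_init() aligns the per-CPU state array by allocating one extra granule and rounding the start address up. The same technique in user-level C, with a hypothetical 64-byte granule standing in for CPU_CACHE_COHERENCE_SIZE and a local macro standing in for the kernel's P2ROUNDUP():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define	CACHE_LINE	64	/* assumed coherence granule */
/* Stand-in for P2ROUNDUP(): round x up to a multiple of power-of-two a. */
#define	ROUNDUP(x, a)	(((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

int
main(void)
{
	size_t sz = 16 * 128;			/* payload size */
	/* Over-allocate by one granule so the start can be rounded up. */
	char *raw = calloc(1, sz + CACHE_LINE);
	char *aligned;

	if (raw == NULL)
		return (1);
	aligned = (char *)ROUNDUP((uintptr_t)raw, CACHE_LINE);
	(void) printf("raw %p -> aligned %p\n", (void *)raw, (void *)aligned);
	/*
	 * The kernel keeps only the aligned pointer and never frees the
	 * buffer; user code must free the raw pointer, not the aligned one.
	 */
	free(raw);
	return (0);
}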
+
+static void
+cpu_idle_insert_callback(cpu_idle_cb_impl_t *cip)
+{
+	int unlock = 0, unpause = 0;
+	int i, cnt_new = 0, cnt_old = 0;
+	char *buf_new = NULL, *buf_old = NULL;
+
+	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));
+
+	/*
+	 * Expand the array if it's full.
+	 * Memory must be allocated outside the pause_cpus()/start_cpus()
+	 * scope because kmem_zalloc() can't be called with the KM_SLEEP
+	 * flag within that scope.
+	 */
+	if (cpu_idle_cb_curr == cpu_idle_cb_max) {
+		cnt_new = cpu_idle_cb_max + CPU_IDLE_ARRAY_CAPACITY_INC;
+		buf_new = (char *)kmem_zalloc(cnt_new *
+		    sizeof (cpu_idle_cb_item_t), KM_SLEEP);
+	}
+
+	/* Try to acquire cpu_lock if not held yet. */
+	if (!MUTEX_HELD(&cpu_lock)) {
+		mutex_enter(&cpu_lock);
+		unlock = 1;
+	}
+	/*
+	 * Pause all other CPUs (and let them run their pause threads).
+	 * It's guaranteed that no other threads will access cpu_idle_cb_array
+	 * after pause_cpus().
+	 */
+	if (!cpus_paused()) {
+		pause_cpus(NULL);
+		unpause = 1;
+	}
+
+	/* Copy contents to the new buffer if needed. */
+	if (buf_new != NULL) {
+		buf_old = (char *)cpu_idle_cb_array;
+		cnt_old = cpu_idle_cb_max;
+		if (buf_old != NULL) {
+			ASSERT(cnt_old != 0);
+			bcopy(cpu_idle_cb_array, buf_new,
+			    sizeof (cpu_idle_cb_item_t) * cnt_old);
+		}
+		cpu_idle_cb_array = (cpu_idle_cb_item_t *)buf_new;
+		cpu_idle_cb_max = cnt_new;
+	}
+
+	/* Insert into the array according to priority. */
+	ASSERT(cpu_idle_cb_curr < cpu_idle_cb_max);
+	for (i = cpu_idle_cb_curr; i > 0; i--) {
+		if (cpu_idle_cb_array[i - 1].impl->priority >= cip->priority) {
+			break;
+		}
+		cpu_idle_cb_array[i] = cpu_idle_cb_array[i - 1];
+	}
+	cpu_idle_cb_array[i].arg = cip->argument;
+	cpu_idle_cb_array[i].enter = cip->callback->idle_enter;
+	cpu_idle_cb_array[i].exit = cip->callback->idle_exit;
+	cpu_idle_cb_array[i].impl = cip;
+	cpu_idle_cb_curr++;
+
+	/* Resume other CPUs from the paused state if needed. */
+	if (unpause) {
+		start_cpus();
+	}
+	if (unlock) {
+		mutex_exit(&cpu_lock);
+	}
+
+	/* Free the old buffer if needed. */
+	if (buf_old != NULL) {
+		ASSERT(cnt_old != 0);
+		kmem_free(buf_old, cnt_old * sizeof (cpu_idle_cb_item_t));
+	}
+}
+
+static void
+cpu_idle_remove_callback(cpu_idle_cb_impl_t *cip)
+{
+	int i, found = 0;
+	int unlock = 0, unpause = 0;
+	cpu_idle_cb_state_t *sp;
+
+	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));
+
+	/* Try to acquire cpu_lock if not held yet. */
+	if (!MUTEX_HELD(&cpu_lock)) {
+		mutex_enter(&cpu_lock);
+		unlock = 1;
+	}
+	/*
+	 * Pause all other CPUs.
+	 * It's guaranteed that no other threads will access cpu_idle_cb_array
+	 * after pause_cpus().
+	 */
+	if (!cpus_paused()) {
+		pause_cpus(NULL);
+		unpause = 1;
+	}
+
+	/* Remove cip from the array. */
+	for (i = 0; i < cpu_idle_cb_curr; i++) {
+		if (found == 0) {
+			if (cpu_idle_cb_array[i].impl == cip) {
+				found = 1;
+			}
+		} else {
+			cpu_idle_cb_array[i - 1] = cpu_idle_cb_array[i];
+		}
+	}
+	ASSERT(found != 0);
+	cpu_idle_cb_curr--;
+
+	/*
+	 * Reset the property ready flag for all CPUs if no registered
+	 * callback is left, because cpu_idle_enter/exit stop updating
+	 * properties when no callback is registered.
+	 */
+	if (cpu_idle_cb_curr == 0) {
+		for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
+			sp->v.ready = B_FALSE;
+		}
+	}
+
+	/* Resume other CPUs from the paused state if needed. */
+	if (unpause) {
+		start_cpus();
+	}
+	if (unlock) {
+		mutex_exit(&cpu_lock);
+	}
+}
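The insertion loop keeps cpu_idle_cb_array sorted by descending priority (index 0 is the highest), shifting lower-priority entries down one slot. A runnable model of just that loop:

#include <stdio.h>

typedef struct item {
	int	priority;
} item_t;

static item_t	array[8];
static int	curr = 0;

/* Shift lower-priority entries down and drop the new item in place. */
static void
insert_by_prio(int prio)
{
	int i;

	for (i = curr; i > 0; i--) {
		if (array[i - 1].priority >= prio)
			break;
		array[i] = array[i - 1];
	}
	array[i].priority = prio;
	curr++;
}

int
main(void)
{
	int i;

	insert_by_prio(10);
	insert_by_prio(30);
	insert_by_prio(20);
	for (i = 0; i < curr; i++)
		(void) printf("%d ", array[i].priority);
	(void) printf("\n");	/* prints: 30 20 10 */
	return (0);
}

Because the enter path walks the array forward and the exit path walks it backward, this ordering gives higher-priority callbacks the first word on idle entry and the last word on exit.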
+
+int
+cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp,
+    void *arg, cpu_idle_callback_handle_t *hdlp)
+{
+	cpu_idle_cb_state_t *sp;
+	cpu_idle_cb_impl_t *cip = NULL;
+
+	/* First validate parameters. */
+	ASSERT(!CPU_ON_INTR(CPU));
+	ASSERT(CPU->cpu_seqid < max_ncpus);
+	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+	if (sp->v.index != 0) {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: register_callback called from callback.");
+		return (EBUSY);
+	} else if (cbp == NULL || hdlp == NULL) {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: NULL parameters in register_callback.");
+		return (EINVAL);
+	} else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE ||
+	    prio >= CPU_IDLE_CB_PRIO_RESV_BASE) {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: priority 0x%x out of range.", prio);
+		return (EINVAL);
+	} else if (cbp->version != CPU_IDLE_CALLBACK_VERS) {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: callback version %d is not supported.",
+		    cbp->version);
+		return (EINVAL);
+	}
+
+	mutex_enter(&cpu_idle_cb_lock);
+	/*
+	 * If the priority is not dynamic, check whether a callback with
+	 * this priority already exists.
+	 */
+	if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) {
+		for (cip = cpu_idle_cb_busy; cip != NULL;
+		    cip = cip->next) {
+			if (cip->priority == prio) {
+				mutex_exit(&cpu_idle_cb_lock);
+				cmn_err(CE_NOTE, "!cpu_event: callback with "
+				    "priority 0x%x already exists.", prio);
+				return (EEXIST);
+			}
+		}
+	}
+
+	cip = kmem_zalloc(sizeof (*cip), KM_SLEEP);
+	cip->callback = cbp;
+	cip->argument = arg;
+	cip->priority = prio;
+	cip->next = cpu_idle_cb_busy;
+	cpu_idle_cb_busy = cip;
+	cpu_idle_insert_callback(cip);
+	mutex_exit(&cpu_idle_cb_lock);
+
+	*hdlp = (cpu_idle_callback_handle_t)cip;
+
+	return (0);
+}
+
+int
+cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl)
+{
+	int rc = ENODEV;
+	cpu_idle_cb_state_t *sp;
+	cpu_idle_cb_impl_t *ip, **ipp;
+
+	ASSERT(!CPU_ON_INTR(CPU));
+	ASSERT(CPU->cpu_seqid < max_ncpus);
+	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+	if (sp->v.index != 0) {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: unregister_callback called from callback.");
+		return (EBUSY);
+	} else if (hdl == NULL) {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: hdl is NULL in unregister_callback.");
+		return (EINVAL);
+	}
+
+	ip = (cpu_idle_cb_impl_t *)hdl;
+	mutex_enter(&cpu_idle_cb_lock);
+	for (ipp = &cpu_idle_cb_busy; *ipp != NULL; ipp = &(*ipp)->next) {
+		if (*ipp == ip) {
+			*ipp = ip->next;
+			cpu_idle_remove_callback(ip);
+			rc = 0;
+			break;
+		}
+	}
+	mutex_exit(&cpu_idle_cb_lock);
+
+	if (rc == 0) {
+		kmem_free(ip, sizeof (*ip));
+	} else {
+		cmn_err(CE_NOTE,
+		    "!cpu_event: callback handle %p not found.", (void *)hdl);
+	}
+
+	return (rc);
+}
+
+static int
+cpu_idle_enter_state(cpu_idle_cb_state_t *sp, intptr_t state)
+{
+	sp->v.idle_state->cipv_intptr = state;
+	sp->v.enter_ts->cipv_hrtime = gethrtime_unscaled();
+	sp->v.last_busy->cipv_hrtime = sp->v.enter_ts->cipv_hrtime -
+	    sp->v.exit_ts->cipv_hrtime;
+	sp->v.total_busy->cipv_hrtime += sp->v.last_busy->cipv_hrtime;
+	if (sp->v.ready == B_FALSE) {
+		sp->v.ready = B_TRUE;
+		return (0);
+	}
+
+	return (1);
+}
+
+static void
+cpu_idle_exit_state(cpu_idle_cb_state_t *sp)
+{
+	sp->v.idle_state->cipv_intptr = CPU_IDLE_STATE_NORMAL;
+	sp->v.exit_ts->cipv_hrtime = gethrtime_unscaled();
+	sp->v.last_idle->cipv_hrtime = sp->v.exit_ts->cipv_hrtime -
+	    sp->v.enter_ts->cipv_hrtime;
+	sp->v.total_idle->cipv_hrtime += sp->v.last_idle->cipv_hrtime;
+}
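cpu_idle_enter_state() and cpu_idle_exit_state() keep four running statistics from just two timestamps: the busy span runs from the previous wakeup to this idle entry, and the idle span from this entry to the next wakeup. A worked model of the arithmetic, with arbitrary tick values standing in for gethrtime_unscaled():

#include <stdio.h>

typedef long long hrtime_t;

static hrtime_t	enter_ts, exit_ts, total_idle, total_busy;

/* Mirrors cpu_idle_enter_state(): close out the busy span. */
static void
idle_enter(hrtime_t now)
{
	enter_ts = now;
	total_busy += now - exit_ts;	/* busy since the last wakeup */
}

/* Mirrors cpu_idle_exit_state(): close out the idle span. */
static void
idle_exit(hrtime_t now)
{
	exit_ts = now;
	total_idle += now - enter_ts;	/* idle span that just ended */
}

int
main(void)
{
	idle_enter(100);	/* busy 0..100 */
	idle_exit(150);		/* idle 100..150 */
	idle_enter(170);	/* busy 150..170 */
	idle_exit(200);		/* idle 170..200 */
	(void) printf("busy=%lld idle=%lld\n", total_busy, total_idle);
	/* busy = 100 + 20 = 120, idle = 50 + 30 = 80 */
	return (0);
}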
+
+/*ARGSUSED*/
+int
+cpu_idle_enter(int state, int flag,
+    cpu_idle_check_wakeup_t check_func, void *check_arg)
+{
+	int i;
+	cpu_idle_cb_item_t *cip;
+	cpu_idle_cb_state_t *sp;
+	cpu_idle_callback_context_t ctx;
+#if defined(__x86)
+	ulong_t iflags;
+#endif
+
+	ctx = CPU_IDLE_GET_CTX(CPU);
+	ASSERT(CPU->cpu_seqid < max_ncpus);
+	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+	ASSERT(sp->v.index == 0);
+
+	/*
+	 * On x86, cpu_idle_enter can be called from the idle thread with
+	 * interrupts either enabled or disabled, so interrupts must be
+	 * disabled here.
+	 * On SPARC, cpu_idle_enter is called from the idle thread with
+	 * interrupts disabled, so no special handling is necessary.
+	 */
+#if defined(__x86)
+	iflags = intr_clear();
+#endif
+
+	/* Skip calling callbacks if the state isn't ready for this CPU. */
+	if (cpu_idle_enter_state(sp, state) == 0) {
+#if defined(__x86)
+		intr_restore(iflags);
+#endif
+		return (0);
+	}
+
+	for (i = 0, cip = cpu_idle_cb_array; i < cpu_idle_cb_curr; i++, cip++) {
+		/*
+		 * Increase the index so the corresponding idle_exit callback
+		 * will be invoked should an interrupt happen during the
+		 * idle_enter callback.
+		 */
+		sp->v.index++;
+
+		/* Call the idle_enter callback function if it's not NULL. */
+		if (cip->enter != NULL) {
+			cip->enter(cip->arg, ctx, check_func, check_arg);
+
+			/*
+			 * cpu_idle_enter runs with interrupts
+			 * disabled, so the idle_enter callbacks are
+			 * also called with interrupts disabled.
+			 * It is permissible for the callbacks to
+			 * enable interrupts, if they can also
+			 * handle the condition if an interrupt
+			 * occurs.
+			 *
+			 * However, if an interrupt occurs and we
+			 * return here without dealing with it, we
+			 * return to the cpu_idle_enter() caller
+			 * with EBUSY, and the caller will not
+			 * enter the idle state.
+			 *
+			 * We detect the interrupt by checking the
+			 * index value of the state pointer. If it
+			 * is not the index we incremented above,
+			 * then it was cleared while processing
+			 * the interrupt.
+			 *
+			 * Also note that at this point of the code
+			 * the normal index value will be one greater
+			 * than the variable 'i' in the loop, as it
+			 * hasn't yet been incremented.
+			 */
+			if (sp->v.index != i + 1) {
+#if defined(__x86)
+				intr_restore(iflags);
+#endif
+				return (EBUSY);
+			}
+		}
+	}
+#if defined(__x86)
+	intr_restore(iflags);
+#endif
+
+	return (0);
+}
+
+void
+cpu_idle_exit(int flag)
+{
+	int i;
+	cpu_idle_cb_item_t *cip;
+	cpu_idle_cb_state_t *sp;
+	cpu_idle_callback_context_t ctx;
+#if defined(__x86)
+	ulong_t iflags;
+#endif
+
+	ASSERT(CPU->cpu_seqid < max_ncpus);
+	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+
+#if defined(__sparc)
+	/*
+	 * On SPARC, cpu_idle_exit is only called from the idle thread
+	 * with interrupts disabled.
+	 */
+	if (sp->v.index != 0) {
+		ctx = CPU_IDLE_GET_CTX(CPU);
+		cpu_idle_exit_state(sp);
+		for (i = sp->v.index - 1; i >= 0; i--) {
+			cip = &cpu_idle_cb_array[i];
+			if (cip->exit != NULL) {
+				cip->exit(cip->arg, ctx, flag);
+			}
+		}
+		sp->v.index = 0;
+	}
+#elif defined(__x86)
+	/*
+	 * On x86, cpu_idle_exit is called from the idle thread or from an
+	 * interrupt handler. When called from an interrupt handler,
+	 * interrupts are disabled. When called from the idle thread,
+	 * interrupts may be disabled or enabled.
+	 */
+
+	/* Called from an interrupt; interrupts are already disabled. */
+	if (flag & CPU_IDLE_CB_FLAG_INTR) {
+		/*
+		 * Return if cpu_idle_exit has already been called or
+		 * there is no registered callback.
+		 */
+		if (sp->v.index == 0) {
+			return;
+		}
+		ctx = CPU_IDLE_GET_CTX(CPU);
+		cpu_idle_exit_state(sp);
+		for (i = sp->v.index - 1; i >= 0; i--) {
+			cip = &cpu_idle_cb_array[i];
+			if (cip->exit != NULL) {
+				cip->exit(cip->arg, ctx, flag);
+			}
+		}
+		sp->v.index = 0;
+
+	/* Called from the idle thread; interrupts must be disabled. */
+	} else {
+		iflags = intr_clear();
+		if (sp->v.index != 0) {
+			ctx = CPU_IDLE_GET_CTX(CPU);
+			cpu_idle_exit_state(sp);
+			for (i = sp->v.index - 1; i >= 0; i--) {
+				cip = &cpu_idle_cb_array[i];
+				if (cip->exit != NULL) {
+					cip->exit(cip->arg, ctx, flag);
+				}
+			}
+			sp->v.index = 0;
+		}
+		intr_restore(iflags);
+	}
+#endif
+}
+
+cpu_idle_callback_context_t
+cpu_idle_get_context(void)
+{
+	return (CPU_IDLE_GET_CTX(CPU));
+}
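The index handshake between cpu_idle_enter() and cpu_idle_exit() can be modeled in isolation: the enter path bumps a counter before each callback, an interrupt-driven exit path clears it, and the enter path detects the mismatch and aborts. A minimal sketch, where the simulated interrupt and the -1 return stand in for a real interrupt and EBUSY:

#include <stdio.h>

#define	NCB	3
static int	index_;		/* models the per-CPU sp->v.index */

/* Simulated idle_exit run from an interrupt handler: it unwinds the
 * callbacks already entered and clears the index. */
static void
intr_exit_path(void)
{
	index_ = 0;
}

static int
idle_enter_path(void)
{
	int i;

	for (i = 0; i < NCB; i++) {
		index_++;		/* arm the exit path first */
		/* ...the enter callback may enable interrupts here... */
		if (i == 1)		/* pretend an interrupt fires */
			intr_exit_path();
		if (index_ != i + 1)	/* index was cleared: bail out */
			return (-1);	/* the kernel returns EBUSY */
	}
	return (0);
}

int
main(void)
{
	(void) printf("enter -> %d\n", idle_enter_path()); /* -1: aborted */
	return (0);
}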
+
+/*
+ * Allocate property structures in groups of CPU_IDLE_VALUE_GROUP_SIZE to
+ * improve cache efficiency. To simplify the implementation, memory
+ * allocated for property structures is never freed.
+ */
+static void
+cpu_idle_prop_allocate_impl(void)
+{
+	int i;
+	size_t sz;
+	intptr_t buf;
+	cpu_idle_prop_impl_t *prop;
+	cpu_idle_prop_value_t *valp;
+
+	ASSERT(!CPU_ON_INTR(CPU));
+	prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE,
+	    KM_SLEEP);
+	sz = sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE * max_ncpus;
+	sz += CPU_CACHE_COHERENCE_SIZE;
+	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
+	valp = (cpu_idle_prop_value_t *)P2ROUNDUP(buf,
+	    CPU_CACHE_COHERENCE_SIZE);
+
+	for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) {
+		prop->value = valp;
+		prop->next = cpu_idle_prop_free;
+		cpu_idle_prop_free = prop;
+	}
+}
+
+int
+cpu_idle_prop_create_property(const char *name, cpu_idle_prop_type_t type,
+    cpu_idle_prop_update_t update, void *arg, cpu_idle_prop_handle_t *hdlp)
+{
+	int rc = EEXIST;
+	cpu_idle_prop_impl_t *prop;
+
+	ASSERT(!CPU_ON_INTR(CPU));
+	if (name == NULL || hdlp == NULL) {
+		cmn_err(CE_WARN,
+		    "!cpu_event: NULL parameters in create_property.");
+		return (EINVAL);
+	}
+
+	mutex_enter(&cpu_idle_prop_lock);
+	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
+		if (strcmp(prop->name, name) == 0) {
+			cmn_err(CE_NOTE,
+			    "!cpu_event: property %s already exists.", name);
+			break;
+		}
+	}
+	if (prop == NULL) {
+		if (cpu_idle_prop_free == NULL) {
+			cpu_idle_prop_allocate_impl();
+		}
+		ASSERT(cpu_idle_prop_free != NULL);
+		prop = cpu_idle_prop_free;
+		cpu_idle_prop_free = prop->next;
+		prop->next = cpu_idle_prop_busy;
+		cpu_idle_prop_busy = prop;
+
+		ASSERT(prop->value != NULL);
+		prop->name = strdup(name);
+		prop->type = type;
+		prop->update = update;
+		prop->private = arg;
+		prop->refcnt = 1;
+		*hdlp = prop;
+		rc = 0;
+	}
+	mutex_exit(&cpu_idle_prop_lock);
+
+	return (rc);
+}
+
+int
+cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl)
+{
+	int rc = ENODEV;
+	cpu_idle_prop_impl_t *prop, **propp;
+	cpu_idle_prop_value_t *valp;
+
+	ASSERT(!CPU_ON_INTR(CPU));
+	if (hdl == NULL) {
+		cmn_err(CE_WARN,
+		    "!cpu_event: hdl is NULL in destroy_property.");
+		return (EINVAL);
+	}
+
+	prop = (cpu_idle_prop_impl_t *)hdl;
+	mutex_enter(&cpu_idle_prop_lock);
+	for (propp = &cpu_idle_prop_busy; *propp != NULL;
+	    propp = &(*propp)->next) {
+		if (*propp == prop) {
+			ASSERT(prop->refcnt > 0);
+			if (atomic_cas_32(&prop->refcnt, 1, 0) == 1) {
+				*propp = prop->next;
+				strfree(prop->name);
+				valp = prop->value;
+				bzero(prop, sizeof (*prop));
+				prop->value = valp;
+				prop->next = cpu_idle_prop_free;
+				cpu_idle_prop_free = prop;
+				rc = 0;
+			} else {
+				rc = EBUSY;
+			}
+			break;
+		}
+	}
+	mutex_exit(&cpu_idle_prop_lock);
+
+	return (rc);
+}
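cpu_idle_prop_destroy_property() frees a property only when it holds the last reference, by atomically swinging the refcount from 1 to 0; any other value means outstanding handles, so it returns EBUSY. The same pattern sketched with C11 atomics in place of atomic_cas_32():

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned	refcnt = 1;	/* creation holds one reference */

/* Destroy succeeds only for the last reference: try to swing 1 -> 0. */
static int
try_destroy(void)
{
	unsigned one = 1;

	if (atomic_compare_exchange_strong(&refcnt, &one, 0))
		return (0);	/* freed */
	return (-1);		/* EBUSY: other handles still hold it */
}

int
main(void)
{
	atomic_fetch_add(&refcnt, 1);		/* a create_handle call */
	(void) printf("%d\n", try_destroy());	/* -1: refcnt was 2 */
	atomic_fetch_sub(&refcnt, 1);		/* a destroy_handle call */
	(void) printf("%d\n", try_destroy());	/* 0: last reference */
	return (0);
}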
+
+int
+cpu_idle_prop_create_handle(const char *name, cpu_idle_prop_handle_t *hdlp)
+{
+	int rc = ENODEV;
+	cpu_idle_prop_impl_t *prop;
+
+	ASSERT(!CPU_ON_INTR(CPU));
+	if (name == NULL || hdlp == NULL) {
+		cmn_err(CE_WARN,
+		    "!cpu_event: NULL parameters in create_handle.");
+		return (EINVAL);
+	}
+
+	mutex_enter(&cpu_idle_prop_lock);
+	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
+		if (strcmp(prop->name, name) == 0) {
+			/* Hold one reference count on the object. */
+			ASSERT(prop->refcnt > 0);
+			atomic_inc_32(&prop->refcnt);
+			*hdlp = (cpu_idle_prop_handle_t)prop;
+			rc = 0;
+			break;
+		}
+	}
+	mutex_exit(&cpu_idle_prop_lock);
+
+	return (rc);
+}
+
+int
+cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl)
+{
+	int rc = ENODEV;
+	cpu_idle_prop_impl_t *prop;
+
+	ASSERT(!CPU_ON_INTR(CPU));
+	if (hdl == NULL) {
+		cmn_err(CE_WARN,
+		    "!cpu_event: hdl is NULL in destroy_handle.");
+		return (EINVAL);
+	}
+
+	mutex_enter(&cpu_idle_prop_lock);
+	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
+		if (prop == hdl) {
+			/* Release the reference held in create_handle. */
+			ASSERT(prop->refcnt > 1);
+			atomic_dec_32(&prop->refcnt);
+			rc = 0;
+			break;
+		}
+	}
+	mutex_exit(&cpu_idle_prop_lock);
+
+	return (rc);
+}
+
+cpu_idle_prop_type_t
+cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl)
+{
+	ASSERT(hdl != NULL);
+	return (((cpu_idle_prop_impl_t *)hdl)->type);
+}
+
+const char *
+cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl)
+{
+	ASSERT(hdl != NULL);
+	return (((cpu_idle_prop_impl_t *)hdl)->name);
+}
+
+int
+cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl,
+    cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp)
+{
+	int idx, rc = 0;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+	if (hdl == NULL || valp == NULL) {
+		cmn_err(CE_NOTE, "!cpu_event: NULL parameters in prop_get.");
+		return (EINVAL);
+	}
+	idx = CPU_IDLE_CTX2IDX(ctx);
+	if (prop->update != NULL) {
+		cpu_idle_cb_state_t *sp;
+
+		ASSERT(CPU->cpu_seqid < max_ncpus);
+		sp = &cpu_idle_cb_state[CPU->cpu_seqid];
+		/* Use the CPU's idle-enter timestamp as the sequence number. */
+		rc = prop->update(prop->private,
+		    (uint64_t)sp->v.enter_ts->cipv_hrtime, &prop->value[idx]);
+	}
+	if (rc == 0) {
+		*valp = prop->value[idx];
+	}
+
+	return (rc);
+}
+
+uint32_t
+cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl,
+    cpu_idle_callback_context_t ctx)
+{
+	int idx;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(hdl != NULL);
+	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+	idx = CPU_IDLE_CTX2IDX(ctx);
+	return (prop->value[idx].cipv_uint32);
+}
+
+uint64_t
+cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl,
+    cpu_idle_callback_context_t ctx)
+{
+	int idx;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(hdl != NULL);
+	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+	idx = CPU_IDLE_CTX2IDX(ctx);
+	return (prop->value[idx].cipv_uint64);
+}
+
+intptr_t
+cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl,
+    cpu_idle_callback_context_t ctx)
+{
+	int idx;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(hdl != NULL);
+	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+	idx = CPU_IDLE_CTX2IDX(ctx);
+	return (prop->value[idx].cipv_intptr);
+}
+
+hrtime_t
+cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl,
+    cpu_idle_callback_context_t ctx)
+{
+	int idx;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(hdl != NULL);
+	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+	idx = CPU_IDLE_CTX2IDX(ctx);
+	return (prop->value[idx].cipv_hrtime);
+}
+
+void
+cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl,
+    cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val)
+{
+	int idx;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(hdl != NULL);
+	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
+	idx = CPU_IDLE_CTX2IDX(ctx);
+	prop->value[idx] = val;
+}
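cpu_idle_prop_get_value() supports lazily computed properties: if the property was created with an update function, every read calls it first to refresh the stored value (this is how the interrupt-count property below works). A user-level model of that pull-style read:

#include <stdio.h>

typedef struct prop {
	long	value;
	int	(*update)(long *valp);	/* optional pull hook */
} prop_t;

/* Reads go through the update hook when one is registered, so lazily
 * computed properties refresh on demand. */
static int
prop_get(prop_t *p, long *out)
{
	if (p->update != NULL && p->update(&p->value) != 0)
		return (-1);
	*out = p->value;
	return (0);
}

static int
count_reads(long *valp)
{
	static long calls;

	*valp = ++calls;	/* stand-in for a real recomputation */
	return (0);
}

int
main(void)
{
	prop_t p = { 0, count_reads };
	long v;

	(void) prop_get(&p, &v);
	(void) prop_get(&p, &v);
	(void) printf("%ld\n", v);	/* 2: recomputed on each read */
	return (0);
}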
+
+void
+cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, cpu_idle_prop_value_t val)
+{
+	int i, idx;
+	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
+
+	ASSERT(hdl != NULL);
+	for (i = 0; i < max_ncpus; i++) {
+		idx = CPU_IDLE_CTX2IDX(i);
+		prop->value[idx] = val;
+	}
+}
+
+/*ARGSUSED*/
+static int
+cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
+    cpu_idle_prop_value_t *valp)
+{
+	int i;
+	uint64_t val;
+
+	for (val = 0, i = 0; i < PIL_MAX; i++) {
+		val += CPU->cpu_stats.sys.intr[i];
+	}
+	valp->cipv_uint64 = val;
+
+	return (0);
+}
+
+uint_t
+cpu_idle_get_cpu_state(cpu_t *cp)
+{
+	ASSERT(cp != NULL && cp->cpu_seqid < max_ncpus);
+	return ((uint_t)cpu_idle_prop_get_uint32(
+	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
+	    CPU_IDLE_GET_CTX(cp)));
+}
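The interrupt-count property is a plain reduction over the calling CPU's per-PIL counters, recomputed on each read through the update hook modeled above. A standalone sketch of the reduction, with a hypothetical PIL_MAX of 15 and a stand-in array for CPU->cpu_stats.sys.intr[]:

#include <stdio.h>

#define	PIL_MAX	15	/* assumed; the kernel's PIL_MAX covers all PILs */

int
main(void)
{
	unsigned long intr[PIL_MAX] = { 5, 3, 0, 9 };	/* per-PIL counts */
	unsigned long total = 0;
	int i;

	/* Same reduction as cpu_idle_prop_update_intr_cnt(): the property
	 * value is the sum of this CPU's per-PIL interrupt counters. */
	for (i = 0; i < PIL_MAX; i++)
		total += intr[i];
	(void) printf("%lu\n", total);	/* 17 */
	return (0);
}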