diff options
| author | Eric Saxe <Eric.Saxe@Sun.COM> | 2009-02-25 21:04:18 -0800 |
|---|---|---|
| committer | Eric Saxe <Eric.Saxe@Sun.COM> | 2009-02-25 21:04:18 -0800 |
| commit | 0e7515250c8395f368aa45fb9acae7c4f8f8b786 (patch) | |
| tree | 5c3abde4ff53a950ad424ce362fd793369c06872 /usr/src/uts/common/os/cpu_pm.c | |
| parent | 9a5d73e03cd3312ddb571a748c40a63c58bd66e5 (diff) | |
| download | illumos-gate-0e7515250c8395f368aa45fb9acae7c4f8f8b786.tar.gz | |
PSARC 2008/777 cpupm keyword mode extensions
PSARC 2008/663 CPU Deep Idle Keyword
6567156 bring CPU power awareness to the dispatcher
6700904 deeper C-State support required on follow-ons to Intel Penryn processor generation microarchitecture
6805661 cmt_root may contain duplicates on UMA systems
--HG--
rename : usr/src/uts/i86pc/io/cpudrv/cpudrv_mach.c => usr/src/uts/i86pc/io/cpudrv_mach.c
rename : usr/src/uts/i86pc/io/cpudrv/cpu_acpi.c => usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
rename : usr/src/uts/i86pc/io/cpudrv/cpudrv_amd.c => usr/src/uts/i86pc/os/cpupm/cpupm_amd.c
rename : usr/src/uts/i86pc/io/cpudrv/cpudrv_intel.c => usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
rename : usr/src/uts/i86pc/os/cpupm.c => usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
rename : usr/src/uts/i86pc/io/cpudrv/cpudrv_throttle.c => usr/src/uts/i86pc/os/cpupm/cpupm_throttle.c
rename : usr/src/uts/i86pc/io/cpudrv/pwrnow.c => usr/src/uts/i86pc/os/cpupm/pwrnow.c
rename : usr/src/uts/i86pc/io/cpudrv/speedstep.c => usr/src/uts/i86pc/os/cpupm/speedstep.c
rename : usr/src/uts/i86pc/sys/cpupm.h => usr/src/uts/i86pc/sys/cpupm_mach.h
rename : usr/src/uts/i86pc/sys/cpudrv_throttle.h => usr/src/uts/i86pc/sys/cpupm_throttle.h
Diffstat (limited to 'usr/src/uts/common/os/cpu_pm.c')
| -rw-r--r-- | usr/src/uts/common/os/cpu_pm.c | 840 |
1 file changed, 840 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/cpu_pm.c b/usr/src/uts/common/os/cpu_pm.c new file mode 100644 index 0000000000..848907af1d --- /dev/null +++ b/usr/src/uts/common/os/cpu_pm.c @@ -0,0 +1,840 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/cpu_pm.h> +#include <sys/cmn_err.h> +#include <sys/sdt.h> + +/* + * Solaris Event Based CPU Power Manager + * + * This file implements platform independent event based CPU power management. + * When CPUs are configured into the system, the CMT scheduling subsystem will + * query the platform to determine if the CPU belongs to any power management + * domains. That is, sets of CPUs that share power management states. + * + * Active Power Management domains represent a group of CPUs across which the + * Operating System can request speed changes (which may in turn result + * in voltage changes). This allows the operating system to trade off + * performance for power savings. + * + * Idle Power Management domains can enter power savings states when they are + * unutilized. 
These states allow the Operating System to trade off power + * for performance (in the form of latency to transition from the idle state + * to an active one). + * + * For each active and idle power domain the CMT subsystem instantiates, a + * cpupm_domain_t structure is created. As the dispatcher schedules threads + * to run on the system's CPUs, it will also track the utilization of the + * enumerated power domains. Significant changes in utilization will result + * in the dispatcher sending the power manager events that relate to the + * utilization of the power domain. The power manager recieves the events, + * and in the context of the policy objectives in force, may decide to request + * the domain's power/performance state be changed. + * + * Under the "elastic" CPUPM policy, when the utilization rises, the CPU power + * manager will request the CPUs in the domain run at their fastest (and most + * power consuming) state. When the domain becomes idle (utilization at zero), + * the power manager will request that the CPUs run at a speed that saves the + * most power. + * + * The advantage of this scheme, is that the CPU power manager working with the + * dispatcher can be extremely responsive to changes in utilization. Optimizing + * for performance in the presence of utilization, and power savings in the + * presence of idleness. Such close collaboration with the dispatcher has other + * benefits that will play out in the form of more sophisticated power / + * performance policy in the near future. + * + * Avoiding state thrashing in the presence of transient periods of utilization + * and idleness while still being responsive to non-transient periods is key. + * The power manager implmeents several "governors" that are used to throttle + * state transitions when a significant amount of transient idle or transient + * work is detected. + * + * Kernel background activity (e.g. taskq threads) are by far the most common + * form of transient utilization. 
Ungoverned in the face of this utililzation, + * hundreds of state transitions per second would result on an idle system. + * + * Transient idleness is common when a thread briefly yields the CPU to + * wait for an event elsewhere in the system. Where the idle period is short + * enough, the overhead associated with making the state transition doesn't + * justify the power savings. + */ + +static cpupm_domain_t *cpupm_domains = NULL; + +/* + * Uninitialized state of CPU power management is disabled + */ +cpupm_policy_t cpupm_policy = CPUPM_POLICY_DISABLED; + +/* + * Periods of utilization lasting less than this time interval are characterized + * as transient. State changes associated with transient work are considered + * to be mispredicted. That is, it's not worth raising and lower power states + * where the utilization lasts for less than this interval. + */ +hrtime_t cpupm_tw_predict_interval; + +/* + * Periods of idleness lasting less than this time interval are characterized + * as transient. State changes associated with transient idle are considered + * to be mispredicted. That is, it's not worth lowering and raising power + * states where the idleness lasts for less than this interval. + */ +hrtime_t cpupm_ti_predict_interval; + +/* + * Number of mispredictions after which future transitions will be governed. + */ +int cpupm_mispredict_thresh = 2; + +/* + * Likewise, the number of mispredicted governed transitions after which the + * governor will be removed. + */ +int cpupm_mispredict_gov_thresh = 10; + +/* + * The transient work and transient idle prediction intervals are initialized + * to be some multiple of the amount of time it takes to transition a power + * domain from the highest to the lowest power state, and back again, which + * is measured. + * + * The default values of those multiples are specified here. 
Tuning them higher + * will result in the transient work, and transient idle governors being used + * more aggresively, which limits the frequency of state transitions at the + * expense of performance and power savings, respectively. + */ +#define CPUPM_TI_GOV_DEFAULT_MULTIPLE 600 +#define CPUPM_TW_GOV_DEFAULT_MULTIPLE 25 + +/* + * Number of high=>low=>high measurements performed, of which the average + * is taken. + */ +#define CPUPM_BENCHMARK_ITERS 5 + +int cpupm_ti_gov_multiple = CPUPM_TI_GOV_DEFAULT_MULTIPLE; +int cpupm_tw_gov_multiple = CPUPM_TW_GOV_DEFAULT_MULTIPLE; + + +static int cpupm_governor_initialize(void); +static void cpupm_state_change_global(cpupm_dtype_t, cpupm_state_name_t); + +cpupm_policy_t +cpupm_get_policy(void) +{ + return (cpupm_policy); +} + +int +cpupm_set_policy(cpupm_policy_t new_policy) +{ + static int gov_init = 0; + int result = 0; + + mutex_enter(&cpu_lock); + if (new_policy == cpupm_policy) { + mutex_exit(&cpu_lock); + return (result); + } + + /* + * Pausing CPUs causes a high priority thread to be scheduled + * on all other CPUs (besides the current one). This locks out + * other CPUs from making CPUPM state transitions. + */ + switch (new_policy) { + case CPUPM_POLICY_DISABLED: + pause_cpus(NULL); + cpupm_policy = CPUPM_POLICY_DISABLED; + start_cpus(); + + result = cmt_pad_disable(PGHW_POW_ACTIVE); + + /* + * Once PAD has been enabled, it should always be possible + * to disable it. + */ + ASSERT(result == 0); + + /* + * Bring all the active power domains to the maximum + * performance state. + */ + cpupm_state_change_global(CPUPM_DTYPE_ACTIVE, + CPUPM_STATE_MAX_PERF); + + break; + case CPUPM_POLICY_ELASTIC: + + result = cmt_pad_enable(PGHW_POW_ACTIVE); + if (result < 0) { + /* + * Failed to enable PAD across the active power + * domains, which may well be because none were + * enumerated. + */ + break; + } + + pause_cpus(NULL); + /* + * Attempt to initialize the governor parameters the first + * time through. 
+ */ + if (gov_init == 0) { + result = cpupm_governor_initialize(); + if (result == 0) { + gov_init = 1; + } else { + /* + * Failed to initialize the governor parameters + */ + start_cpus(); + break; + } + } + cpupm_policy = CPUPM_POLICY_ELASTIC; + start_cpus(); + + break; + default: + cmn_err(CE_WARN, "Attempt to set unknown CPUPM policy %d\n", + new_policy); + ASSERT(0); + break; + } + mutex_exit(&cpu_lock); + + return (result); +} + +/* + * Look for an existing power domain + */ +static cpupm_domain_t * +cpupm_domain_find(id_t id, cpupm_dtype_t type) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + cpupm_domain_t *dom; + + dom = cpupm_domains; + while (dom != NULL) { + if (id == dom->cpd_id && type == dom->cpd_type) + return (dom); + dom = dom->cpd_next; + } + return (NULL); +} + +/* + * Create a new domain + */ +static cpupm_domain_t * +cpupm_domain_create(id_t id, cpupm_dtype_t type) +{ + cpupm_domain_t *dom; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + dom = kmem_zalloc(sizeof (cpupm_domain_t), KM_SLEEP); + dom->cpd_id = id; + dom->cpd_type = type; + + /* Link into the known domain list */ + dom->cpd_next = cpupm_domains; + cpupm_domains = dom; + + return (dom); +} + +static void +cpupm_domain_state_enum(struct cpu *cp, cpupm_domain_t *dom) +{ + /* + * In the envent we're enumerating because the domain's state + * configuration has changed, toss any existing states. + */ + if (dom->cpd_nstates > 0) { + kmem_free(dom->cpd_states, + sizeof (cpupm_state_t) * dom->cpd_nstates); + dom->cpd_nstates = 0; + } + + /* + * Query to determine the number of states, allocate storage + * large enough to hold the state information, and pass it back + * to the platform driver to complete the enumeration. 
+ */ + dom->cpd_nstates = cpupm_plat_state_enumerate(cp, dom->cpd_type, NULL); + + if (dom->cpd_nstates == 0) + return; + + dom->cpd_states = + kmem_zalloc(dom->cpd_nstates * sizeof (cpupm_state_t), KM_SLEEP); + (void) cpupm_plat_state_enumerate(cp, dom->cpd_type, dom->cpd_states); +} + +/* + * Initialize the specified type of power domain on behalf of the CPU + */ +cpupm_domain_t * +cpupm_domain_init(struct cpu *cp, cpupm_dtype_t type) +{ + cpupm_domain_t *dom; + id_t did; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Instantiate the domain if it doesn't already exist + * and enumerate its power states. + */ + did = cpupm_domain_id(cp, type); + dom = cpupm_domain_find(did, type); + if (dom == NULL) { + dom = cpupm_domain_create(did, type); + cpupm_domain_state_enum(cp, dom); + } + + /* + * Named state initialization + */ + if (type == CPUPM_DTYPE_ACTIVE) { + /* + * For active power domains, the highest performance + * state is defined as first state returned from + * the domain enumeration. + */ + dom->cpd_named_states[CPUPM_STATE_MAX_PERF] = + &dom->cpd_states[0]; + dom->cpd_named_states[CPUPM_STATE_LOW_POWER] = + &dom->cpd_states[dom->cpd_nstates - 1]; + + /* + * Begin by assuming CPU is running at the max perf state. 
+ */ + dom->cpd_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; + } + + return (dom); +} + +/* + * Return the id associated with the given type of domain + * to which cp belongs + */ +id_t +cpupm_domain_id(struct cpu *cp, cpupm_dtype_t type) +{ + return (cpupm_plat_domain_id(cp, type)); +} + +/* + * Initiate a state change for the specified domain on behalf of cp + */ +int +cpupm_change_state(struct cpu *cp, cpupm_domain_t *dom, cpupm_state_t *state) +{ + if (cpupm_plat_change_state(cp, state) < 0) + return (-1); + + DTRACE_PROBE2(cpupm__change__state, + cpupm_domain_t *, dom, + cpupm_state_t *, state); + + dom->cpd_state = state; + return (0); +} + +/* + * Interface into the CPU power manager to indicate a significant change + * in utilization of the specified active power domain + */ +void +cpupm_utilization_event(struct cpu *cp, hrtime_t now, cpupm_domain_t *dom, + cpupm_util_event_t event) +{ + cpupm_state_t *new_state = NULL; + hrtime_t last; + + if (cpupm_policy == CPUPM_POLICY_DISABLED) { + return; + } + + /* + * What follows is a simple elastic power state management policy. + * + * If the utilization has become non-zero, and the domain was + * previously at it's lowest power state, then transition it + * to the highest state in the spirit of "race to idle". + * + * If the utilization has dropped to zero, then transition the + * domain to its lowest power state. + * + * Statistics are maintained to implement governors to reduce state + * transitions resulting from either transient work, or periods of + * transient idleness on the domain. + */ + switch (event) { + case CPUPM_DOM_REMAIN_BUSY: + + /* + * We've received an event that the domain is running a thread + * that's made it to the end of it's time slice. If we are at + * low power, then raise it. If the transient work governor + * is engaged, then remove it. 
+ */ + if (dom->cpd_state == + dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) { + new_state = + dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; + if (dom->cpd_tw_governed == B_TRUE) { + dom->cpd_tw_governed = B_FALSE; + dom->cpd_tw = 0; + } + } + break; + + case CPUPM_DOM_BUSY_FROM_IDLE: + last = dom->cpd_last_lower; + dom->cpd_last_raise = now; + + DTRACE_PROBE3(cpupm__raise__req, + cpupm_domain_t *, dom, + hrtime_t, last, + hrtime_t, now); + + if (dom->cpd_state == + dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) { + + /* + * There's non-zero utilization, and the domain is + * running in the lower power state. Before we + * consider raising power, perform some book keeping + * for the transient idle governor. + */ + if (dom->cpd_ti_governed == B_FALSE) { + if ((now - last) < cpupm_ti_predict_interval) { + /* + * We're raising the domain power and + * we *just* lowered it. Consider + * this a mispredicted power state + * transition due to a transient + * idle period. + */ + if (++dom->cpd_ti >= + cpupm_mispredict_thresh) { + /* + * There's enough transient + * idle transitions to + * justify governing future + * lowering requests. + */ + dom->cpd_ti_governed = B_TRUE; + dom->cpd_ti = 0; + DTRACE_PROBE1( + cpupm__ti__governed, + cpupm_domain_t *, dom); + } + } else { + /* + * We correctly predicted the last + * lowering. + */ + dom->cpd_ti = 0; + } + } + if (dom->cpd_tw_governed == B_TRUE) { + /* + * Raise requests are governed due to + * transient work. + */ + DTRACE_PROBE1(cpupm__raise__governed, + cpupm_domain_t *, dom); + + /* + * It's likely that we'll be governed for a + * while. If the transient idle governor is + * also in place, examine the preceeding idle + * interval to see if that still makes sense. 
+ */ + if (dom->cpd_ti_governed == B_TRUE && + ((now - last) >= + cpupm_ti_predict_interval)) { + if (++dom->cpd_ti >= + cpupm_mispredict_gov_thresh) { + dom->cpd_ti_governed = + B_FALSE; + dom->cpd_ti = 0; + } + } + return; + } + /* + * Prepare to transition to the higher power state + */ + new_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; + + } else if (dom->cpd_state == + dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) { + + /* + * Utilization is non-zero, and we're already running + * in the higher power state. Take this opportunity to + * perform some book keeping if the last lowering + * request was governed. + */ + if (dom->cpd_ti_governed == B_TRUE) { + if ((now - last) >= cpupm_ti_predict_interval) { + /* + * The domain is transient idle + * governed, and we mispredicted + * governing the last lowering request. + */ + if (++dom->cpd_ti >= + cpupm_mispredict_gov_thresh) { + /* + * There's enough non-transient + * idle periods to justify + * removing the governor. + */ + dom->cpd_ti_governed = B_FALSE; + dom->cpd_ti = 0; + DTRACE_PROBE1( + cpupm__ti__ungoverned, + cpupm_domain_t *, dom); + } + } else { + /* + * Correctly predicted governing the + * last lowering request. + */ + dom->cpd_ti = 0; + } + } + } + break; + + case CPUPM_DOM_IDLE_FROM_BUSY: + last = dom->cpd_last_raise; + dom->cpd_last_lower = now; + + DTRACE_PROBE3(cpupm__lower__req, + cpupm_domain_t *, dom, + hrtime_t, last, + hrtime_t, now); + + if (dom->cpd_state == + dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) { + + /* + * The domain is idle, and is running in the highest + * performance state. Before we consider lowering power, + * perform some book keeping for the transient work + * governor. + */ + if (dom->cpd_tw_governed == B_FALSE) { + if ((now - last) < cpupm_tw_predict_interval) { + /* + * We're lowering the domain power and + * we *just* raised it. Consider the + * last raise mispredicted due to + * transient work. 
+ */ + if (++dom->cpd_tw >= + cpupm_mispredict_thresh) { + /* + * There's enough transient idle + * transitions to justify + * governing future lowering + * requests. + */ + dom->cpd_tw_governed = B_TRUE; + dom->cpd_tw = 0; + DTRACE_PROBE1( + cpupm__tw__governed, + cpupm_domain_t *, dom); + } + } else { + /* + * We correctly predicted during the + * last raise. + */ + dom->cpd_tw = 0; + } + } + if (dom->cpd_ti_governed == B_TRUE) { + /* + * Lowering requests are governed due to + * transient idleness. + */ + DTRACE_PROBE1(cpupm__lowering__governed, + cpupm_domain_t *, dom); + + /* + * It's likely that we'll be governed for a + * while. If the transient work governor is + * also in place, examine the preceeding busy + * interval to see if that still makes sense. + */ + if (dom->cpd_tw_governed == B_TRUE && + ((now - last) >= + cpupm_tw_predict_interval)) { + if (++dom->cpd_tw >= + cpupm_mispredict_gov_thresh) { + dom->cpd_tw_governed = + B_FALSE; + dom->cpd_tw = 0; + } + } + return; + } + + /* + * Prepare to transition to a lower power state. + */ + new_state = + dom->cpd_named_states[CPUPM_STATE_LOW_POWER]; + + } else if (dom->cpd_state == + dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) { + + /* + * The domain is idle, and we're already running in + * the lower power state. Take this opportunity to + * perform some book keeping if the last raising + * request was governed. + */ + if (dom->cpd_tw_governed == B_TRUE) { + if ((now - last) >= cpupm_tw_predict_interval) { + /* + * The domain is transient work + * governed, and we mispredicted + * governing the last raising request. + */ + if (++dom->cpd_tw >= + cpupm_mispredict_gov_thresh) { + /* + * There's enough non-transient + * work to justify removing + * the governor. + */ + dom->cpd_tw_governed = B_FALSE; + dom->cpd_tw = 0; + DTRACE_PROBE1( + cpupm__tw__ungoverned, + cpupm_domain_t *, dom); + } + } else { + /* + * We correctly predicted governing + * the last raise. 
+ */ + dom->cpd_tw = 0; + } + } + } + break; + } + /* + * Change the power state + * Not much currently done if this doesn't succeed + */ + if (new_state) + (void) cpupm_change_state(cp, dom, new_state); +} + + +/* + * Interface called by platforms to dynamically change the + * MAX performance cpupm state + */ +void +cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level) +{ + cpupm_domain_t *dom; + id_t did; + cpupm_dtype_t type = CPUPM_DTYPE_ACTIVE; + boolean_t change_state = B_FALSE; + cpupm_state_t *new_state = NULL; + + did = cpupm_domain_id(cp, type); + mutex_enter(&cpu_lock); + dom = cpupm_domain_find(did, type); + mutex_exit(&cpu_lock); + + /* + * Can use a lock to avoid changing the power state of the cpu when + * CPUPM_STATE_MAX_PERF is getting changed. + * Since the occurance of events to change MAX_PERF is not frequent, + * it may not be a good idea to overburden with locks. In the worst + * case, for one cycle the power may not get changed to the required + * level + */ + if (dom != NULL) { + if (dom->cpd_state == + dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) { + change_state = B_TRUE; + } + + /* + * If an out of range level is passed, use the lowest supported + * speed. + */ + if (max_perf_level >= dom->cpd_nstates && + dom->cpd_nstates > 1) { + max_perf_level = dom->cpd_nstates - 1; + } + + dom->cpd_named_states[CPUPM_STATE_MAX_PERF] = + &dom->cpd_states[max_perf_level]; + + /* + * If the current state is MAX_PERF, change the current state + * to the new MAX_PERF + */ + if (change_state) { + new_state = + dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; + if (new_state) { + (void) cpupm_change_state(cp, dom, new_state); + } + } + } +} + +/* + * Benchmark some power state transitions and use the transition latencies as + * a basis for initializing parameters for the transient idle and transient + * work governors. + * + * Returns 0 on success or -1 if the governor parameters could not be + * initialized. 
+ */ +static int +cpupm_governor_initialize(void) +{ + cpu_t *cp = CPU; + cpupm_domain_t *dom; + cpupm_state_t *low, *high; + id_t did; + hrtime_t start, delta, deltas = 0; + int iterations; + + did = cpupm_domain_id(cp, CPUPM_DTYPE_ACTIVE); + if (did == CPUPM_NO_DOMAIN) + return (-1); + + dom = cpupm_domain_find(did, CPUPM_DTYPE_ACTIVE); + if (dom == NULL) + return (-1); + + low = dom->cpd_named_states[CPUPM_STATE_LOW_POWER]; + high = dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; + + for (iterations = 0; iterations < CPUPM_BENCHMARK_ITERS; iterations++) { + + /* + * Measure the amount of time it takes to transition the + * domain down to the lowest, and back to the highest power + * state. + */ + start = gethrtime_unscaled(); + (void) cpupm_change_state(cp, dom, low); + (void) cpupm_change_state(cp, dom, high); + delta = gethrtime_unscaled() - start; + + DTRACE_PROBE1(cpupm__benchmark__latency, + hrtime_t, delta); + + deltas += delta; + } + + /* + * Figure the average latency, and tune the transient work and + * transient idle prediction intervals accordingly. + */ + delta = deltas / iterations; + + cpupm_ti_predict_interval = delta * cpupm_ti_gov_multiple; + cpupm_tw_predict_interval = delta * cpupm_tw_gov_multiple; + + return (0); +} + +/* + * Initiate a state change in all CPUPM domain instances of the specified type + */ +static void +cpupm_state_change_global(cpupm_dtype_t type, cpupm_state_name_t state) +{ + cpu_t *cp; + pg_cmt_t *pwr_pg; + cpupm_domain_t *dom; + group_t *hwset; + group_iter_t giter; + pg_cpu_itr_t cpu_iter; + pghw_type_t hw; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + switch (type) { + case CPUPM_DTYPE_ACTIVE: + hw = PGHW_POW_ACTIVE; + break; + default: + /* + * Power domain types other than "active" unsupported. 
+ */ + ASSERT(type == CPUPM_DTYPE_ACTIVE); + return; + } + + if ((hwset = pghw_set_lookup(hw)) == NULL) + return; + + /* + * Iterate over the power domains + */ + group_iter_init(&giter); + while ((pwr_pg = group_iterate(hwset, &giter)) != NULL) { + + dom = (cpupm_domain_t *)pwr_pg->cmt_pg.pghw_handle; + + /* + * Iterate over the CPUs in each domain + */ + PG_CPU_ITR_INIT(pwr_pg, cpu_iter); + while ((cp = pg_cpu_next(&cpu_iter)) != NULL) { + (void) cpupm_change_state(cp, dom, + dom->cpd_named_states[state]); + } + } +} |
