diff options
Diffstat (limited to 'usr/src/uts/i86pc/io/cpudrv/cpudrv_mach.c')
-rw-r--r-- | usr/src/uts/i86pc/io/cpudrv/cpudrv_mach.c | 506 |
1 files changed, 506 insertions, 0 deletions
diff --git a/usr/src/uts/i86pc/io/cpudrv/cpudrv_mach.c b/usr/src/uts/i86pc/io/cpudrv/cpudrv_mach.c new file mode 100644 index 0000000000..4380f3cd10 --- /dev/null +++ b/usr/src/uts/i86pc/io/cpudrv/cpudrv_mach.c @@ -0,0 +1,506 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * CPU power management driver support for i86pc. + */ + +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/cpupm.h> +#include <sys/cpudrv_mach.h> +#include <sys/machsystm.h> + +/* + * Constants used by the Processor Device Notification handler + * that identify what kind of change has occurred. We currently + * only handle PPC_CHANGE_NOTIFICATION. The other two are + * ignored. + */ +#define PPC_CHANGE_NOTIFICATION 0x80 +#define CST_CHANGE_NOTIFICATION 0x81 +#define TPC_CHANGE_NOTIFICATION 0x82 + +/* + * Note that our driver numbers the power levels from lowest to + * highest starting at 1 (i.e., the lowest power level is 1 and + * the highest power level is cpupm->num_spd). The x86 modules get + * their power levels from ACPI which numbers power levels from + * highest to lowest starting at 0 (i.e., the lowest power level + * is (cpupm->num_spd - 1) and the highest power level is 0). So to + * map one of our driver power levels to one understood by ACPI we + * simply subtract our driver power level from cpupm->num_spd. Likewise, + * to map an ACPI power level to the proper driver power level, we + * subtract the ACPI power level from cpupm->num_spd. + */ +#define PM_2_PLAT_LEVEL(cpupm, pm_level) (cpupm->num_spd - pm_level) +#define PLAT_2_PM_LEVEL(cpupm, plat_level) (cpupm->num_spd - plat_level) + +extern boolean_t cpudrv_intel_init(cpudrv_devstate_t *); +extern boolean_t cpudrv_amd_init(cpudrv_devstate_t *); + +typedef struct cpudrv_mach_vendor { + boolean_t (*cpuv_init)(cpudrv_devstate_t *); +} cpudrv_mach_vendor_t; + +/* + * Table of supported vendors. + */ +static cpudrv_mach_vendor_t cpudrv_vendors[] = { + cpudrv_intel_init, + cpudrv_amd_init, + NULL +}; + +uint_t +cpudrv_pm_get_speeds(cpudrv_devstate_t *cpudsp, int **speeds) +{ + cpudrv_mach_state_t *mach_state = cpudsp->mach_state; + return (cpu_acpi_get_speeds(mach_state->acpi_handle, speeds)); +} + +void +cpudrv_pm_free_speeds(int *speeds, uint_t nspeeds) +{ + cpu_acpi_free_speeds(speeds, nspeeds); +} + +/* + * Change CPU speed using interface provided by module. + */ +int +cpudrv_pm_change_speed(cpudrv_devstate_t *cpudsp, cpudrv_pm_spd_t *new_spd) +{ + cpudrv_mach_state_t *mach_state = cpudsp->mach_state; + cpudrv_pm_t *cpupm; + uint32_t plat_level; + int ret; + + if (!(mach_state->caps & CPUDRV_P_STATES)) + return (DDI_FAILURE); + ASSERT(mach_state->cpupm_pstate_ops != NULL); + cpupm = &(cpudsp->cpudrv_pm); + plat_level = PM_2_PLAT_LEVEL(cpupm, new_spd->pm_level); + ret = mach_state->cpupm_pstate_ops->cpups_power(cpudsp, plat_level); + if (ret != 0) + return (DDI_FAILURE); + return (DDI_SUCCESS); +} + +/* + * Determine the cpu_id for the CPU device. + */ +boolean_t +cpudrv_pm_get_cpu_id(dev_info_t *dip, processorid_t *cpu_id) +{ + return ((*cpu_id = ddi_prop_get_int(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS, "reg", -1)) != -1); + +} + +/* + * All CPU instances have been initialized successfully. + */ +boolean_t +cpudrv_pm_power_ready(void) +{ + return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready()); +} + +/* + * All CPU instances have been initialized successfully. + */ +boolean_t +cpudrv_pm_throttle_ready(void) +{ + return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready()); +} + +/* + * Is the current thread the thread that is handling the + * PPC change notification? + */ +boolean_t +cpudrv_pm_is_governor_thread(cpudrv_pm_t *cpupm) +{ + return (curthread == cpupm->pm_governor_thread); +} + +/* + * Initialize the machine. + * See if a module exists for managing power for this CPU. + */ +boolean_t +cpudrv_mach_pm_init(cpudrv_devstate_t *cpudsp) +{ + cpudrv_mach_vendor_t *vendors; + cpudrv_mach_state_t *mach_state; + int ret; + + mach_state = cpudsp->mach_state = + kmem_zalloc(sizeof (cpudrv_mach_state_t), KM_SLEEP); + mach_state->caps = CPUDRV_NO_STATES; + + mach_state->acpi_handle = cpu_acpi_init(cpudsp->dip); + if (mach_state->acpi_handle == NULL) { + cpudrv_mach_pm_free(cpudsp); + cmn_err(CE_WARN, "!cpudrv_mach_pm_init: instance %d: " + "unable to get ACPI handle", + ddi_get_instance(cpudsp->dip)); + cmn_err(CE_NOTE, "!CPU power management will not function."); + return (B_FALSE); + } + + /* + * Loop through the CPU management module table and see if + * any of the modules implement CPU power management + * for this CPU. + */ + for (vendors = cpudrv_vendors; vendors->cpuv_init != NULL; vendors++) { + if (vendors->cpuv_init(cpudsp)) + break; + } + + /* + * Nope, we can't power manage this CPU. + */ + if (vendors == NULL) { + cpudrv_mach_pm_free(cpudsp); + return (B_FALSE); + } + + /* + * If P-state support exists for this system, then initialize it. + */ + if (mach_state->cpupm_pstate_ops != NULL) { + ret = mach_state->cpupm_pstate_ops->cpups_init(cpudsp); + if (ret != 0) { + cmn_err(CE_WARN, "!cpudrv_mach_pm_init: instance %d:" + " unable to initialize P-state support", + ddi_get_instance(cpudsp->dip)); + mach_state->cpupm_pstate_ops = NULL; + cpupm_disable(CPUPM_P_STATES); + } else { + mach_state->caps |= CPUDRV_P_STATES; + } + } + + if (mach_state->cpupm_tstate_ops != NULL) { + ret = mach_state->cpupm_tstate_ops->cputs_init(cpudsp); + if (ret != 0) { + cmn_err(CE_WARN, "!cpudrv_mach_pm_init: instance %d:" + " unable to initialize T-state support", + ddi_get_instance(cpudsp->dip)); + mach_state->cpupm_tstate_ops = NULL; + cpupm_disable(CPUPM_T_STATES); + } else { + mach_state->caps |= CPUDRV_T_STATES; + } + } + + if (mach_state->caps == CPUDRV_NO_STATES) { + cpudrv_mach_pm_free(cpudsp); + return (B_FALSE); + } + + return (B_TRUE); +} + +/* + * Free any resources allocated by cpudrv_mach_pm_init(). + */ +void +cpudrv_mach_pm_free(cpudrv_devstate_t *cpudsp) +{ + cpudrv_mach_state_t *mach_state = cpudsp->mach_state; + + if (mach_state == NULL) + return; + if (mach_state->cpupm_pstate_ops != NULL) { + mach_state->cpupm_pstate_ops->cpups_fini(cpudsp); + mach_state->cpupm_pstate_ops = NULL; + } + + if (mach_state->cpupm_tstate_ops != NULL) { + mach_state->cpupm_tstate_ops->cputs_fini(cpudsp); + mach_state->cpupm_tstate_ops = NULL; + } + + if (mach_state->acpi_handle != NULL) { + cpu_acpi_fini(mach_state->acpi_handle); + mach_state->acpi_handle = NULL; + } + + kmem_free(mach_state, sizeof (cpudrv_mach_state_t)); + cpudsp->mach_state = NULL; +} + +/* + * This routine changes the top speed to which the CPUs can transition by: + * + * - Resetting the up_spd for all speeds lower than the new top speed + * to point to the new top speed. + * - Updating the framework with a new "normal" (maximum power) for this + * device. + */ +void +cpudrv_pm_set_topspeed(void *ctx, int plat_level) +{ + cpudrv_devstate_t *cpudsp; + cpudrv_pm_t *cpupm; + cpudrv_pm_spd_t *spd; + cpudrv_pm_spd_t *top_spd; + dev_info_t *dip; + int pm_level; + int instance; + int i; + + dip = ctx; + instance = ddi_get_instance(dip); + cpudsp = ddi_get_soft_state(cpudrv_state, instance); + ASSERT(cpudsp != NULL); + + mutex_enter(&cpudsp->lock); + cpupm = &(cpudsp->cpudrv_pm); + pm_level = PLAT_2_PM_LEVEL(cpupm, plat_level); + for (i = 0, spd = cpupm->head_spd; spd; i++, spd = spd->down_spd) { + /* + * Don't mess with speeds that are higher than the new + * top speed. They should be out of range anyway. + */ + if (spd->pm_level > pm_level) + continue; + /* + * This is the new top speed. + */ + if (spd->pm_level == pm_level) + top_spd = spd; + + spd->up_spd = top_spd; + } + cpupm->targ_spd = top_spd; + + cpupm->pm_governor_thread = curthread; + + mutex_exit(&cpudsp->lock); + + (void) pm_update_maxpower(dip, 0, top_spd->pm_level); +} + +/* + * This routine reads the ACPI _PPC object. It's accessed as a callback + * by the ppm driver whenever a _PPC change notification is received. + */ +int +cpudrv_pm_get_topspeed(void *ctx) +{ + cpudrv_mach_state_t *mach_state; + cpu_acpi_handle_t handle; + cpudrv_devstate_t *cpudsp; + dev_info_t *dip; + int instance; + int plat_level; + + dip = ctx; + instance = ddi_get_instance(dip); + cpudsp = ddi_get_soft_state(cpudrv_state, instance); + ASSERT(cpudsp != NULL); + mach_state = cpudsp->mach_state; + handle = mach_state->acpi_handle; + + cpu_acpi_cache_ppc(handle); + plat_level = CPU_ACPI_PPC(handle); + return (plat_level); +} + +/* + * This routine reads the ACPI _TPC object. It's accessed as a callback + * by the cpu driver whenever a _TPC change notification is received. + */ +int +cpudrv_pm_get_topthrottle(cpudrv_devstate_t *cpudsp) +{ + cpudrv_mach_state_t *mach_state; + cpu_acpi_handle_t handle; + int throtl_level; + + mach_state = cpudsp->mach_state; + handle = mach_state->acpi_handle; + + cpu_acpi_cache_tpc(handle); + throtl_level = CPU_ACPI_TPC(handle); + return (throtl_level); +} + +/* + * Take care of CPU throttling when _TPC notification arrives + */ +void +cpudrv_pm_throttle_instance(cpudrv_devstate_t *cpudsp) +{ + cpudrv_mach_state_t *mach_state; + uint32_t new_level; + int ret; + + ASSERT(cpudsp != NULL); + mach_state = cpudsp->mach_state; + if (!(mach_state->caps & CPUDRV_T_STATES)) + return; + ASSERT(mach_state->cpupm_tstate_ops != NULL); + + /* + * Get the new T-State support level + */ + new_level = cpudrv_pm_get_topthrottle(cpudsp); + + /* + * Change the cpu throttling to the new level + */ + ret = mach_state->cpupm_tstate_ops->cputs_throttle(cpudsp, new_level); + if (ret != 0) { + cmn_err(CE_WARN, "Cannot change the cpu throttling to the new" + " level: %d, Instance: %d", new_level, cpudsp->cpu_id); + } +} + +/* + * Take care of CPU throttling when _TPC notification arrives + */ +void +cpudrv_pm_manage_throttling(void *ctx) +{ + cpudrv_devstate_t *cpudsp; + cpudrv_mach_state_t *mach_state; + cpudrv_tstate_domain_t *domain; + cpudrv_tstate_domain_node_t *domain_node; + int instance; + boolean_t is_ready; + + instance = ddi_get_instance((dev_info_t *)ctx); + cpudsp = ddi_get_soft_state(cpudrv_state, instance); + ASSERT(cpudsp != NULL); + + /* + * We currently refuse to power manage if the CPU is not ready to + * take cross calls (cross calls fail silently if CPU is not ready + * for it). + * + * Additionally, for x86 platforms we cannot power manage + * any one instance, until all instances have been initialized. + * That's because we don't know what the CPU domains look like + * until all instances have been initialized. + */ + is_ready = CPUDRV_PM_XCALL_IS_READY(cpudsp->cpu_id); + if (!is_ready) { + DPRINTF(D_POWER, ("cpudrv_power: instance %d: " + "CPU not ready for x-calls\n", instance)); + } else if (!(is_ready = cpudrv_pm_throttle_ready())) { + DPRINTF(D_POWER, ("cpudrv_power: instance %d: " + "waiting for all CPUs to be ready\n", instance)); + } + if (!is_ready) { + return; + } + + mach_state = cpudsp->mach_state; + domain_node = mach_state->tstate_domain_node; + domain = domain_node->tdn_domain; + + switch (domain->td_type) { + case CPU_ACPI_SW_ANY: + /* + * Just throttle the current instance and all other instances + * under the same domain will get throttled to the same level + */ + cpudrv_pm_throttle_instance(cpudsp); + break; + case CPU_ACPI_HW_ALL: + case CPU_ACPI_SW_ALL: + /* + * Along with the current instance, throttle all the CPU's that + * belong to the same domain + */ + mutex_enter(&domain->td_lock); + for (domain_node = domain->td_node; domain_node != NULL; + domain_node = domain_node->tdn_next) + cpudrv_pm_throttle_instance(domain_node->tdn_cpudsp); + mutex_exit(&domain->td_lock); + break; + + default: + cmn_err(CE_WARN, "Not a valid coordination type (%x) to" + " throttle cpu", domain->td_domain); + break; + } +} + +/* + * This notification handler is called whenever the ACPI _PPC + * object changes. The _PPC is a sort of governor on power levels. + * It sets an upper threshold on which, _PSS defined, power levels + * are usuable. The _PPC value is dynamic and may change as properties + * (i.e., thermal or AC source) of the system change. + */ +/* ARGSUSED */ +static void +cpudrv_pm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) +{ + /* + * We only handle _PPC change notifications. + */ + if (val == PPC_CHANGE_NOTIFICATION) + cpudrv_pm_redefine_topspeed(ctx); + else if (val == TPC_CHANGE_NOTIFICATION) { + cpudrv_pm_manage_throttling(ctx); + } +} + +void +cpudrv_pm_install_notify_handler(cpudrv_devstate_t *cpudsp, dev_info_t *dip) +{ + cpudrv_mach_state_t *mach_state = cpudsp->mach_state; + cpu_acpi_install_notify_handler(mach_state->acpi_handle, + cpudrv_pm_notify_handler, dip); +} + +void +cpudrv_pm_redefine_topspeed(void *ctx) +{ + /* + * This should never happen, unless ppm does not get loaded. + */ + if (cpupm_redefine_topspeed == NULL) { + cmn_err(CE_WARN, "cpudrv_pm_redefine_topspeed: " + "cpupm_redefine_topspeed has not been initialized - " + "ignoring notification"); + return; + } + + /* + * ppm callback needs to handle redefinition for all CPUs in + * the domain. + */ + (*cpupm_redefine_topspeed)(ctx); +} |