diff options
author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
---|---|---|
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/os/sunpm.c | |
download | illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz |
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/os/sunpm.c')
-rw-r--r-- | usr/src/uts/common/os/sunpm.c | 9096 |
1 files changed, 9096 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/sunpm.c b/usr/src/uts/common/os/sunpm.c new file mode 100644 index 0000000000..d209375e29 --- /dev/null +++ b/usr/src/uts/common/os/sunpm.c @@ -0,0 +1,9096 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * sunpm.c builds sunpm.o "power management framework" + * kernel-resident power management code. Implements power management + * policy + * Assumes: all backwards compat. device components wake up on & + * the pm_info pointer in dev_info is initially NULL + * + * PM - (device) Power Management + * + * Each device may have 0 or more components. If a device has no components, + * then it can't be power managed. Each component has 2 or more + * power states. + * + * "Backwards Compatible" (bc) devices: + * There are two different types of devices from the point of view of this + * code. The original type, left over from the original PM implementation on + * the voyager platform are known in this code as "backwards compatible" + * devices (PM_ISBC(dip) returns true). + * They are recognized by the pm code by the lack of a pm-components property + * and a call made by the driver to pm_create_components(9F). + * For these devices, component 0 is special, and represents the power state + * of the device. If component 0 is to be set to power level 0 (off), then + * the framework must first call into the driver's detach(9E) routine with + * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device. + * After setting component 0 from 0 to a non-zero power level, a call must be + * made into the driver's attach(9E) routine with DDI_PM_RESUME. + * + * Currently, the only way to get a bc device power managed is via a set of + * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm. + * + * For non-bc devices, the driver describes the components by exporting a + * pm-components(9P) property that tells how many components there are, + * tells what each component's power state values are, and provides human + * readable strings (currently unused) for each component name and power state. + * Devices which export pm-components(9P) are automatically power managed + * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M) + * after parsing power.conf(4)). + * For these devices, all components are considered independent of each other, + * and it is up to the driver to decide when a transition requires saving or + * restoring hardware state. + * + * Each device component also has a threshold time associated with each power + * transition (see power.conf(4)), and a busy/idle state maintained by the + * driver calling pm_idle_component(9F) and pm_busy_component(9F). + * Components are created idle. + * + * The PM framework provides several functions: + * -implement PM policy as described in power.conf(4) + * Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4). + * Policies consist of: + * -set threshold values (defaults if none provided by pmconfig) + * -set dependencies among devices + * -enable/disable autopm + * -turn down idle components based on thresholds (if autopm is enabled) + * (aka scanning) + * -maintain power states based on dependencies among devices + * -upon request, or when the frame buffer powers off, attempt to turn off + * all components that are idle or become idle over the next (10 sec) + * period in an attempt to get down to an EnergyStar compliant state + * -prevent powering off of a device which exported the + * pm-no-involuntary-power-cycles property without active involvement of + * the device's driver (so no removing power when the device driver is + * not attached) + * -provide a mechanism for a device driver to request that a device's component + * be brought back to the power level necessary for the use of the device + * -allow a process to directly control the power levels of device components + * (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c) + * -ensure that the console frame buffer is powered up before being referenced + * via prom_printf() or other prom calls that might generate console output + * -maintain implicit dependencies (e.g. parent must be powered up if child is) + * -provide "backwards compatible" behavior for devices without pm-components + * property + * + * Scanning: + * Whenever autopm is enabled, the framework attempts to bring each component + * of each device to its lowest power based on the threshold of idleness + * associated with each transition and the busy/idle state of the component. + * + * The actual work of this is done by pm_scan_dev(), which cycles through each + * component of a device, checking its idleness against its current threshold, + * and calling pm_set_power() as appropriate to change the power level. + * This function also indicates when it would next be profitable to scan the + * device again, and a new scan is scheduled after that time. + * + * Dependencies: + * It is possible to establish a dependency between the power states of two + * otherwise unrelated devices. This is currently done to ensure that the + * cdrom is always up whenever the console framebuffer is up, so that the user + * can insert a cdrom and see a popup as a result. + * + * The dependency terminology used in power.conf(4) is not easy to understand, + * so we've adopted a different terminology in the implementation. We write + * of a "keeps up" and a "kept up" device. A relationship can be established + * where one device keeps up another. That means that if the keepsup device + * has any component that is at a non-zero power level, all components of the + * "kept up" device must be brought to full power. This relationship is + * asynchronous. When the keeping device is powered up, a request is queued + * to a worker thread to bring up the kept device. The caller does not wait. + * Scan will not turn down a kept up device. + * + * Direct PM: + * A device may be directly power managed by a process. If a device is + * directly pm'd, then it will not be scanned, and dependencies will not be + * enforced. * If a directly pm'd device's driver requests a power change (via + * pm_raise_power(9F)), then the request is blocked and notification is sent + * to the controlling process, which must issue the requested power change for + * the driver to proceed. + * + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/callb.h> /* callback registration during CPR */ +#include <sys/conf.h> /* driver flags and functions */ +#include <sys/open.h> /* OTYP_CHR definition */ +#include <sys/stat.h> /* S_IFCHR definition */ +#include <sys/pathname.h> /* name -> dev_info xlation */ +#include <sys/ddi_impldefs.h> /* dev_info node fields */ +#include <sys/kmem.h> /* memory alloc stuff */ +#include <sys/debug.h> +#include <sys/archsystm.h> +#include <sys/pm.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/sunpm.h> +#include <sys/epm.h> +#include <sys/vfs.h> +#include <sys/mode.h> +#include <sys/mkdev.h> +#include <sys/promif.h> +#include <sys/consdev.h> +#include <sys/esunddi.h> +#include <sys/modctl.h> +#include <sys/fs/ufs_fs.h> +#include <sys/note.h> +#include <sys/taskq.h> +#include <sys/bootconf.h> +#include <sys/reboot.h> +#include <sys/spl.h> +#include <sys/disp.h> +#include <sys/sobject.h> +#include <sys/sunmdi.h> + + +/* + * PM LOCKING + * The list of locks: + * Global pm mutex locks. + * + * pm_scan_lock: + * It protects the timeout id of the scan thread, and the value + * of autopm_enabled. This lock is not held concurrently with + * any other PM locks. + * + * pm_clone_lock: Protects the clone list and count of poll events + * pending for the pm driver. + * Lock ordering: + * pm_clone_lock -> pm_pscc_interest_rwlock, + * pm_clone_lock -> pm_pscc_direct_rwlock. + * + * pm_rsvp_lock: + * Used to synchronize the data structures used for processes + * to rendezvous with state change information when doing + * direct PM. + * Lock ordering: + * pm_rsvp_lock -> pm_pscc_interest_rwlock, + * pm_rsvp_lock -> pm_pscc_direct_rwlock, + * pm_rsvp_lock -> pm_clone_lock. + * + * ppm_lock: protects the list of registered ppm drivers + * Lock ordering: + * ppm_lock -> ppm driver unit_lock + * + * pm_compcnt_lock: + * Protects count of components that are not at their lowest + * power level. + * Lock ordering: + * pm_compcnt_lock -> ppm_lock. + * + * pm_dep_thread_lock: + * Protects work list for pm_dep_thread. Not taken concurrently + * with any other pm lock. + * + * pm_remdrv_lock: + * Serializes the operation of removing noinvol data structure + * entries for a branch of the tree when a driver has been + * removed from the system (modctl_rem_major). + * Lock ordering: + * pm_remdrv_lock -> pm_noinvol_rwlock. + * + * pm_cfb_lock: (High level spin lock) + * Protects the count of how many components of the console + * frame buffer are off (so we know if we have to bring up the + * console as a result of a prom_printf, etc. + * No other locks are taken while holding this lock. + * + * pm_loan_lock: + * Protects the lock_loan list. List is used to record that one + * thread has acquired a power lock but has launched another thread + * to complete its processing. An entry in the list indicates that + * the worker thread can borrow the lock held by the other thread, + * which must block on the completion of the worker. Use is + * specific to module loading. + * No other locks are taken while holding this lock. + * + * Global PM rwlocks + * + * pm_thresh_rwlock: + * Protects the list of thresholds recorded for future use (when + * devices attach). + * Lock ordering: + * pm_thresh_rwlock -> devi_pm_lock + * + * pm_noinvol_rwlock: + * Protects list of detached nodes that had noinvol registered. + * No other PM locks are taken while holding pm_noinvol_rwlock. + * + * pm_pscc_direct_rwlock: + * Protects the list that maps devices being directly power + * managed to the processes that manage them. + * Lock ordering: + * pm_pscc_direct_rwlock -> psce_lock + * + * pm_pscc_interest_rwlock; + * Protects the list that maps state change events to processes + * that want to know about them. + * Lock ordering: + * pm_pscc_interest_rwlock -> psce_lock + * + * per-dip locks: + * + * Each node has these per-dip locks, which are only used if the device is + * a candidate for power management (e.g. has pm components) + * + * devi_pm_lock: + * Protects all power management state of the node except for + * power level, which is protected by ndi_devi_enter(). + * Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP(). + * Lock ordering: + * devi_pm_lock -> pm_rsvp_lock, + * devi_pm_lock -> pm_dep_thread_lock, + * devi_pm_lock -> pm_noinvol_rwlock, + * devi_pm_lock -> power lock + * + * power lock (ndi_devi_enter()): + * Since changing power level is possibly a slow operation (30 + * seconds to spin up a disk drive), this is locked separately. + * Since a call into the driver to change the power level of one + * component may result in a call back into the framework to change + * the power level of another, this lock allows re-entrancy by + * the same thread (ndi_devi_enter is used for this because + * the USB framework uses ndi_devi_enter in its power entry point, + * and use of any other lock would produce a deadlock. + * + * devi_pm_busy_lock: + * This lock protects the integrity of the busy count. It is + * only taken by pm_busy_component() and pm_idle_component and + * some code that adjust the busy time after the timer gets set + * up or after a CPR operation. It is per-dip to keep from + * single-threading all the disk drivers on a system. + * It could be per component instead, but most devices have + * only one component. + * No other PM locks are taken while holding this lock. + * + */ + +static int stdout_is_framebuffer; +static kmutex_t e_pm_power_lock; +static kmutex_t pm_loan_lock; +kmutex_t pm_scan_lock; +callb_id_t pm_cpr_cb_id; +callb_id_t pm_panic_cb_id; +callb_id_t pm_halt_cb_id; +int pm_comps_notlowest; /* no. of comps not at lowest power */ +int pm_powering_down; /* cpr is source of DDI_SUSPEND calls */ + +clock_t pm_min_scan = PM_MIN_SCAN; +clock_t pm_id_ticks = 5; /* ticks to wait before scan during idle-down */ + +static int pm_busop_set_power(dev_info_t *, + void *, pm_bus_power_op_t, void *, void *); +static int pm_busop_match_request(dev_info_t *, void *); +static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t); + +/* + * Dependency Processing is done thru a seperate thread. + */ +kmutex_t pm_dep_thread_lock; +kcondvar_t pm_dep_thread_cv; +pm_dep_wk_t *pm_dep_thread_workq = NULL; +pm_dep_wk_t *pm_dep_thread_tail = NULL; + +/* + * Autopm must be turned on by a PM_START_PM ioctl, so we don't end up + * power managing things in single user mode that have been suppressed via + * power.conf entries. Protected by pm_scan_lock. + */ +int autopm_enabled; + +/* + * This flag is true while processes are stopped for a checkpoint/resume. + * Controlling processes of direct pm'd devices are not available to + * participate in power level changes, so we bypass them when this is set. + */ +static int pm_processes_stopped; + +#ifdef DEBUG + +/* + * see common/sys/epm.h for PMD_* values + */ +uint_t pm_debug = 0; + +/* + * If pm_divertdebug is set, then no prom_printf calls will be made by + * PMD(), which will prevent debug output from bringing up the console + * frame buffer. Clearing this variable before setting pm_debug will result + * in PMD output going to the console. + * + * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid + * deadlocks and decremented at the end of pm_set_power() + */ +uint_t pm_divertdebug = 1; +kmutex_t pm_debug_lock; /* protects pm_divertdebug */ + +void prdeps(char *); +#endif + +/* Globals */ + +/* + * List of recorded thresholds and dependencies + */ +pm_thresh_rec_t *pm_thresh_head; +krwlock_t pm_thresh_rwlock; + +pm_pdr_t *pm_dep_head; +static int pm_unresolved_deps = 0; +static int pm_prop_deps = 0; + +/* + * List of devices that exported no-involuntary-power-cycles property + */ +pm_noinvol_t *pm_noinvol_head; + +/* + * Locks used in noinvol processing + */ +krwlock_t pm_noinvol_rwlock; +kmutex_t pm_remdrv_lock; + +int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS; +int pm_system_idle_threshold; +/* + * By default nexus has 0 threshold, and depends on its children to keep it up + */ +int pm_default_nexus_threshold = 0; + +/* + * Data structures shared with common/io/pm.c + */ +kmutex_t pm_clone_lock; +kcondvar_t pm_clones_cv[PM_MAX_CLONE]; +uint_t pm_poll_cnt[PM_MAX_CLONE]; /* count of events for poll */ +unsigned char pm_interest[PM_MAX_CLONE]; +struct pollhead pm_pollhead; + +extern int hz; +extern char *platform_module_list[]; + +/* + * Wrappers for use in ddi_walk_devs + */ + +static int pm_set_dev_thr_walk(dev_info_t *, void *); +static int pm_restore_direct_lvl_walk(dev_info_t *, void *); +static int pm_save_direct_lvl_walk(dev_info_t *, void *); +static int pm_discard_dep_walk(dev_info_t *, void *); +#ifdef DEBUG +static int pm_desc_pwrchk_walk(dev_info_t *, void *); +#endif + +/* + * Routines for managing noinvol devices + */ +int pm_noinvol_update(int, int, int, char *, dev_info_t *); +void pm_noinvol_update_node(dev_info_t *, + pm_bp_noinvol_t *req); + +kmutex_t pm_rsvp_lock; +kmutex_t pm_compcnt_lock; +krwlock_t pm_pscc_direct_rwlock; +krwlock_t pm_pscc_interest_rwlock; + +#define PSC_INTEREST 0 /* belongs to interest psc list */ +#define PSC_DIRECT 1 /* belongs to direct psc list */ + +pscc_t *pm_pscc_interest; +pscc_t *pm_pscc_direct; + +#define PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip)) +#define PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)]) +#define POWERING_ON(old, new) ((old) == 0 && (new) != 0) +#define POWERING_OFF(old, new) ((old) != 0 && (new) == 0) +#define PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm) + +#define PM_INCR_NOTLOWEST(dip) { \ + mutex_enter(&pm_compcnt_lock); \ + if (!PM_IS_NEXUS(dip) || \ + (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\ + if (pm_comps_notlowest == 0) \ + pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\ + pm_comps_notlowest++; \ + PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\ + pmf, PM_DEVICE(dip), pm_comps_notlowest)) \ + } \ + mutex_exit(&pm_compcnt_lock); \ +} +#define PM_DECR_NOTLOWEST(dip) { \ + mutex_enter(&pm_compcnt_lock); \ + if (!PM_IS_NEXUS(dip) || \ + (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\ + ASSERT(pm_comps_notlowest); \ + pm_comps_notlowest--; \ + PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to " \ + "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\ + if (pm_comps_notlowest == 0) \ + pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST); \ + } \ + mutex_exit(&pm_compcnt_lock); \ +} + +/* + * console frame-buffer power-management is not enabled when + * debugging services are present. to override, set pm_cfb_override + * to non-zero. + */ +uint_t pm_cfb_comps_off = 0; /* PM_LEVEL_UNKNOWN is considered on */ +kmutex_t pm_cfb_lock; +int pm_cfb_enabled = 1; /* non-zero allows pm of console frame buffer */ +#ifdef DEBUG +int pm_cfb_override = 1; /* non-zero allows pm of cfb with debuggers */ +#else +int pm_cfb_override = 0; /* non-zero allows pm of cfb with debuggers */ +#endif + +static dev_info_t *cfb_dip = 0; +static dev_info_t *cfb_dip_detaching = 0; +uint_t cfb_inuse = 0; +static ddi_softintr_t pm_soft_id; +static clock_t pm_soft_pending; +int pm_scans_disabled = 0; + +/* + * A structure to record the fact that one thread has borrowed a lock held + * by another thread. The context requires that the lender block on the + * completion of the borrower. + */ +typedef struct lock_loan { + struct lock_loan *pmlk_next; + kthread_t *pmlk_borrower; + kthread_t *pmlk_lender; + dev_info_t *pmlk_dip; +} lock_loan_t; +static lock_loan_t lock_loan_head; /* list head is a dummy element */ + +#ifdef DEBUG +#define PMD_FUNC(func, name) char *(func) = (name); +#else +#define PMD_FUNC(func, name) +#endif + + +/* + * Must be called before first device (including pseudo) attach + */ +void +pm_init_locks(void) +{ + mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL); + rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL); + rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL); + cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL); +} + +static boolean_t +pm_cpr_callb(void *arg, int code) +{ + _NOTE(ARGUNUSED(arg)) + static int auto_save; + static int pm_reset_timestamps(dev_info_t *, void *); + + switch (code) { + case CB_CODE_CPR_CHKPT: + /* + * Cancel scan or wait for scan in progress to finish + * Other threads may be trying to restart the scan, so we + * have to keep at it unil it sticks + */ + mutex_enter(&pm_scan_lock); + ASSERT(!pm_scans_disabled); + pm_scans_disabled = 1; + auto_save = autopm_enabled; + autopm_enabled = 0; + mutex_exit(&pm_scan_lock); + ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL); + break; + + case CB_CODE_CPR_RESUME: + ASSERT(!autopm_enabled); + ASSERT(pm_scans_disabled); + pm_scans_disabled = 0; + /* + * Call pm_reset_timestamps to reset timestamps of each + * device to the time when the system is resumed so that their + * idleness can be re-calculated. That's to avoid devices from + * being powered down right after resume if the system was in + * suspended mode long enough. + */ + ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL); + + autopm_enabled = auto_save; + /* + * If there is any auto-pm device, get the scanning + * going. Otherwise don't bother. + */ + ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL); + break; + } + return (B_TRUE); +} + +/* + * This callback routine is called when there is a system panic. This function + * exists for prototype matching. + */ +static boolean_t +pm_panic_callb(void *arg, int code) +{ + _NOTE(ARGUNUSED(arg, code)) + void pm_cfb_check_and_powerup(void); + PMD(PMD_CFB, ("pm_panic_callb\n")) + pm_cfb_check_and_powerup(); + return (B_TRUE); +} + +static boolean_t +pm_halt_callb(void *arg, int code) +{ + _NOTE(ARGUNUSED(arg, code)) + return (B_TRUE); /* XXX for now */ +} + +/* + * This needs to be called after the root and platform drivers are loaded + * and be single-threaded with respect to driver attach/detach + */ +void +pm_init(void) +{ + PMD_FUNC(pmf, "pm_init") + char **mod; + extern pri_t minclsyspri; + static void pm_dep_thread(void); + + pm_comps_notlowest = 0; + pm_system_idle_threshold = pm_default_idle_threshold; + + pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL, + CB_CL_CPR_PM, "pm_cpr"); + pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL, + CB_CL_PANIC, "pm_panic"); + pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL, + CB_CL_HALT, "pm_halt"); + + /* + * Create a thread to do dependency processing. + */ + (void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0, + TS_RUN, minclsyspri); + + /* + * loadrootmodules already loaded these ppm drivers, now get them + * attached so they can claim the root drivers as they attach + */ + for (mod = platform_module_list; *mod; mod++) { + if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) { + cmn_err(CE_WARN, "!cannot load platform pm driver %s\n", + *mod); + } else { + PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod, + ddi_major_to_name(ddi_name_to_major(*mod)))) + } + } +} + +/* + * pm_scan_init - create pm scan data structure. Called (if autopm enabled) + * when device becomes power managed or after a failed detach and when autopm + * is started via PM_START_PM ioctl, and after a CPR resume to get all the + * devices scanning again. + */ +void +pm_scan_init(dev_info_t *dip) +{ + PMD_FUNC(pmf, "scan_init") + pm_scan_t *scanp; + + ASSERT(!PM_ISBC(dip)); + + PM_LOCK_DIP(dip); + scanp = PM_GET_PM_SCAN(dip); + if (!scanp) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n", + pmf, PM_DEVICE(dip))) + scanp = kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP); + DEVI(dip)->devi_pm_scan = scanp; + } else if (scanp->ps_scan_flags & PM_SCAN_STOP) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): " + "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip))) + scanp->ps_scan_flags &= ~PM_SCAN_STOP; + } + PM_UNLOCK_DIP(dip); +} + +/* + * pm_scan_fini - remove pm scan data structure when stopping pm on the device + */ +void +pm_scan_fini(dev_info_t *dip) +{ + PMD_FUNC(pmf, "scan_fini") + pm_scan_t *scanp; + + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + ASSERT(!PM_ISBC(dip)); + PM_LOCK_DIP(dip); + scanp = PM_GET_PM_SCAN(dip); + if (!scanp) { + PM_UNLOCK_DIP(dip); + return; + } + + ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags & + (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN))); + + kmem_free(scanp, sizeof (pm_scan_t)); + DEVI(dip)->devi_pm_scan = NULL; + PM_UNLOCK_DIP(dip); +} + +/* + * Given a pointer to a component struct, return the current power level + * (struct contains index unless it is a continuous level). + * Located here in hopes of getting both this and dev_is_needed into the + * cache together + */ +static int +cur_power(pm_component_t *cp) +{ + if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN) + return (cp->pmc_cur_pwr); + + return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]); +} + +static char * +pm_decode_direction(int direction) +{ + switch (direction) { + case PM_LEVEL_UPONLY: + return ("up"); + + case PM_LEVEL_EXACT: + return ("exact"); + + case PM_LEVEL_DOWNONLY: + return ("down"); + + default: + return ("INVALID DIRECTION"); + } + _NOTE(NOTREACHED); + ASSERT(0); +} + +char * +pm_decode_op(pm_bus_power_op_t op) +{ + switch (op) { + case BUS_POWER_CHILD_PWRCHG: + return ("CHILD_PWRCHG"); + case BUS_POWER_NEXUS_PWRUP: + return ("NEXUS_PWRUP"); + case BUS_POWER_PRE_NOTIFICATION: + return ("PRE_NOTIFICATION"); + case BUS_POWER_POST_NOTIFICATION: + return ("POST_NOTIFICATION"); + case BUS_POWER_HAS_CHANGED: + return ("HAS_CHANGED"); + case BUS_POWER_NOINVOL: + return ("NOINVOL"); + default: + return ("UNKNOWN OP"); + } + _NOTE(NOTREACHED); + ASSERT(0); +} + +/* + * Returns true if level is a possible (valid) power level for component + */ +int +e_pm_valid_power(dev_info_t *dip, int cmpt, int level) +{ + PMD_FUNC(pmf, "e_pm_valid_power") + pm_component_t *cp = PM_CP(dip, cmpt); + int i; + int *ip = cp->pmc_comp.pmc_lvals; + int limit = cp->pmc_comp.pmc_numlevels; + + if (level < 0) + return (0); + for (i = 0; i < limit; i++) { + if (level == *ip++) + return (1); + } +#ifdef DEBUG + if (pm_debug & PMD_FAIL) { + ip = cp->pmc_comp.pmc_lvals; + + for (i = 0; i < limit; i++) + PMD(PMD_FAIL, ("%s: index=%d, level=%d\n", + pmf, i, *ip++)) + } +#endif + return (0); +} + +/* + * Returns true if device is pm'd (after calling pm_start if need be) + */ +int +e_pm_valid_info(dev_info_t *dip, pm_info_t **infop) +{ + pm_info_t *info; + static int pm_start(dev_info_t *dip); + + /* + * Check if the device is power managed if not. + * To make the common case (device is power managed already) + * fast, we check without the lock. If device is not already + * power managed, then we take the lock and the long route through + * go get it managed. Devices never go unmanaged until they + * detach. + */ + info = PM_GET_PM_INFO(dip); + if (!info) { + if (!DEVI_IS_ATTACHING(dip)) { + return (0); + } + if (pm_start(dip) != DDI_SUCCESS) { + return (0); + } + info = PM_GET_PM_INFO(dip); + } + ASSERT(info); + if (infop != NULL) + *infop = info; + return (1); +} + +int +e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp) +{ + if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) { + if (cpp != NULL) + *cpp = PM_CP(dip, cmpt); + return (1); + } else { + return (0); + } +} + +/* + * Internal guts of ddi_dev_is_needed and pm_raise/lower_power + */ +static int +dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction) +{ + PMD_FUNC(pmf, "din") + pm_component_t *cp; + char *pathbuf; + int result; + + ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY); + if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) || + !e_pm_valid_power(dip, cmpt, level)) + return (DDI_FAILURE); + + PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n", + pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction), + level, cur_power(cp))) + + if (pm_set_power(dip, cmpt, level, direction, + PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) { + if (direction == PM_LEVEL_UPONLY) { + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + cmn_err(CE_WARN, "Device %s failed to power up.", + pathbuf); + kmem_free(pathbuf, MAXPATHLEN); + } + PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, " + "errno %d\n", pmf, PM_DEVICE(dip), cmpt, + pm_decode_direction(direction), level, result)) + return (DDI_FAILURE); + } + + PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + pm_rescan(dip); + return (DDI_SUCCESS); +} + +/* + * We can get multiple pm_rescan() threads, if one of them discovers + * that no scan is running at the moment, it kicks it into action. + * Otherwise, it tells the current scanning thread to scan again when + * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and + * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one + * thread at a time runs the pm_scan_dev() code. + */ +void +pm_rescan(void *arg) +{ + PMD_FUNC(pmf, "rescan") + dev_info_t *dip = (dev_info_t *)arg; + pm_info_t *info; + pm_scan_t *scanp; + timeout_id_t scanid; + + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + PM_LOCK_DIP(dip); + info = PM_GET_PM_INFO(dip); + scanp = PM_GET_PM_SCAN(dip); + if (pm_scans_disabled || !autopm_enabled || !info || !scanp || + (scanp->ps_scan_flags & PM_SCAN_STOP)) { + PM_UNLOCK_DIP(dip); + return; + } + if (scanp->ps_scan_flags & PM_SCANNING) { + scanp->ps_scan_flags |= PM_SCAN_AGAIN; + PM_UNLOCK_DIP(dip); + return; + } else if (scanp->ps_scan_id) { + scanid = scanp->ps_scan_id; + scanp->ps_scan_id = 0; + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n", + pmf, PM_DEVICE(dip), (ulong_t)scanid)) + PM_UNLOCK_DIP(dip); + (void) untimeout(scanid); + PM_LOCK_DIP(dip); + } + + /* + * Dispatching pm_scan during attach time is risky due to the fact that + * attach might soon fail and dip dissolved, and panic may happen while + * attempting to stop scan. So schedule a pm_rescan instead. + * (Note that if either of the first two terms are true, taskq_dispatch + * will not be invoked). + * + * Multiple pm_scan dispatching is unecessary and costly to keep track + * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan + * to regulate the dispatching. + * + * Scan is stopped before the device is detached (in pm_detaching()) + * but it may get re-started during the post_detach processing if the + * driver fails to detach. + */ + if (DEVI_IS_ATTACHING(dip) || + (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) || + !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already " + "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip))) + if (scanp->ps_scan_id) { + scanid = scanp->ps_scan_id; + scanp->ps_scan_id = 0; + PM_UNLOCK_DIP(dip); + (void) untimeout(scanid); + PM_LOCK_DIP(dip); + if (scanp->ps_scan_id) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing " + "thread scheduled pm_rescan, scanid %lx\n", + pmf, PM_DEVICE(dip), + (ulong_t)scanp->ps_scan_id)) + PM_UNLOCK_DIP(dip); + return; + } + } + scanp->ps_scan_id = timeout(pm_rescan, (void *)dip, + (scanp->ps_idle_down ? pm_id_ticks : + (pm_min_scan * hz))); + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, " + "scanid %lx\n", pmf, PM_DEVICE(dip), + (ulong_t)scanp->ps_scan_id)) + } else { + PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n", + pmf, PM_DEVICE(dip))) + scanp->ps_scan_flags |= PM_SCAN_DISPATCHED; + } + PM_UNLOCK_DIP(dip); +} + +void +pm_scan(void *arg) +{ + PMD_FUNC(pmf, "scan") + dev_info_t *dip = (dev_info_t *)arg; + pm_scan_t *scanp; + time_t nextscan; + + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + + PM_LOCK_DIP(dip); + scanp = PM_GET_PM_SCAN(dip); + ASSERT(scanp && PM_GET_PM_INFO(dip)); + + if (pm_scans_disabled || !autopm_enabled || + (scanp->ps_scan_flags & PM_SCAN_STOP)) { + scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED); + PM_UNLOCK_DIP(dip); + return; + } + + if (scanp->ps_idle_down) { + /* + * make sure we remember idledown was in affect until + * we've completed the scan + */ + PMID_SET_SCANS(scanp->ps_idle_down) + PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts " + "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down)) + } + + /* possible having two threads running pm_scan() */ + if (scanp->ps_scan_flags & PM_SCANNING) { + scanp->ps_scan_flags |= PM_SCAN_AGAIN; + PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n", + pmf, PM_DEVICE(dip))) + scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED; + PM_UNLOCK_DIP(dip); + return; + } + + scanp->ps_scan_flags |= PM_SCANNING; + scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED; + do { + scanp->ps_scan_flags &= ~PM_SCAN_AGAIN; + PM_UNLOCK_DIP(dip); + nextscan = pm_scan_dev(dip); + PM_LOCK_DIP(dip); + } while (scanp->ps_scan_flags & PM_SCAN_AGAIN); + + ASSERT(scanp->ps_scan_flags & PM_SCANNING); + scanp->ps_scan_flags &= ~PM_SCANNING; + + if (scanp->ps_idle_down) { + scanp->ps_idle_down &= ~PMID_SCANS; + PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends " + "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down)) + } + + /* schedule for next idle check */ + if (nextscan != LONG_MAX) { + if (nextscan > (LONG_MAX / hz)) + nextscan = (LONG_MAX - 1) / hz; + if (scanp->ps_scan_id) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning " + "another rescan scheduled scanid(%lx)\n", pmf, + PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id)) + PM_UNLOCK_DIP(dip); + return; + } else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) { + scanp->ps_scan_id = timeout(pm_rescan, (void *)dip, + (clock_t)(nextscan * hz)); + PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in " + "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip), + (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id)) + } + } + PM_UNLOCK_DIP(dip); +} + +void +pm_get_timestamps(dev_info_t *dip, time_t *valuep) +{ + int components = PM_NUMCMPTS(dip); + int i; + + ASSERT(components > 0); + PM_LOCK_BUSY(dip); /* so we get a consistent view */ + for (i = 0; i < components; i++) { + valuep[i] = PM_CP(dip, i)->pmc_timestamp; + } + PM_UNLOCK_BUSY(dip); +} + +/* + * Returns true if device needs to be kept up because it exported the + * "no-involuntary-power-cycles" property or we're pretending it did (console + * fb case) or it is an ancestor of such a device and has used up the "one + * free cycle" allowed when all such leaf nodes have voluntarily powered down + * upon detach + */ +int +pm_noinvol(dev_info_t *dip) +{ + PMD_FUNC(pmf, "noinvol") + + /* + * This doesn't change over the life of a driver, so no locking needed + */ + if (PM_IS_CFB(dip)) { + PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n", + pmf, PM_DEVICE(dip))) + return (1); + } + /* + * Not an issue if no such kids + */ + if (DEVI(dip)->devi_pm_noinvolpm == 0) { +#ifdef DEBUG + if (DEVI(dip)->devi_pm_volpmd != 0) { + dev_info_t *pdip = dip; + do { + PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d " + "volpmd %d\n", pmf, PM_DEVICE(pdip), + DEVI(pdip)->devi_pm_noinvolpm, + DEVI(pdip)->devi_pm_volpmd)) + pdip = ddi_get_parent(pdip); + } while (pdip); + } +#endif + ASSERT(DEVI(dip)->devi_pm_volpmd == 0); + return (0); + } + + /* + * Since we now maintain the counts correct at every node, we no longer + * need to look up the tree. An ancestor cannot use up the free cycle + * without the children getting their counts adjusted. + */ + +#ifdef DEBUG + if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd) + PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf, + DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd, + PM_DEVICE(dip))) +#endif + return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd); +} + +/* + * This function performs the actual scanning of the device. + * It attempts to power off the indicated device's components if they have + * been idle and other restrictions are met. + * pm_scan_dev calculates and returns when the next scan should happen for + * this device. + */ +time_t +pm_scan_dev(dev_info_t *dip) +{ + PMD_FUNC(pmf, "scan_dev") + pm_scan_t *scanp; + time_t *timestamp, idletime, now, thresh; + time_t timeleft = 0; + int i, nxtpwr, curpwr, pwrndx, unused; + size_t size; + pm_component_t *cp; + dev_info_t *pdip = ddi_get_parent(dip); + int circ; + static int cur_threshold(dev_info_t *, int); + static int pm_next_lower_power(pm_component_t *, int); + + /* + * skip attaching device + */ + if (DEVI_IS_ATTACHING(dip)) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n", + pmf, PM_DEVICE(dip), pm_min_scan)) + return (pm_min_scan); + } + + PM_LOCK_DIP(dip); + scanp = PM_GET_PM_SCAN(dip); + ASSERT(scanp && PM_GET_PM_INFO(dip)); + + PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip))) + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip), + PM_KUC(dip))) + + /* no scan under the following conditions */ + if (pm_scans_disabled || !autopm_enabled || + (scanp->ps_scan_flags & PM_SCAN_STOP) || + (PM_KUC(dip) != 0) || + PM_ISDIRECT(dip) || pm_noinvol(dip)) { + PM_UNLOCK_DIP(dip); + PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, " + "scan_disabled(%d), apm_enabled(%d), kuc(%d), " + "%s directpm, %s pm_noinvol\n", pmf, PM_DEVICE(dip), + pm_scans_disabled, autopm_enabled, PM_KUC(dip), + PM_ISDIRECT(dip) ? "is" : "is not", + pm_noinvol(dip) ? "is" : "is not")) + return (LONG_MAX); + } + PM_UNLOCK_DIP(dip); + + if (!ndi_devi_tryenter(pdip, &circ)) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip", + pmf, PM_DEVICE(pdip))) + return ((time_t)1); + } + now = gethrestime_sec(); + size = PM_NUMCMPTS(dip) * sizeof (time_t); + timestamp = kmem_alloc(size, KM_SLEEP); + pm_get_timestamps(dip, timestamp); + + /* + * Since we removed support for backwards compatible devices, + * (see big comment at top of file) + * it is no longer required to deal with component 0 last. + */ + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + /* + * If already off (an optimization, perhaps) + */ + cp = PM_CP(dip, i); + pwrndx = cp->pmc_cur_pwr; + curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ? + PM_LEVEL_UNKNOWN : + cp->pmc_comp.pmc_lvals[pwrndx]; + + if (pwrndx == 0) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or " + "lowest\n", pmf, PM_DEVICE(dip), i)) + /* skip device if off or at its lowest */ + continue; + } + + thresh = cur_threshold(dip, i); /* comp i threshold */ + if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) { + /* were busy or newly became busy by another thread */ + if (timeleft == 0) + timeleft = max(thresh, pm_min_scan); + else + timeleft = min( + timeleft, max(thresh, pm_min_scan)); + continue; + } + + idletime = now - timestamp[i]; /* idle time */ + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n", + pmf, PM_DEVICE(dip), i, idletime)) + if (idletime >= thresh || PM_IS_PID(dip)) { + nxtpwr = pm_next_lower_power(cp, pwrndx); + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n", + pmf, PM_DEVICE(dip), i, curpwr, nxtpwr)) + if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY, + PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS && + PM_CURPOWER(dip, i) != nxtpwr) { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, " + "%d->%d Failed\n", pmf, PM_DEVICE(dip), + i, curpwr, nxtpwr)) + timeleft = pm_min_scan; + continue; + } else { + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, " + "%d->%d, GOOD curpwr %d\n", pmf, + PM_DEVICE(dip), i, curpwr, nxtpwr, + cur_power(cp))) + + if (nxtpwr == 0) /* component went off */ + continue; + + /* + * scan to next lower level + */ + if (timeleft == 0) + timeleft = max( + 1, cur_threshold(dip, i)); + else + timeleft = min(timeleft, + max(1, cur_threshold(dip, i))); + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, " + "timeleft(%lx)\n", pmf, PM_DEVICE(dip), + i, timeleft)) + } + } else { /* comp not idle long enough */ + if (timeleft == 0) + timeleft = thresh - idletime; + else + timeleft = min(timeleft, (thresh - idletime)); + PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft=" + "%lx\n", pmf, PM_DEVICE(dip), i, timeleft)) + } + } + ndi_devi_exit(pdip, circ); + kmem_free(timestamp, size); + PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf, + PM_DEVICE(dip), timeleft)) + + /* + * if components are already at lowest level, timeleft is left 0 + */ + return ((timeleft == 0) ? LONG_MAX : timeleft); +} + +/* + * pm_scan_stop - cancel scheduled pm_rescan, + * wait for termination of dispatched pm_scan thread + * and active pm_scan_dev thread. + */ +void +pm_scan_stop(dev_info_t *dip) +{ + PMD_FUNC(pmf, "scan_stop") + pm_scan_t *scanp; + timeout_id_t scanid; + + PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip))) + PM_LOCK_DIP(dip); + scanp = PM_GET_PM_SCAN(dip); + if (!scanp) { + PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n", + pmf, PM_DEVICE(dip))) + PM_UNLOCK_DIP(dip); + return; + } + scanp->ps_scan_flags |= PM_SCAN_STOP; + + /* cancel scheduled scan taskq */ + while (scanp->ps_scan_id) { + scanid = scanp->ps_scan_id; + scanp->ps_scan_id = 0; + PM_UNLOCK_DIP(dip); + (void) untimeout(scanid); + PM_LOCK_DIP(dip); + } + + while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) { + PM_UNLOCK_DIP(dip); + delay(1); + PM_LOCK_DIP(dip); + } + PM_UNLOCK_DIP(dip); + PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip))) +} + +int +pm_scan_stop_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + + if (!PM_GET_PM_SCAN(dip)) + return (DDI_WALK_CONTINUE); + ASSERT(!PM_ISBC(dip)); + pm_scan_stop(dip); + return (DDI_WALK_CONTINUE); +} + +/* + * Converts a power level value to its index + */ +static int +power_val_to_index(pm_component_t *cp, int val) +{ + int limit, i, *ip; + + ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY && + val != PM_LEVEL_EXACT); + /* convert power value into index (i) */ + limit = cp->pmc_comp.pmc_numlevels; + ip = cp->pmc_comp.pmc_lvals; + for (i = 0; i < limit; i++) + if (val == *ip++) + return (i); + return (-1); +} + +/* + * Converts a numeric power level to a printable string + */ +static char * +power_val_to_string(pm_component_t *cp, int val) +{ + int index; + + if (val == PM_LEVEL_UPONLY) + return ("<UPONLY>"); + + if (val == PM_LEVEL_UNKNOWN || + (index = power_val_to_index(cp, val)) == -1) + return ("<LEVEL_UNKNOWN>"); + + return (cp->pmc_comp.pmc_lnames[index]); +} + +/* + * Return true if this node has been claimed by a ppm. + */ +static int +pm_ppm_claimed(dev_info_t *dip) +{ + return (PPM(dip) != NULL); +} + +/* + * A node which was voluntarily power managed has just used up its "free cycle" + * and need is volpmd field cleared, and the same done to all its descendents + */ +static void +pm_clear_volpm_dip(dev_info_t *dip) +{ + PMD_FUNC(pmf, "clear_volpm_dip") + + if (dip == NULL) + return; + PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + DEVI(dip)->devi_pm_volpmd = 0; + for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) { + pm_clear_volpm_dip(dip); + } +} + +/* + * A node which was voluntarily power managed has used up the "free cycles" + * for the subtree that it is the root of. Scan through the list of detached + * nodes and adjust the counts of any that are descendents of the node. + */ +static void +pm_clear_volpm_list(dev_info_t *dip) +{ + PMD_FUNC(pmf, "clear_volpm_list") + char *pathbuf; + size_t len; + pm_noinvol_t *ip; + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + len = strlen(pathbuf); + PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf)) + rw_enter(&pm_noinvol_rwlock, RW_WRITER); + for (ip = pm_noinvol_head; ip; ip = ip->ni_next) { + PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf, + ip->ni_path)) + if (strncmp(pathbuf, ip->ni_path, len) == 0 && + ip->ni_path[len] == '/') { + PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf, + ip->ni_path)) + ip->ni_volpmd = 0; + ip->ni_wasvolpmd = 0; + } + } + kmem_free(pathbuf, MAXPATHLEN); + rw_exit(&pm_noinvol_rwlock); +} + +/* + * Powers a device, suspending or resuming the driver if it is a backward + * compatible device, calling into ppm to change power level. + * Called with the component's power lock held. + */ +static int +power_dev(dev_info_t *dip, int comp, int level, int old_level, + pm_canblock_t canblock, pm_ppm_devlist_t **devlist) +{ + PMD_FUNC(pmf, "power_dev") + power_req_t power_req; + int power_op_ret; /* DDI_SUCCESS or DDI_FAILURE */ + int resume_needed = 0; + int suspended = 0; + int result; + struct pm_component *cp = PM_CP(dip, comp); + int bc = PM_ISBC(dip); + int pm_all_components_off(dev_info_t *); + int clearvolpmd = 0; + char pathbuf[MAXNAMELEN]; +#ifdef DEBUG + char *ppmname, *ppmaddr; +#endif + /* + * If this is comp 0 of a backwards compat device and we are + * going to take the power away, we need to detach it with + * DDI_PM_SUSPEND command. + */ + if (bc && comp == 0 && POWERING_OFF(old_level, level)) { + if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) { + /* We could not suspend before turning cmpt zero off */ + PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n", + pmf, PM_DEVICE(dip))) + return (DDI_FAILURE); + } else { + DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED; + suspended++; + } + } + power_req.request_type = PMR_PPM_SET_POWER; + power_req.req.ppm_set_power_req.who = dip; + power_req.req.ppm_set_power_req.cmpt = comp; + power_req.req.ppm_set_power_req.old_level = old_level; + power_req.req.ppm_set_power_req.new_level = level; + power_req.req.ppm_set_power_req.canblock = canblock; + power_req.req.ppm_set_power_req.cookie = NULL; +#ifdef DEBUG + if (pm_ppm_claimed(dip)) { + ppmname = PM_NAME(PPM(dip)); + ppmaddr = PM_ADDR(PPM(dip)); + + } else { + ppmname = "noppm"; + ppmaddr = "0"; + } + PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n", + pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp, + power_val_to_string(cp, old_level), old_level, + power_val_to_string(cp, level), level, ppmname, ppmaddr)) +#endif + /* + * If non-bc noinvolpm device is turning first comp on, or noinvolpm + * bc device comp 0 is powering on, then we count it as a power cycle + * against its voluntary count. + */ + if (DEVI(dip)->devi_pm_volpmd && + (!bc && pm_all_components_off(dip) && level != 0) || + (bc && comp == 0 && POWERING_ON(old_level, level))) + clearvolpmd = 1; + if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, + &power_req, &result)) == DDI_SUCCESS) { + /* + * Now do involuntary pm accounting; If we've just cycled power + * on a voluntarily pm'd node, and by inference on its entire + * subtree, we need to set the subtree (including those nodes + * already detached) volpmd counts to 0, and subtract out the + * value of the current node's volpmd count from the ancestors + */ + if (clearvolpmd) { + int volpmd = DEVI(dip)->devi_pm_volpmd; + pm_clear_volpm_dip(dip); + pm_clear_volpm_list(dip); + if (volpmd) { + (void) ddi_pathname(dip, pathbuf); + (void) pm_noinvol_update(PM_BP_NOINVOL_POWER, + volpmd, 0, pathbuf, dip); + } + } + } else { + PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) " + "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, + PM_DEVICE(dip), level, power_val_to_string(cp, level))) + } + /* + * If some other devices were also powered up (e.g. other cpus in + * the same domain) return a pointer to that list + */ + if (devlist) { + *devlist = (pm_ppm_devlist_t *) + power_req.req.ppm_set_power_req.cookie; + } + /* + * We will have to resume the device if the device is backwards compat + * device and either of the following is true: + * -This is comp 0 and we have successfully powered it up + * -This is comp 0 and we have failed to power it down. Resume is + * needed because we have suspended it above + */ + + if (bc && comp == 0) { + ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip)); + if (power_op_ret == DDI_SUCCESS) { + if (POWERING_ON(old_level, level)) { + /* + * It must be either suspended or resumed + * via pm_power_has_changed path + */ + ASSERT((DEVI(dip)->devi_pm_flags & + PMC_SUSPENDED) || + (PM_CP(dip, comp)->pmc_flags & + PM_PHC_WHILE_SET_POWER)); + + resume_needed = suspended; + } + } else { + if (POWERING_OFF(old_level, level)) { + /* + * It must be either suspended or resumed + * via pm_power_has_changed path + */ + ASSERT((DEVI(dip)->devi_pm_flags & + PMC_SUSPENDED) || + (PM_CP(dip, comp)->pmc_flags & + PM_PHC_WHILE_SET_POWER)); + + resume_needed = suspended; + } + } + } + if (resume_needed) { + ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED); + /* ppm is not interested in DDI_PM_RESUME */ + if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) == + DDI_SUCCESS) { + DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED; + } else + cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)", + PM_DEVICE(dip)); + } + return (power_op_ret); +} + +/* + * Return true if we are the owner or a borrower of the devi lock. See + * pm_lock_power_single() about borrowing the lock. + */ +static int +pm_devi_lock_held(dev_info_t *dip) +{ + lock_loan_t *cur; + + if (DEVI_BUSY_OWNED(dip)) + return (1); + + /* return false if no locks borrowed */ + if (lock_loan_head.pmlk_next == NULL) + return (0); + + mutex_enter(&pm_loan_lock); + /* see if our thread is registered as a lock borrower. */ + for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next) + if (cur->pmlk_borrower == curthread) + break; + mutex_exit(&pm_loan_lock); + + return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread); +} + +/* + * pm_set_power: adjusts power level of device. Assumes device is power + * manageable & component exists. + * + * Cases which require us to bring up devices we keep up ("wekeepups") for + * backwards compatible devices: + * component 0 is off and we're bringing it up from 0 + * bring up wekeepup first + * and recursively when component 0 is off and we bring some other + * component up from 0 + * For devices which are not backward compatible, our dependency notion is much + * simpler. Unless all components are off, then wekeeps must be on. + * We don't treat component 0 differently. + * Canblock tells how to deal with a direct pm'd device. + * Scan arg tells us if we were called from scan, in which case we don't need + * to go back to the root node and walk down to change power. + */ +int +pm_set_power(dev_info_t *dip, int comp, int level, int direction, + pm_canblock_t canblock, int scan, int *retp) +{ + PMD_FUNC(pmf, "set_power") + char *pathbuf; + pm_bp_child_pwrchg_t bpc; + pm_sp_misc_t pspm; + int ret = DDI_SUCCESS; + int unused = DDI_SUCCESS; + dev_info_t *pdip = ddi_get_parent(dip); + +#ifdef DEBUG + int diverted = 0; + + /* + * This prevents operations on the console from calling prom_printf and + * either deadlocking or bringing up the console because of debug + * output + */ + if (dip == cfb_dip) { + diverted++; + mutex_enter(&pm_debug_lock); + pm_divertdebug++; + mutex_exit(&pm_debug_lock); + } +#endif + ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY || + direction == PM_LEVEL_EXACT); + PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n", + pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level)) + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + bpc.bpc_dip = dip; + bpc.bpc_path = pathbuf; + bpc.bpc_comp = comp; + bpc.bpc_olevel = PM_CURPOWER(dip, comp); + bpc.bpc_nlevel = level; + pspm.pspm_direction = direction; + pspm.pspm_errnop = retp; + pspm.pspm_canblock = canblock; + pspm.pspm_scan = scan; + bpc.bpc_private = &pspm; + + /* + * If a config operation is being done (we've locked the parent) or + * we already hold the power lock (we've locked the node) + * then we can operate directly on the node because we have already + * brought up all the ancestors, otherwise, we have to go back to the + * top of the tree. + */ + if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip)) + ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG, + (void *)&bpc, (void *)&unused); + else + ret = pm_busop_bus_power(ddi_root_node(), NULL, + BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused); +#ifdef DEBUG + if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) { + PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, " + "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp)) + } + if (diverted) { + mutex_enter(&pm_debug_lock); + pm_divertdebug--; + mutex_exit(&pm_debug_lock); + } +#endif + kmem_free(pathbuf, MAXPATHLEN); + return (ret); +} + + +static dev_info_t * +find_dip(dev_info_t *dip, char *dev_name, int holddip) +{ + PMD_FUNC(pmf, "find_dip") + dev_info_t *cdip; + char *child_dev, *addr; + char *device; /* writeable copy of path */ + int dev_len = strlen(dev_name)+1; + int circ; + + device = kmem_zalloc(dev_len, KM_SLEEP); + (void) strcpy(device, dev_name); + addr = strchr(device, '@'); + child_dev = strchr(device, '/'); + if ((addr != NULL) && (child_dev == NULL || addr < child_dev)) { + /* + * We have device = "name@addr..." form + */ + *addr++ = '\0'; /* for strcmp (and skip '@') */ + if (child_dev != NULL) + *child_dev++ = '\0'; /* for strcmp (and skip '/') */ + } else { + /* + * We have device = "name/..." or "name" + */ + addr = ""; + if (child_dev != NULL) + *child_dev++ = '\0'; /* for strcmp (and skip '/') */ + } + for (; dip != NULL; dip = ddi_get_next_sibling(dip)) { + if (strcmp(ddi_node_name(dip), device) == 0) { + /* If the driver isn't loaded, we prune the search */ + if (i_ddi_node_state(dip) < DS_READY) { + continue; + } + if (strcmp(ddi_get_name_addr(dip), addr) == 0) { + PMD(PMD_NAMETODIP, ("%s: matched %s@%s" + "(%s#%d)\n", pmf, PM_DEVICE(dip))) + if (child_dev != NULL) { + PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): " + "held, call find_dip %s\n", pmf, + PM_DEVICE(dip), child_dev)) + ndi_devi_enter(dip, &circ); + cdip = dip; + dip = find_dip(ddi_get_child(dip), + child_dev, holddip); + ndi_devi_exit(cdip, circ); + PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): " + "release, find_dip rets %s\n", pmf, + PM_DEVICE(cdip), child_dev)) + } else { + if (holddip) { + e_ddi_hold_devi(dip); + PMD(PMD_DHR | PMD_NAMETODIP, + ("%s: held %s@%s(%s#%d), " + "refcnt=%d\n", pmf, + PM_DEVICE(dip), + e_ddi_devi_holdcnt(dip))) + } + } + kmem_free(device, dev_len); + return (dip); + } + } + } + kmem_free(device, dev_len); + return (dip); +} + +/* + * If holddip is set, then if a dip is found we return with the node held + */ +dev_info_t * +pm_name_to_dip(char *pathname, int holddip) +{ + PMD_FUNC(pmf, "name_to_dip") + dev_info_t *dip = NULL; + char dev_name[MAXNAMELEN]; + dev_info_t *first_child; + int circular; + + if (!pathname) + return (NULL); + + (void) strncpy(dev_name, pathname, MAXNAMELEN); + + PMD(PMD_NAMETODIP, ("%s: devname: %s\n", pmf, dev_name)) + + /* + * First we attempt to match the node in the tree. If we succeed + * we hold the driver and look up the dip again. + * No need to hold the root as that node is always held. + */ + if (dev_name[0] == '/') { + ndi_devi_enter(ddi_root_node(), &circular); + first_child = ddi_get_child(ddi_root_node()); + dip = find_dip(first_child, dev_name + 1, holddip); + ndi_devi_exit(ddi_root_node(), circular); + + } else { + PMD(PMD_NAMETODIP, ("%s: physpath with unrooted " + "search\n", pmf)) + return (NULL); + } + + ASSERT(!dip || + (ddi_name_to_major(ddi_binding_name(dip)) != (major_t)-1)); + + return (dip); +} + +/* + * Search for a dependency and mark it unsatisfied + */ +static void +pm_unsatisfy(char *keeper, char *kept) +{ + PMD_FUNC(pmf, "unsatisfy") + pm_pdr_t *dp; + + PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept)) + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (!dp->pdr_isprop) { + if (strcmp(dp->pdr_keeper, keeper) == 0 && + (dp->pdr_kept_count > 0) && + strcmp(dp->pdr_kept_paths[0], kept) == 0) { + if (dp->pdr_satisfied) { + dp->pdr_satisfied = 0; + pm_unresolved_deps++; + PMD(PMD_KEEPS, ("%s: clear satisfied, " + "pm_unresolved_deps now %d\n", pmf, + pm_unresolved_deps)) + } + } + } + } +} + +/* + * Device dip is being un power managed, it keeps up count other devices. + * We need to release any hold we have on the kept devices, and also + * mark the dependency no longer satisfied. + */ +static void +pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr) +{ + PMD_FUNC(pmf, "unkeeps") + int i, j; + dev_info_t *kept; + dev_info_t *dip; + struct pm_component *cp; + int keeper_on = 0, circ; + + PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count, + keeper, (void *)keptpaths)) + /* + * Try to grab keeper. Keeper may have gone away by now, + * in this case, used the passed in value pwr + */ + dip = pm_name_to_dip(keeper, 1); + for (i = 0; i < count; i++) { + /* Release power hold */ + kept = pm_name_to_dip(keptpaths[i], 1); + if (kept) { + PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf, + PM_DEVICE(kept), i)) + /* + * We need to check if we skipped a bringup here + * because we could have failed the bringup + * (ie DIRECT PM device) and have + * not increment the count. + */ + if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) { + keeper_on = 0; + PM_LOCK_POWER(dip, &circ); + for (j = 0; j < PM_NUMCMPTS(dip); j++) { + cp = &DEVI(dip)->devi_pm_components[j]; + if (cur_power(cp)) { + keeper_on++; + break; + } + } + if (keeper_on && (PM_SKBU(kept) == 0)) { + pm_rele_power(kept); + DEVI(kept)->devi_pm_flags + &= ~PMC_SKIP_BRINGUP; + } + PM_UNLOCK_POWER(dip, circ); + } else if (pwr) { + if (PM_SKBU(kept) == 0) { + pm_rele_power(kept); + DEVI(kept)->devi_pm_flags + &= ~PMC_SKIP_BRINGUP; + } + } + ddi_release_devi(kept); + } + /* + * mark this dependency not satisfied + */ + pm_unsatisfy(keeper, keptpaths[i]); + } + if (dip) + ddi_release_devi(dip); +} + +/* + * Device kept is being un power managed, it is kept up by keeper. + * We need to mark the dependency no longer satisfied. + */ +static void +pm_unkepts(char *kept, char *keeper) +{ + PMD_FUNC(pmf, "unkepts") + PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper)) + ASSERT(keeper != NULL); + /* + * mark this dependency not satisfied + */ + pm_unsatisfy(keeper, kept); +} + +/* + * Removes dependency information and hold on the kepts, if the path is a + * path of a keeper. + */ +static void +pm_free_keeper(char *path, int pwr) +{ + pm_pdr_t *dp; + int i; + size_t length; + + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (strcmp(dp->pdr_keeper, path) != 0) + continue; + /* + * Remove all our kept holds and the dependency records, + * then free up the kept lists. + */ + pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr); + if (dp->pdr_kept_count) { + for (i = 0; i < dp->pdr_kept_count; i++) { + length = strlen(dp->pdr_kept_paths[i]); + kmem_free(dp->pdr_kept_paths[i], length + 1); + } + kmem_free(dp->pdr_kept_paths, + dp->pdr_kept_count * sizeof (char **)); + dp->pdr_kept_paths = NULL; + dp->pdr_kept_count = 0; + } + } +} + +/* + * Removes the device represented by path from the list of kepts, if the + * path is a path of a kept + */ +static void +pm_free_kept(char *path) +{ + pm_pdr_t *dp; + int i; + int j, count; + size_t length; + char **paths; + + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (dp->pdr_kept_count == 0) + continue; + count = dp->pdr_kept_count; + /* Remove this device from the kept path lists */ + for (i = 0; i < count; i++) { + if (strcmp(dp->pdr_kept_paths[i], path) == 0) { + pm_unkepts(path, dp->pdr_keeper); + length = strlen(dp->pdr_kept_paths[i]) + 1; + kmem_free(dp->pdr_kept_paths[i], length); + dp->pdr_kept_paths[i] = NULL; + dp->pdr_kept_count--; + } + } + /* Compact the kept paths array */ + if (dp->pdr_kept_count) { + length = dp->pdr_kept_count * sizeof (char **); + paths = kmem_zalloc(length, KM_SLEEP); + j = 0; + for (i = 0; i < count; i++) { + if (dp->pdr_kept_paths[i] != NULL) { + paths[j] = dp->pdr_kept_paths[i]; + j++; + } + } + ASSERT(j == dp->pdr_kept_count); + } + /* Now free the old array and point to the new one */ + kmem_free(dp->pdr_kept_paths, count * sizeof (char **)); + if (dp->pdr_kept_count) + dp->pdr_kept_paths = paths; + else + dp->pdr_kept_paths = NULL; + } +} + +/* + * Free the dependency information for a device. + */ +void +pm_free_keeps(char *path, int pwr) +{ + PMD_FUNC(pmf, "free_keeps") + +#ifdef DEBUG + int doprdeps = 0; + void prdeps(char *); + + PMD(PMD_KEEPS, ("%s: %s\n", pmf, path)) + if (pm_debug & PMD_KEEPS) { + doprdeps = 1; + prdeps("pm_free_keeps before"); + } +#endif + /* + * First assume we are a keeper and remove all our kepts. + */ + pm_free_keeper(path, pwr); + /* + * Now assume we a kept device, and remove all our records. + */ + pm_free_kept(path); +#ifdef DEBUG + if (doprdeps) { + prdeps("pm_free_keeps after"); + } +#endif +} + +static int +pm_is_kept(char *path) +{ + pm_pdr_t *dp; + int i; + + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (dp->pdr_kept_count == 0) + continue; + for (i = 0; i < dp->pdr_kept_count; i++) { + if (strcmp(dp->pdr_kept_paths[i], path) == 0) + return (1); + } + } + return (0); +} + +static void +e_pm_hold_rele_power(dev_info_t *dip, int cnt) +{ + PMD_FUNC(pmf, "hold_rele_power") + int circ; + + if ((dip == NULL) || + (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip)) + return; + PM_LOCK_POWER(dip, &circ); + ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0); + PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf, + PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt))) + PM_KUC(dip) += cnt; + ASSERT(PM_KUC(dip) >= 0); + PM_UNLOCK_POWER(dip, circ); + if (cnt < 0 && PM_KUC(dip) == 0) + pm_rescan(dip); +} + +#define MAX_PPM_HANDLERS 4 + +kmutex_t ppm_lock; /* in case we ever do multi-threaded startup */ + +struct ppm_callbacks { + int (*ppmc_func)(dev_info_t *); + dev_info_t *ppmc_dip; +} ppm_callbacks[MAX_PPM_HANDLERS + 1]; + + +/* + * This routine calls into all the registered ppms to notify them + * that either all components of power-managed devices are at their + * lowest levels or no longer all are at their lowest levels. + */ +static void +pm_ppm_notify_all_lowest(dev_info_t *dip, int mode) +{ + struct ppm_callbacks *ppmcp; + power_req_t power_req; + int result = 0; + + power_req.request_type = PMR_PPM_ALL_LOWEST; + power_req.req.ppm_all_lowest_req.mode = mode; + mutex_enter(&ppm_lock); + for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) + (void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip, + DDI_CTLOPS_POWER, &power_req, &result); + mutex_exit(&ppm_lock); +} + +static void +pm_set_pm_info(dev_info_t *dip, void *value) +{ + DEVI(dip)->devi_pm_info = value; +} + +pm_rsvp_t *pm_blocked_list; + +/* + * Look up an entry in the blocked list by dip and component + */ +static pm_rsvp_t * +pm_rsvp_lookup(dev_info_t *dip, int comp) +{ + pm_rsvp_t *p; + ASSERT(MUTEX_HELD(&pm_rsvp_lock)); + for (p = pm_blocked_list; p; p = p->pr_next) + if (p->pr_dip == dip && p->pr_comp == comp) { + return (p); + } + return (NULL); +} + +/* + * Called when a device which is direct power managed (or the parent or + * dependent of such a device) changes power, or when a pm clone is closed + * that was direct power managing a device. This call results in pm_blocked() + * (below) returning. + */ +void +pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel) +{ + PMD_FUNC(pmf, "proceed") + pm_rsvp_t *found = NULL; + pm_rsvp_t *p; + + mutex_enter(&pm_rsvp_lock); + switch (cmd) { + /* + * we're giving up control, let any pending op continue + */ + case PMP_RELEASE: + for (p = pm_blocked_list; p; p = p->pr_next) { + if (dip == p->pr_dip) { + p->pr_retval = PMP_RELEASE; + PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n", + pmf, PM_DEVICE(dip))) + cv_signal(&p->pr_cv); + } + } + break; + + /* + * process has done PM_SET_CURRENT_POWER; let a matching request + * succeed and a non-matching request for the same device fail + */ + case PMP_SETPOWER: + found = pm_rsvp_lookup(dip, comp); + if (!found) /* if driver not waiting */ + break; + /* + * This cannot be pm_lower_power, since that can only happen + * during detach or probe + */ + if (found->pr_newlevel <= newlevel) { + found->pr_retval = PMP_SUCCEED; + PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + } else { + found->pr_retval = PMP_FAIL; + PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + } + cv_signal(&found->pr_cv); + break; + + default: + panic("pm_proceed unknown cmd %d", cmd); + } + mutex_exit(&pm_rsvp_lock); +} + +/* + * This routine dispatches new work to the dependency thread. Caller must + * be prepared to block for memory if necessary. + */ +void +pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait, + int *res, int cached_pwr) +{ + pm_dep_wk_t *new_work; + + new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP); + new_work->pdw_type = cmd; + new_work->pdw_wait = wait; + new_work->pdw_done = 0; + new_work->pdw_ret = 0; + new_work->pdw_pwr = cached_pwr; + cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL); + if (keeper != NULL) { + new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1, + KM_SLEEP); + (void) strcpy(new_work->pdw_keeper, keeper); + } + if (kept != NULL) { + new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP); + (void) strcpy(new_work->pdw_kept, kept); + } + mutex_enter(&pm_dep_thread_lock); + if (pm_dep_thread_workq == NULL) { + pm_dep_thread_workq = new_work; + pm_dep_thread_tail = new_work; + new_work->pdw_next = NULL; + } else { + pm_dep_thread_tail->pdw_next = new_work; + pm_dep_thread_tail = new_work; + new_work->pdw_next = NULL; + } + cv_signal(&pm_dep_thread_cv); + /* If caller asked for it, wait till it is done. */ + if (wait) { + while (!new_work->pdw_done) + cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock); + /* + * Pass return status, if any, back. + */ + if (res != NULL) + *res = new_work->pdw_ret; + /* + * If we asked to wait, it is our job to free the request + * structure. + */ + if (new_work->pdw_keeper) + kmem_free(new_work->pdw_keeper, + strlen(new_work->pdw_keeper) + 1); + if (new_work->pdw_kept) + kmem_free(new_work->pdw_kept, + strlen(new_work->pdw_kept) + 1); + kmem_free(new_work, sizeof (pm_dep_wk_t)); + } + mutex_exit(&pm_dep_thread_lock); +} + +/* + * Release the pm resource for this device. + */ +void +pm_rem_info(dev_info_t *dip) +{ + PMD_FUNC(pmf, "rem_info") + int i, count = 0; + pm_info_t *info = PM_GET_PM_INFO(dip); + dev_info_t *pdip = ddi_get_parent(dip); + char *pathbuf; + int work_type = PM_DEP_WK_DETACH; + + ASSERT(info); + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + if (PM_ISDIRECT(dip)) { + info->pmi_dev_pm_state &= ~PM_DIRECT; + ASSERT(info->pmi_clone); + info->pmi_clone = 0; + pm_proceed(dip, PMP_RELEASE, -1, -1); + } + ASSERT(!PM_GET_PM_SCAN(dip)); + + /* + * Now adjust parent's kidsupcnt. BC nodes we check only comp 0, + * Others we check all components. BC node that has already + * called pm_destroy_components() has zero component count. + * Parents that get notification are not adjusted because their + * kidsupcnt is always 0 (or 1 during configuration). + */ + PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf, + PM_DEVICE(dip), PM_NUMCMPTS(dip))) + + /* node is detached, so we can examine power without locking */ + if (PM_ISBC(dip)) { + count = (PM_CURPOWER(dip, 0) != 0); + } else { + for (i = 0; i < PM_NUMCMPTS(dip); i++) + count += (PM_CURPOWER(dip, i) != 0); + } + + if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip)) + e_pm_hold_rele_power(pdip, -count); + + /* Schedule a request to clean up dependency records */ + pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf, + PM_DEP_NOWAIT, NULL, (count > 0)); + kmem_free(pathbuf, MAXPATHLEN); + + /* + * Adjust the pm_comps_notlowest count since this device is + * not being power-managed anymore. + */ + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + if (PM_CURPOWER(dip, i) != 0) + PM_DECR_NOTLOWEST(dip); + } + /* + * Once we clear the info pointer, it looks like it is not power + * managed to everybody else. + */ + pm_set_pm_info(dip, NULL); + kmem_free(info, sizeof (pm_info_t)); +} + +int +pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length) +{ + int components = PM_NUMCMPTS(dip); + int *bufp; + size_t size; + int i; + + if (components <= 0) { + cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, " + "can't get normal power values\n", PM_DEVICE(dip)); + return (DDI_FAILURE); + } else { + size = components * sizeof (int); + bufp = kmem_alloc(size, KM_SLEEP); + for (i = 0; i < components; i++) { + bufp[i] = pm_get_normal_power(dip, i); + } + } + *length = size; + *valuep = bufp; + return (DDI_SUCCESS); +} + +static int +pm_reset_timestamps(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + + int components; + int i; + + if (!PM_GET_PM_INFO(dip)) + return (DDI_WALK_CONTINUE); + components = PM_NUMCMPTS(dip); + ASSERT(components > 0); + PM_LOCK_BUSY(dip); + for (i = 0; i < components; i++) { + struct pm_component *cp; + /* + * If the component was not marked as busy, + * reset its timestamp to now. + */ + cp = PM_CP(dip, i); + if (cp->pmc_timestamp) + cp->pmc_timestamp = gethrestime_sec(); + } + PM_UNLOCK_BUSY(dip); + return (DDI_WALK_CONTINUE); +} + +/* + * Convert a power level to an index into the levels array (or + * just PM_LEVEL_UNKNOWN in that special case). + */ +static int +pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level) +{ + PMD_FUNC(pmf, "level_to_index") + int i; + int limit = cp->pmc_comp.pmc_numlevels; + int *ip = cp->pmc_comp.pmc_lvals; + + if (level == PM_LEVEL_UNKNOWN) + return (level); + + for (i = 0; i < limit; i++) { + if (level == *ip++) { + PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n", + pmf, PM_DEVICE(dip), + (int)(cp - DEVI(dip)->devi_pm_components), level)) + return (i); + } + } + panic("pm_level_to_index: level %d not found for device " + "%s@%s(%s#%d)", level, PM_DEVICE(dip)); + /*NOTREACHED*/ +} + +/* + * Internal function to set current power level + */ +static void +e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level) +{ + PMD_FUNC(pmf, "set_cur_pwr") + int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ? + cp->pmc_phc_pwr : cp->pmc_cur_pwr); + + /* + * Nothing to adjust if current & new levels are the same. + */ + if (curpwr != PM_LEVEL_UNKNOWN && + level == cp->pmc_comp.pmc_lvals[curpwr]) + return; + + /* + * Keep the count for comps doing transition to/from lowest + * level. + */ + if (curpwr == 0) { + PM_INCR_NOTLOWEST(dip); + } else if (level == cp->pmc_comp.pmc_lvals[0]) { + PM_DECR_NOTLOWEST(dip); + } + cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN; + cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level); +} + +/* + * This is the default method of setting the power of a device if no ppm + * driver has claimed it. + */ +int +pm_power(dev_info_t *dip, int comp, int level) +{ + PMD_FUNC(pmf, "power") + struct dev_ops *ops; + int (*fn)(dev_info_t *, int, int); + struct pm_component *cp = PM_CP(dip, comp); + int retval; + pm_info_t *info = PM_GET_PM_INFO(dip); + static int pm_phc_impl(dev_info_t *, int, int, int); + + PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf, + PM_DEVICE(dip), comp, level)) + if (!(ops = ddi_get_driver(dip))) { + PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf, + PM_DEVICE(dip))) + return (DDI_FAILURE); + } + if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) { + PMD(PMD_FAIL, ("%s: %s%s\n", pmf, + (ops->devo_rev < 2 ? " wrong devo_rev" : ""), + (!fn ? " devo_power NULL" : ""))) + return (DDI_FAILURE); + } + cp->pmc_flags |= PM_POWER_OP; + retval = (*fn)(dip, comp, level); + cp->pmc_flags &= ~PM_POWER_OP; + if (retval == DDI_SUCCESS) { + e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level); + return (DDI_SUCCESS); + } + + /* + * If pm_power_has_changed() detected a deadlock with pm_power() it + * updated only the power level of the component. If our attempt to + * set the device new to a power level above has failed we sync the + * total power state via phc code now. + */ + if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) { + int phc_lvl = + cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]; + + ASSERT(info); + (void) pm_phc_impl(dip, comp, phc_lvl, 0); + PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n", + pmf, PM_DEVICE(dip), comp, phc_lvl)) + } + + PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to " + "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip), + level, power_val_to_string(cp, level))); + return (DDI_FAILURE); +} + +int +pm_unmanage(dev_info_t *dip) +{ + PMD_FUNC(pmf, "unmanage") + power_req_t power_req; + int result, retval = 0; + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + power_req.request_type = PMR_PPM_UNMANAGE; + power_req.req.ppm_config_req.who = dip; + if (pm_ppm_claimed(dip)) + retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, + &power_req, &result); +#ifdef DEBUG + else + retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, + &power_req, &result); +#endif + ASSERT(retval == DDI_SUCCESS); + pm_rem_info(dip); + return (retval); +} + +int +pm_raise_power(dev_info_t *dip, int comp, int level) +{ + if (level < 0) + return (DDI_FAILURE); + if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) || + !e_pm_valid_power(dip, comp, level)) + return (DDI_FAILURE); + + return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY)); +} + +int +pm_lower_power(dev_info_t *dip, int comp, int level) +{ + PMD_FUNC(pmf, "pm_lower_power") + + if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) || + !e_pm_valid_power(dip, comp, level)) { + PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) " + "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level)) + return (DDI_FAILURE); + } + + if (!DEVI_IS_DETACHING(dip)) { + PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n", + pmf, PM_DEVICE(dip))) + return (DDI_FAILURE); + } + + /* + * If we don't care about saving power, or we're treating this node + * specially, then this is a no-op + */ + if (!autopm_enabled || pm_noinvol(dip)) { + PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s\n", pmf, PM_DEVICE(dip), + !autopm_enabled ? "!autopm_enabled " : "", + pm_noinvol(dip) ? "pm_noinvol()" : "")) + return (DDI_SUCCESS); + } + + if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) { + PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf, + PM_DEVICE(dip))) + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +/* + * Find the entries struct for a given dip in the blocked list, return it locked + */ +static psce_t * +pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp) +{ + pscc_t *p; + psce_t *psce; + + rw_enter(&pm_pscc_direct_rwlock, RW_READER); + for (p = pm_pscc_direct; p; p = p->pscc_next) { + if (p->pscc_dip == dip) { + *psccp = p; + psce = p->pscc_entries; + mutex_enter(&psce->psce_lock); + ASSERT(psce); + rw_exit(&pm_pscc_direct_rwlock); + return (psce); + } + } + rw_exit(&pm_pscc_direct_rwlock); + panic("sunpm: no entry for dip %p in direct list", (void *)dip); + /*NOTREACHED*/ +} + +/* + * Write an entry indicating a power level change (to be passed to a process + * later) in the given psce. + * If we were called in the path that brings up the console fb in the + * case of entering the prom, we don't want to sleep. If the alloc fails, then + * we create a record that has a size of -1, a physaddr of NULL, and that + * has the overflow flag set. + */ +static int +psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new, + int old, int which, pm_canblock_t canblock) +{ + char buf[MAXNAMELEN]; + pm_state_change_t *p; + size_t size; + caddr_t physpath = NULL; + int overrun = 0; + + ASSERT(MUTEX_HELD(&psce->psce_lock)); + (void) ddi_pathname(dip, buf); + size = strlen(buf) + 1; + p = psce->psce_in; + if (canblock == PM_CANBLOCK_BYPASS) { + physpath = kmem_alloc(size, KM_NOSLEEP); + if (physpath == NULL) { + /* + * mark current entry as overrun + */ + p->flags |= PSC_EVENT_LOST; + size = (size_t)-1; + } + } else + physpath = kmem_alloc(size, KM_SLEEP); + if (p->size) { /* overflow; mark the next entry */ + if (p->size != (size_t)-1) + kmem_free(p->physpath, p->size); + ASSERT(psce->psce_out == p); + if (p == psce->psce_last) { + psce->psce_first->flags |= PSC_EVENT_LOST; + psce->psce_out = psce->psce_first; + } else { + (p + 1)->flags |= PSC_EVENT_LOST; + psce->psce_out = (p + 1); + } + overrun++; + } else if (physpath == NULL) { /* alloc failed, mark this entry */ + p->flags |= PSC_EVENT_LOST; + p->size = 0; + p->physpath = NULL; + } + if (which == PSC_INTEREST) { + mutex_enter(&pm_compcnt_lock); + if (pm_comps_notlowest == 0) + p->flags |= PSC_ALL_LOWEST; + else + p->flags &= ~PSC_ALL_LOWEST; + mutex_exit(&pm_compcnt_lock); + } + p->event = event; + p->timestamp = gethrestime_sec(); + p->component = comp; + p->old_level = old; + p->new_level = new; + p->physpath = physpath; + p->size = size; + if (physpath != NULL) + (void) strcpy(p->physpath, buf); + if (p == psce->psce_last) + psce->psce_in = psce->psce_first; + else + psce->psce_in = ++p; + mutex_exit(&psce->psce_lock); + return (overrun); +} + +/* + * Find the next entry on the interest list. We keep a pointer to the item we + * last returned in the user's cooke. Returns a locked entries struct. + */ +static psce_t * +psc_interest(void **cookie, pscc_t **psccp) +{ + pscc_t *pscc; + pscc_t **cookiep = (pscc_t **)cookie; + + if (*cookiep == NULL) + pscc = pm_pscc_interest; + else + pscc = (*cookiep)->pscc_next; + if (pscc) { + *cookiep = pscc; + *psccp = pscc; + mutex_enter(&pscc->pscc_entries->psce_lock); + return (pscc->pscc_entries); + } else { + return (NULL); + } +} + +/* + * Create an entry for a process to pick up indicating a power level change. + */ +static void +pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp, + int newlevel, int oldlevel, pm_canblock_t canblock) +{ + PMD_FUNC(pmf, "enqueue_notify") + pscc_t *pscc; + psce_t *psce; + void *cookie = NULL; + int overrun; + + ASSERT(MUTEX_HELD(&pm_rsvp_lock)); + switch (cmd) { + case PSC_PENDING_CHANGE: /* only for controlling process */ + PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n", + pmf, PM_DEVICE(dip), comp, oldlevel, newlevel)) + psce = pm_psc_dip_to_direct(dip, &pscc); + ASSERT(psce); + PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] " + "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone, + pm_poll_cnt[pscc->pscc_clone])) + overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel, + PSC_DIRECT, canblock); + PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone)) + mutex_enter(&pm_clone_lock); + if (!overrun) + pm_poll_cnt[pscc->pscc_clone]++; + cv_signal(&pm_clones_cv[pscc->pscc_clone]); + pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN)); + mutex_exit(&pm_clone_lock); + break; + case PSC_HAS_CHANGED: + PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n", + pmf, PM_DEVICE(dip), comp, oldlevel, newlevel)) + if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) { + psce = pm_psc_dip_to_direct(dip, &pscc); + PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] " + "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone, + pm_poll_cnt[pscc->pscc_clone])) + overrun = psc_entry(cmd, psce, dip, comp, newlevel, + oldlevel, PSC_DIRECT, canblock); + PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone)) + mutex_enter(&pm_clone_lock); + if (!overrun) + pm_poll_cnt[pscc->pscc_clone]++; + cv_signal(&pm_clones_cv[pscc->pscc_clone]); + pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN)); + mutex_exit(&pm_clone_lock); + } + mutex_enter(&pm_clone_lock); + rw_enter(&pm_pscc_interest_rwlock, RW_READER); + while ((psce = psc_interest(&cookie, &pscc)) != NULL) { + (void) psc_entry(cmd, psce, dip, comp, newlevel, + oldlevel, PSC_INTEREST, canblock); + cv_signal(&pm_clones_cv[pscc->pscc_clone]); + } + rw_exit(&pm_pscc_interest_rwlock); + mutex_exit(&pm_clone_lock); + break; +#ifdef DEBUG + default: + ASSERT(0); +#endif + } +} + +static void +pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock) +{ + if (listp) { + pm_ppm_devlist_t *p, *next = NULL; + + for (p = *listp; p; p = next) { + next = p->ppd_next; + pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who, + p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level, + canblock); + kmem_free(p, sizeof (pm_ppm_devlist_t)); + } + *listp = NULL; + } +} + +/* + * Try to get the power locks of the parent node and target (child) + * node. Return true if successful (with both locks held) or false + * (with no locks held). + */ +static int +pm_try_parent_child_locks(dev_info_t *pdip, + dev_info_t *dip, int *pcircp, int *circp) +{ + if (ndi_devi_tryenter(pdip, pcircp)) + if (PM_TRY_LOCK_POWER(dip, circp)) { + return (1); + } else { + ndi_devi_exit(pdip, *pcircp); + } + return (0); +} + +/* + * Determine if the power lock owner is blocked by current thread. + * returns : + * 1 - If the thread owning the effective power lock (the first lock on + * which a thread blocks when it does PM_LOCK_POWER) is blocked by + * a mutex held by the current thread. + * + * 0 - otherwise + * + * Note : This function is called by pm_power_has_changed to determine whether + * it is executing in parallel with pm_set_power. + */ +static int +pm_blocked_by_us(dev_info_t *dip) +{ + power_req_t power_req; + kthread_t *owner; + int result; + kmutex_t *mp; + dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm; + + power_req.request_type = PMR_PPM_POWER_LOCK_OWNER; + power_req.req.ppm_power_lock_owner_req.who = dip; + if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) != + DDI_SUCCESS) { + /* + * It is assumed that if the device is claimed by ppm, ppm + * will always implement this request type and it'll always + * return success. We panic here, if it fails. + */ + panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n", + PM_DEVICE(dip)); + /*NOTREACHED*/ + } + + if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL && + owner->t_state == TS_SLEEP && + owner->t_sobj_ops && + SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX && + (mp = (kmutex_t *)owner->t_wchan) && + mutex_owner(mp) == curthread) + return (1); + + return (0); +} + +/* + * Notify parent which wants to hear about a child's power changes. + */ +static void +pm_notify_parent(dev_info_t *dip, + dev_info_t *pdip, int comp, int old_level, int level) +{ + pm_bp_has_changed_t bphc; + pm_sp_misc_t pspm; + char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + int result = DDI_SUCCESS; + + bphc.bphc_dip = dip; + bphc.bphc_path = ddi_pathname(dip, pathbuf); + bphc.bphc_comp = comp; + bphc.bphc_olevel = old_level; + bphc.bphc_nlevel = level; + pspm.pspm_canblock = PM_CANBLOCK_BLOCK; + pspm.pspm_scan = 0; + bphc.bphc_private = &pspm; + (void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL, + BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result); + kmem_free(pathbuf, MAXPATHLEN); +} + +/* + * Check if we need to resume a BC device, and make the attach call as required. + */ +static int +pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level) +{ + int ret = DDI_SUCCESS; + + if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) { + ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED); + /* ppm is not interested in DDI_PM_RESUME */ + if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS) + /* XXX Should we mark it resumed, */ + /* even though it failed? */ + cmn_err(CE_WARN, "!pm: Can't resume %s@%s", + PM_NAME(dip), PM_ADDR(dip)); + DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED; + } + + return (ret); +} + +/* + * Tests outside the lock to see if we should bother to enqueue an entry + * for any watching process. If yes, then caller will take the lock and + * do the full protocol + */ +static int +pm_watchers() +{ + if (pm_processes_stopped) + return (0); + return (pm_pscc_direct || pm_pscc_interest); +} + +/* + * A driver is reporting that the power of one of its device's components + * has changed. Update the power state accordingly. + */ +int +pm_power_has_changed(dev_info_t *dip, int comp, int level) +{ + PMD_FUNC(pmf, "pm_power_has_changed") + int ret; + dev_info_t *pdip = ddi_get_parent(dip); + struct pm_component *cp; + int blocked, circ, pcirc, old_level; + static int pm_phc_impl(dev_info_t *, int, int, int); + + if (level < 0) { + PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf, + PM_DEVICE(dip), level)) + return (DDI_FAILURE); + } + + PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf, + PM_DEVICE(dip), comp, level)) + + if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) || + !e_pm_valid_power(dip, comp, level)) + return (DDI_FAILURE); + + /* + * A driver thread calling pm_power_has_changed and another thread + * calling pm_set_power can deadlock. The problem is not resolvable + * by changing lock order, so we use pm_blocked_by_us() to detect + * this specific deadlock. If we can't get the lock immediately + * and we are deadlocked, just update the component's level, do + * notifications, and return. We intend to update the total power + * state later (if the other thread fails to set power to the + * desired level). If we were called because of a power change on a + * component that isn't involved in a set_power op, update all state + * immediately. + */ + cp = PM_CP(dip, comp); + while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) { + if (((blocked = pm_blocked_by_us(dip)) != 0) && + (cp->pmc_flags & PM_POWER_OP)) { + if (pm_watchers()) { + mutex_enter(&pm_rsvp_lock); + pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, + level, cur_power(cp), PM_CANBLOCK_BLOCK); + mutex_exit(&pm_rsvp_lock); + } + if (pdip && PM_WANTS_NOTIFICATION(pdip)) + pm_notify_parent(dip, + pdip, comp, cur_power(cp), level); + (void) pm_check_and_resume(dip, + comp, cur_power(cp), level); + + /* + * Stash the old power index, update curpwr, and flag + * that the total power state needs to be synched. + */ + cp->pmc_flags |= PM_PHC_WHILE_SET_POWER; + /* + * Several pm_power_has_changed calls could arrive + * while the set power path remains blocked. Keep the + * oldest old power and the newest new power of any + * sequence of phc calls which arrive during deadlock. + */ + if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN) + cp->pmc_phc_pwr = cp->pmc_cur_pwr; + cp->pmc_cur_pwr = + pm_level_to_index(dip, cp, level); + PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, " + "level=%d\n", pmf, PM_DEVICE(dip), comp, level)) + return (DDI_SUCCESS); + } else + if (blocked) { /* blocked, but different cmpt? */ + if (!ndi_devi_tryenter(pdip, &pcirc)) { + cmn_err(CE_NOTE, + "!pm: parent kuc not updated due " + "to possible deadlock.\n"); + return (pm_phc_impl(dip, + comp, level, 1)); + } + old_level = cur_power(cp); + if (pdip && !PM_WANTS_NOTIFICATION(pdip) && + (!PM_ISBC(dip) || comp == 0) && + POWERING_ON(old_level, level)) + pm_hold_power(pdip); + ret = pm_phc_impl(dip, comp, level, 1); + if (pdip && !PM_WANTS_NOTIFICATION(pdip)) { + if ((!PM_ISBC(dip) || + comp == 0) && level == 0 && + old_level != PM_LEVEL_UNKNOWN) + pm_rele_power(pdip); + } + ndi_devi_exit(pdip, pcirc); + /* child lock not held: deadlock */ + return (ret); + } + delay(1); + PMD(PMD_PHC, ("%s: try lock again\n", pmf)) + } + + /* non-deadlock case */ + old_level = cur_power(cp); + if (pdip && !PM_WANTS_NOTIFICATION(pdip) && + (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level)) + pm_hold_power(pdip); + ret = pm_phc_impl(dip, comp, level, 1); + if (pdip && !PM_WANTS_NOTIFICATION(pdip)) { + if ((!PM_ISBC(dip) || comp == 0) && level == 0 && + old_level != PM_LEVEL_UNKNOWN) + pm_rele_power(pdip); + } + PM_UNLOCK_POWER(dip, circ); + ndi_devi_exit(pdip, pcirc); + return (ret); +} + +/* + * Account for power changes to a component of the the console frame buffer. + * If lowering power from full (or "unkown", which is treatd as full) + * we will increment the "components off" count of the fb device. + * Subsequent lowering of the same component doesn't affect the count. If + * raising a component back to full power, we will decrement the count. + * + * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1) + */ +static int +calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new) +{ + struct pm_component *cp = PM_CP(dip, cmpt); + int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr); + int want_normal = (new == cp->pmc_norm_pwr); + int incr = 0; + + if (on && !want_normal) + incr = 1; + else if (!on && want_normal) + incr = -1; + return (incr); +} + +/* + * Adjust the count of console frame buffer components < full power. + */ +static void +update_comps_off(int incr, dev_info_t *dip) +{ + mutex_enter(&pm_cfb_lock); + pm_cfb_comps_off += incr; + ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip)); + mutex_exit(&pm_cfb_lock); +} + +/* + * Update the power state in the framework (via the ppm). The 'notify' + * argument tells whether to notify watchers. Power lock is already held. + */ +static int +pm_phc_impl(dev_info_t *dip, int comp, int level, int notify) +{ + PMD_FUNC(pmf, "phc_impl") + power_req_t power_req; + int i, dodeps = 0; + dev_info_t *pdip = ddi_get_parent(dip); + int result; + int old_level; + struct pm_component *cp; + int incr = 0; + dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm; + int work_type = 0; + char *pathbuf; + + /* Must use "official" power level for this test. */ + cp = PM_CP(dip, comp); + old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ? + cp->pmc_phc_pwr : cp->pmc_cur_pwr); + if (old_level != PM_LEVEL_UNKNOWN) + old_level = cp->pmc_comp.pmc_lvals[old_level]; + + if (level == old_level) { + PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at " + "level=%d\n", pmf, PM_DEVICE(dip), comp, level)) + return (DDI_SUCCESS); + } + + /* + * Tell ppm about this. + */ + power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY; + power_req.req.ppm_notify_level_req.who = dip; + power_req.req.ppm_notify_level_req.cmpt = comp; + power_req.req.ppm_notify_level_req.new_level = level; + power_req.req.ppm_notify_level_req.old_level = old_level; + if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, + &result) == DDI_FAILURE) { + PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n", + pmf, PM_DEVICE(dip), level)) + return (DDI_FAILURE); + } + + if (PM_IS_CFB(dip)) { + incr = calc_cfb_comps_incr(dip, comp, old_level, level); + + if (incr) { + update_comps_off(incr, dip); + PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d " + "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip), + comp, old_level, level, pm_cfb_comps_off)) + } + } + e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level); + result = DDI_SUCCESS; + + if (notify) { + if (pdip && PM_WANTS_NOTIFICATION(pdip)) + pm_notify_parent(dip, pdip, comp, old_level, level); + (void) pm_check_and_resume(dip, comp, old_level, level); + } + + /* + * Decrement the dependency kidsup count if we turn a device + * off. + */ + if (POWERING_OFF(old_level, level)) { + dodeps = 1; + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + cp = PM_CP(dip, i); + if (cur_power(cp)) { + dodeps = 0; + break; + } + } + if (dodeps) + work_type = PM_DEP_WK_POWER_OFF; + } + + /* + * Increment if we turn it on. Check to see + * if other comps are already on, if so, + * dont increment. + */ + if (POWERING_ON(old_level, level)) { + dodeps = 1; + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + cp = PM_CP(dip, i); + if (comp == i) + continue; + /* -1 also treated as 0 in this case */ + if (cur_power(cp) > 0) { + dodeps = 0; + break; + } + } + if (dodeps) + work_type = PM_DEP_WK_POWER_ON; + } + + if (dodeps) { + pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + pm_dispatch_to_dep_thread(work_type, pathbuf, NULL, + PM_DEP_NOWAIT, NULL, 0); + kmem_free(pathbuf, MAXPATHLEN); + } + + if (notify && (level != old_level) && pm_watchers()) { + mutex_enter(&pm_rsvp_lock); + pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level, + PM_CANBLOCK_BLOCK); + mutex_exit(&pm_rsvp_lock); + } + + PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip))) + pm_rescan(dip); + return (DDI_SUCCESS); +} + +/* + * This function is called at startup time to notify pm of the existence + * of any platform power managers for this platform. As a result of + * this registration, each function provided will be called each time + * a device node is attached, until one returns true, and it must claim the + * device node (by returning non-zero) if it wants to be involved in the + * node's power management. If it does claim the node, then it will + * subsequently be notified of attach and detach events. + * + */ + +int +pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip) +{ + PMD_FUNC(pmf, "register_ppm") + struct ppm_callbacks *ppmcp; + pm_component_t *cp; + int i, pwr, result, circ; + power_req_t power_req; + struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req; + void pm_ppm_claim(dev_info_t *); + + mutex_enter(&ppm_lock); + ppmcp = ppm_callbacks; + for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) { + if (ppmcp->ppmc_func == NULL) { + ppmcp->ppmc_func = func; + ppmcp->ppmc_dip = dip; + break; + } + } + mutex_exit(&ppm_lock); + + if (i >= MAX_PPM_HANDLERS) + return (DDI_FAILURE); + while ((dip = ddi_get_parent(dip)) != NULL) { + if (PM_GET_PM_INFO(dip) == NULL) + continue; + pm_ppm_claim(dip); + if (pm_ppm_claimed(dip)) { + /* + * Tell ppm about this. + */ + power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY; + p->old_level = PM_LEVEL_UNKNOWN; + p->who = dip; + PM_LOCK_POWER(dip, &circ); + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + cp = PM_CP(dip, i); + pwr = cp->pmc_cur_pwr; + if (pwr != PM_LEVEL_UNKNOWN) { + p->cmpt = i; + p->new_level = cur_power(cp); + p->old_level = PM_LEVEL_UNKNOWN; + if (pm_ctlops(PPM(dip), dip, + DDI_CTLOPS_POWER, &power_req, + &result) == DDI_FAILURE) { + PMD(PMD_FAIL, ("%s: pc " + "%s@%s(%s#%d) to %d " + "fails\n", pmf, + PM_DEVICE(dip), pwr)) + } + } + } + PM_UNLOCK_POWER(dip, circ); + } + } + return (DDI_SUCCESS); +} + +/* + * Call the ppm's that have registered and adjust the devinfo struct as + * appropriate. First one to claim it gets it. The sets of devices claimed + * by each ppm are assumed to be disjoint. + */ +void +pm_ppm_claim(dev_info_t *dip) +{ + struct ppm_callbacks *ppmcp; + + if (PPM(dip)) { + return; + } + mutex_enter(&ppm_lock); + for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) { + if ((*ppmcp->ppmc_func)(dip)) { + DEVI(dip)->devi_pm_ppm = + (struct dev_info *)ppmcp->ppmc_dip; + mutex_exit(&ppm_lock); + return; + } + } + mutex_exit(&ppm_lock); +} + +/* + * Node is being detached so stop autopm until we see if it succeeds, in which + * case pm_stop will be called. For backwards compatible devices we bring the + * device up to full power on the assumption the detach will succeed. + */ +void +pm_detaching(dev_info_t *dip) +{ + PMD_FUNC(pmf, "detaching") + pm_info_t *info = PM_GET_PM_INFO(dip); + int iscons; + + PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip), + PM_NUMCMPTS(dip))) + if (info == NULL) + return; + ASSERT(DEVI_IS_DETACHING(dip)); + PM_LOCK_DIP(dip); + info->pmi_dev_pm_state |= PM_DETACHING; + PM_UNLOCK_DIP(dip); + if (!PM_ISBC(dip)) + pm_scan_stop(dip); + + /* + * console and old-style devices get brought up when detaching. + */ + iscons = PM_IS_CFB(dip); + if (iscons || PM_ISBC(dip)) { + (void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS); + if (iscons) { + mutex_enter(&pm_cfb_lock); + while (cfb_inuse) { + mutex_exit(&pm_cfb_lock); + PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf)) + delay(1); + mutex_enter(&pm_cfb_lock); + } + ASSERT(cfb_dip_detaching == NULL); + ASSERT(cfb_dip); + cfb_dip_detaching = cfb_dip; /* case detach fails */ + cfb_dip = NULL; + mutex_exit(&pm_cfb_lock); + } + } +} + +/* + * Node failed to detach. If it used to be autopm'd, make it so again. + */ +void +pm_detach_failed(dev_info_t *dip) +{ + PMD_FUNC(pmf, "detach_failed") + pm_info_t *info = PM_GET_PM_INFO(dip); + int pm_all_at_normal(dev_info_t *); + + if (info == NULL) + return; + ASSERT(DEVI_IS_DETACHING(dip)); + if (info->pmi_dev_pm_state & PM_DETACHING) { + info->pmi_dev_pm_state &= ~PM_DETACHING; + if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) { + /* Make sure the operation is still needed */ + if (!pm_all_at_normal(dip)) { + if (pm_all_to_normal(dip, + PM_CANBLOCK_FAIL) != DDI_SUCCESS) { + PMD(PMD_ERROR, ("%s: could not bring " + "%s@%s(%s#%d) to normal\n", pmf, + PM_DEVICE(dip))) + } + } + info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED; + } + } + if (!PM_ISBC(dip)) { + mutex_enter(&pm_scan_lock); + if (autopm_enabled) + pm_scan_init(dip); + mutex_exit(&pm_scan_lock); + pm_rescan(dip); + } +} + +/* generic Backwards Compatible component */ +static char *bc_names[] = {"off", "on"}; + +static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]}; + +static void +e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm) +{ + pm_comp_t *pmc; + pmc = &cp->pmc_comp; + pmc->pmc_numlevels = 2; + pmc->pmc_lvals[0] = 0; + pmc->pmc_lvals[1] = norm; + e_pm_set_cur_pwr(dip, cp, norm); +} + +static void +e_pm_default_components(dev_info_t *dip, int cmpts) +{ + int i; + pm_component_t *p = DEVI(dip)->devi_pm_components; + + p = DEVI(dip)->devi_pm_components; + for (i = 0; i < cmpts; i++, p++) { + p->pmc_comp = bc_comp; /* struct assignment */ + p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int), + KM_SLEEP); + p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int), + KM_SLEEP); + p->pmc_comp.pmc_numlevels = 2; + p->pmc_comp.pmc_thresh[0] = INT_MAX; + p->pmc_comp.pmc_thresh[1] = INT_MAX; + } +} + +/* + * Called from functions that require components to exist already to allow + * for their creation by parsing the pm-components property. + * Device will not be power managed as a result of this call + * No locking needed because we're single threaded by the ndi_devi_enter + * done while attaching, and the device isn't visible until after it has + * attached + */ +int +pm_premanage(dev_info_t *dip, int style) +{ + PMD_FUNC(pmf, "premanage") + pm_comp_t *pcp, *compp; + int cmpts, i, norm, error; + pm_component_t *p = DEVI(dip)->devi_pm_components; + pm_comp_t *pm_autoconfig(dev_info_t *, int *); + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + /* + * If this dip has already been processed, don't mess with it + */ + if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE) + return (DDI_SUCCESS); + if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) { + return (DDI_FAILURE); + } + /* + * Look up pm-components property and create components accordingly + * If that fails, fall back to backwards compatibility + */ + if ((compp = pm_autoconfig(dip, &error)) == NULL) { + /* + * If error is set, the property existed but was not well formed + */ + if (error || (style == PM_STYLE_NEW)) { + DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED; + return (DDI_FAILURE); + } + /* + * If they don't have the pm-components property, then we + * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl" + * behavior driver must have called pm_create_components, and + * we need to flesh out dummy components + */ + if ((cmpts = PM_NUMCMPTS(dip)) == 0) { + /* + * Not really failure, but we don't want the + * caller to treat it as success + */ + return (DDI_FAILURE); + } + DEVI(dip)->devi_pm_flags |= PMC_BC; + e_pm_default_components(dip, cmpts); + for (i = 0; i < cmpts; i++) { + /* + * if normal power not set yet, we don't really know + * what *ANY* of the power values are. If normal + * power is set, then we assume for this backwards + * compatible case that the values are 0, normal power. + */ + norm = pm_get_normal_power(dip, i); + if (norm == (uint_t)-1) { + PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf, + PM_DEVICE(dip), i)) + return (DDI_FAILURE); + } + /* + * Components of BC devices start at their normal power, + * so count them to be not at their lowest power. + */ + PM_INCR_NOTLOWEST(dip); + e_pm_default_levels(dip, PM_CP(dip, i), norm); + } + } else { + /* + * e_pm_create_components was called from pm_autoconfig(), it + * creates components with no descriptions (or known levels) + */ + cmpts = PM_NUMCMPTS(dip); + ASSERT(cmpts != 0); + pcp = compp; + p = DEVI(dip)->devi_pm_components; + for (i = 0; i < cmpts; i++, p++) { + p->pmc_comp = *pcp++; /* struct assignment */ + ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0); + e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN); + } + pm_set_device_threshold(dip, pm_system_idle_threshold, + PMC_DEF_THRESH); + kmem_free(compp, cmpts * sizeof (pm_comp_t)); + } + return (DDI_SUCCESS); +} + +/* + * Called from during or after the device's attach to let us know it is ready + * to play autopm. Look up the pm model and manage the device accordingly. + * Returns system call errno value. + * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be + * a little cleaner + * + * Called with dip lock held, return with dip lock unheld. + */ + +int +e_pm_manage(dev_info_t *dip, int style) +{ + PMD_FUNC(pmf, "e_manage") + pm_info_t *info; + dev_info_t *pdip = ddi_get_parent(dip); + int pm_thresh_specd(dev_info_t *); + int count; + char *pathbuf; + + if (pm_premanage(dip, style) != DDI_SUCCESS) { + return (DDI_FAILURE); + } + PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + ASSERT(PM_GET_PM_INFO(dip) == NULL); + info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP); + + /* + * Now set up parent's kidsupcnt. BC nodes are assumed to start + * out at their normal power, so they are "up", others start out + * unknown, which is effectively "up". Parent which want notification + * get kidsupcnt of 0 always. + */ + count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip); + if (count && pdip && !PM_WANTS_NOTIFICATION(pdip)) + e_pm_hold_rele_power(pdip, count); + + pm_set_pm_info(dip, info); + /* + * Apply any recorded thresholds + */ + (void) pm_thresh_specd(dip); + + /* + * Do dependency processing. + */ + pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf, + PM_DEP_NOWAIT, NULL, 0); + kmem_free(pathbuf, MAXPATHLEN); + + if (!PM_ISBC(dip)) { + mutex_enter(&pm_scan_lock); + if (autopm_enabled) { + pm_scan_init(dip); + mutex_exit(&pm_scan_lock); + pm_rescan(dip); + } else { + mutex_exit(&pm_scan_lock); + } + } + return (0); +} + +/* + * This is the obsolete exported interface for a driver to find out its + * "normal" (max) power. + * We only get components destroyed while no power management is + * going on (and the device is detached), so we don't need a mutex here + */ +int +pm_get_normal_power(dev_info_t *dip, int comp) +{ + + if (comp >= 0 && comp < PM_NUMCMPTS(dip)) { + return (PM_CP(dip, comp)->pmc_norm_pwr); + } + return (DDI_FAILURE); +} + +/* + * Fetches the current power level. Return DDI_SUCCESS or DDI_FAILURE. + */ +int +pm_get_current_power(dev_info_t *dip, int comp, int *levelp) +{ + if (comp >= 0 && comp < PM_NUMCMPTS(dip)) { + *levelp = PM_CURPOWER(dip, comp); + return (DDI_SUCCESS); + } + return (DDI_FAILURE); +} + +/* + * Returns current threshold of indicated component + */ +static int +cur_threshold(dev_info_t *dip, int comp) +{ + pm_component_t *cp = PM_CP(dip, comp); + int pwr; + + if (PM_ISBC(dip)) { + /* + * backwards compatible nodes only have one threshold + */ + return (cp->pmc_comp.pmc_thresh[1]); + } + pwr = cp->pmc_cur_pwr; + if (pwr == PM_LEVEL_UNKNOWN) { + int thresh; + if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) + thresh = pm_default_nexus_threshold; + else + thresh = pm_system_idle_threshold; + return (thresh); + } + ASSERT(cp->pmc_comp.pmc_thresh); + return (cp->pmc_comp.pmc_thresh[pwr]); +} + +/* + * Compute next lower component power level given power index. + */ +static int +pm_next_lower_power(pm_component_t *cp, int pwrndx) +{ + int nxt_pwr; + + if (pwrndx == PM_LEVEL_UNKNOWN) { + nxt_pwr = cp->pmc_comp.pmc_lvals[0]; + } else { + pwrndx--; + ASSERT(pwrndx >= 0); + nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx]; + } + return (nxt_pwr); +} + +/* + * Bring all components of device to normal power + */ +int +pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock) +{ + PMD_FUNC(pmf, "all_to_normal") + int *normal; + int i, ncomps, result; + size_t size; + int changefailed = 0; + + PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + ASSERT(PM_GET_PM_INFO(dip)); + if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) { + PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for " + "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + return (DDI_FAILURE); + } + ncomps = PM_NUMCMPTS(dip); + for (i = 0; i < ncomps; i++) { + if (pm_set_power(dip, i, normal[i], + PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) { + changefailed++; + PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set " + "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf, + PM_DEVICE(dip), i, normal[i], result)) + } + } + kmem_free(normal, size); + if (changefailed) { + PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) " + "to full power\n", pmf, changefailed, PM_DEVICE(dip))) + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +/* + * Returns true if all components of device are at normal power + */ +int +pm_all_at_normal(dev_info_t *dip) +{ + PMD_FUNC(pmf, "all_at_normal") + int *normal; + int i; + size_t size; + + PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) { + PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf)) + return (DDI_FAILURE); + } + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + int current = PM_CURPOWER(dip, i); + if (normal[i] > current) { + PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, " + "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i, + normal[i], current)) + break; + } + } + kmem_free(normal, size); + if (i != PM_NUMCMPTS(dip)) { + return (0); + } + return (1); +} + +static void +bring_wekeeps_up(char *keeper) +{ + PMD_FUNC(pmf, "bring_wekeeps_up") + int i; + pm_pdr_t *dp; + pm_info_t *wku_info; + char *kept_path; + dev_info_t *kept; + static void bring_pmdep_up(dev_info_t *, int); + + if (panicstr) { + return; + } + /* + * We process the request even if the keeper detaches because + * detach processing expects this to increment kidsupcnt of kept. + */ + PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper)) + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (strcmp(dp->pdr_keeper, keeper) != 0) + continue; + for (i = 0; i < dp->pdr_kept_count; i++) { + kept_path = dp->pdr_kept_paths[i]; + if (kept_path == NULL) + continue; + ASSERT(kept_path[0] != '\0'); + if ((kept = pm_name_to_dip(kept_path, 1)) == NULL) + continue; + wku_info = PM_GET_PM_INFO(kept); + if (wku_info == NULL) { + if (kept) + ddi_release_devi(kept); + continue; + } + /* + * Don't mess with it if it is being detached, it isn't + * safe to call its power entry point + */ + if (wku_info->pmi_dev_pm_state & PM_DETACHING) { + if (kept) + ddi_release_devi(kept); + continue; + } + bring_pmdep_up(kept, 1); + ddi_release_devi(kept); + } + } +} + +/* + * Bring up the 'kept' device passed as argument + */ +static void +bring_pmdep_up(dev_info_t *kept_dip, int hold) +{ + PMD_FUNC(pmf, "bring_pmdep_up") + int is_all_at_normal = 0; + + /* + * If the kept device has been unmanaged, do nothing. + */ + if (!PM_GET_PM_INFO(kept_dip)) + return; + + /* Just ignore DIRECT PM device till they are released. */ + if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) && + !(is_all_at_normal = pm_all_at_normal(kept_dip))) { + PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) " + "controlling process did something else\n", pmf, + PM_DEVICE(kept_dip))) + DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP; + return; + } + /* if we got here the keeper had a transition from OFF->ON */ + if (hold) + pm_hold_power(kept_dip); + + if (!is_all_at_normal) + (void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL); +} + +/* + * A bunch of stuff that belongs only to the next routine (or two) + */ + +static const char namestr[] = "NAME="; +static const int nameln = sizeof (namestr) - 1; +static const char pmcompstr[] = "pm-components"; + +struct pm_comp_pkg { + pm_comp_t *comp; + struct pm_comp_pkg *next; +}; + +#define isdigit(ch) ((ch) >= '0' && (ch) <= '9') + +#define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \ + ((ch) >= 'A' && (ch) <= 'F')) + +/* + * Rather than duplicate this code ... + * (this code excerpted from the function that follows it) + */ +#define FINISH_COMP { \ + ASSERT(compp); \ + compp->pmc_lnames_sz = size; \ + tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \ + compp->pmc_numlevels = level; \ + compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \ + compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \ + compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \ + /* copy string out of prop array into buffer */ \ + for (j = 0; j < level; j++) { \ + compp->pmc_thresh[j] = INT_MAX; /* only [0] sticks */ \ + compp->pmc_lvals[j] = lvals[j]; \ + (void) strcpy(tp, lnames[j]); \ + compp->pmc_lnames[j] = tp; \ + tp += lszs[j]; \ + } \ + ASSERT(tp > compp->pmc_lname_buf && tp <= \ + compp->pmc_lname_buf + compp->pmc_lnames_sz); \ + } + +/* + * Create (empty) component data structures. + */ +static void +e_pm_create_components(dev_info_t *dip, int num_components) +{ + struct pm_component *compp, *ocompp; + int i, size = 0; + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + ASSERT(!DEVI(dip)->devi_pm_components); + ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)); + size = sizeof (struct pm_component) * num_components; + + compp = kmem_zalloc(size, KM_SLEEP); + ocompp = compp; + DEVI(dip)->devi_pm_comp_size = size; + DEVI(dip)->devi_pm_num_components = num_components; + PM_LOCK_BUSY(dip); + for (i = 0; i < num_components; i++) { + compp->pmc_timestamp = gethrestime_sec(); + compp->pmc_norm_pwr = (uint_t)-1; + compp++; + } + PM_UNLOCK_BUSY(dip); + DEVI(dip)->devi_pm_components = ocompp; + DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE; +} + +/* + * Parse hex or decimal value from char string + */ +static char * +pm_parsenum(char *cp, int *valp) +{ + int ch, offset; + char numbuf[256]; + char *np = numbuf; + int value = 0; + + ch = *cp++; + if (isdigit(ch)) { + if (ch == '0') { + if ((ch = *cp++) == 'x' || ch == 'X') { + ch = *cp++; + while (isxdigit(ch)) { + *np++ = (char)ch; + ch = *cp++; + } + *np = 0; + cp--; + goto hexval; + } else { + goto digit; + } + } else { +digit: + while (isdigit(ch)) { + *np++ = (char)ch; + ch = *cp++; + } + *np = 0; + cp--; + goto decval; + } + } else + return (NULL); + +hexval: + for (np = numbuf; *np; np++) { + if (*np >= 'a' && *np <= 'f') + offset = 'a' - 10; + else if (*np >= 'A' && *np <= 'F') + offset = 'A' - 10; + else if (*np >= '0' && *np <= '9') + offset = '0'; + value *= 16; + value += *np - offset; + } + *valp = value; + return (cp); + +decval: + offset = '0'; + for (np = numbuf; *np; np++) { + value *= 10; + value += *np - offset; + } + *valp = value; + return (cp); +} + +/* + * Set max (previously documented as "normal") power. + */ +static void +e_pm_set_max_power(dev_info_t *dip, int component_number, int level) +{ + PM_CP(dip, component_number)->pmc_norm_pwr = level; +} + +/* + * Internal routine for destroying components + * It is called even when there might not be any, so it must be forgiving. + */ +static void +e_pm_destroy_components(dev_info_t *dip) +{ + int i; + struct pm_component *cp; + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + if (PM_NUMCMPTS(dip) == 0) + return; + cp = DEVI(dip)->devi_pm_components; + ASSERT(cp); + for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) { + int nlevels = cp->pmc_comp.pmc_numlevels; + kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int)); + kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int)); + /* + * For BC nodes, the rest is static in bc_comp, so skip it + */ + if (PM_ISBC(dip)) + continue; + kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz); + kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *)); + kmem_free(cp->pmc_comp.pmc_lname_buf, + cp->pmc_comp.pmc_lnames_sz); + } + kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size); + DEVI(dip)->devi_pm_components = NULL; + DEVI(dip)->devi_pm_num_components = 0; + DEVI(dip)->devi_pm_flags &= + ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED); +} + +/* + * Read the pm-components property (if there is one) and use it to set up + * components. Returns a pointer to an array of component structures if + * pm-components found and successfully parsed, else returns NULL. + * Sets error return *errp to true to indicate a failure (as opposed to no + * property being present). + */ +pm_comp_t * +pm_autoconfig(dev_info_t *dip, int *errp) +{ + PMD_FUNC(pmf, "autoconfig") + uint_t nelems; + char **pp; + pm_comp_t *compp = NULL; + int i, j, level, components = 0; + size_t size = 0; + struct pm_comp_pkg *p, *ptail; + struct pm_comp_pkg *phead = NULL; + int *lvals = NULL; + int *lszs = NULL; + int *np = NULL; + int npi = 0; + char **lnames = NULL; + char *cp, *tp; + pm_comp_t *ret = NULL; + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + *errp = 0; /* assume success */ + if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) { + return (NULL); + } + + if (nelems < 3) { /* need at least one name and two levels */ + goto errout; + } + + /* + * pm_create_components is no longer allowed + */ + if (PM_NUMCMPTS(dip) != 0) { + PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n", + pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip))) + goto errout; + } + + lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP); + lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP); + lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP); + np = kmem_alloc(nelems * sizeof (int), KM_SLEEP); + + level = 0; + phead = NULL; + for (i = 0; i < nelems; i++) { + cp = pp[i]; + if (!isdigit(*cp)) { /* must be name */ + if (strncmp(cp, namestr, nameln) != 0) { + goto errout; + } + if (i != 0) { + if (level == 0) { /* no level spec'd */ + PMD(PMD_ERROR, ("%s: no level spec'd\n", + pmf)) + goto errout; + } + np[npi++] = lvals[level - 1]; + /* finish up previous component levels */ + FINISH_COMP; + } + cp += nameln; + if (!*cp) { + PMD(PMD_ERROR, ("%s: nsa\n", pmf)) + goto errout; + } + p = kmem_zalloc(sizeof (*phead), KM_SLEEP); + if (phead == NULL) { + phead = ptail = p; + } else { + ptail->next = p; + ptail = p; + } + compp = p->comp = kmem_zalloc(sizeof (pm_comp_t), + KM_SLEEP); + compp->pmc_name_sz = strlen(cp) + 1; + compp->pmc_name = kmem_zalloc(compp->pmc_name_sz, + KM_SLEEP); + (void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz); + components++; + level = 0; + } else { /* better be power level <num>=<name> */ +#ifdef DEBUG + tp = cp; +#endif + if (i == 0 || + (cp = pm_parsenum(cp, &lvals[level])) == NULL) { + PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp)) + goto errout; + } +#ifdef DEBUG + tp = cp; +#endif + if (*cp++ != '=' || !*cp) { + PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp)) + goto errout; + } + + lszs[level] = strlen(cp) + 1; + size += lszs[level]; + lnames[level] = cp; /* points into prop string */ + level++; + } + } + np[npi++] = lvals[level - 1]; + if (level == 0) { /* ended with a name */ + PMD(PMD_ERROR, ("%s: ewn\n", pmf)) + goto errout; + } + FINISH_COMP; + + + /* + * Now we have a list of components--we have to return instead an + * array of them, but we can just copy the top level and leave + * the rest as is + */ + (void) e_pm_create_components(dip, components); + for (i = 0; i < components; i++) + e_pm_set_max_power(dip, i, np[i]); + + ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP); + for (i = 0, p = phead; i < components; i++) { + ASSERT(p); + /* + * Now sanity-check values: levels must be monotonically + * increasing + */ + if (p->comp->pmc_numlevels < 2) { + PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d " + "levels\n", pmf, + p->comp->pmc_name, PM_DEVICE(dip), + p->comp->pmc_numlevels)) + goto errout; + } + for (j = 0; j < p->comp->pmc_numlevels; j++) { + if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) && + (p->comp->pmc_lvals[j] <= + p->comp->pmc_lvals[j - 1]))) { + PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) " + "not mono. incr, %d follows %d\n", pmf, + p->comp->pmc_name, PM_DEVICE(dip), + p->comp->pmc_lvals[j], + p->comp->pmc_lvals[j - 1])) + goto errout; + } + } + ret[i] = *p->comp; /* struct assignment */ + for (j = 0; j < i; j++) { + /* + * Test for unique component names + */ + if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) { + PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not " + "unique\n", pmf, ret[j].pmc_name, + PM_DEVICE(dip))) + goto errout; + } + } + ptail = p; + p = p->next; + phead = p; /* errout depends on phead making sense */ + kmem_free(ptail->comp, sizeof (*ptail->comp)); + kmem_free(ptail, sizeof (*ptail)); + } +out: + ddi_prop_free(pp); + if (lvals) + kmem_free(lvals, nelems * sizeof (int)); + if (lszs) + kmem_free(lszs, nelems * sizeof (int)); + if (lnames) + kmem_free(lnames, nelems * sizeof (char *)); + if (np) + kmem_free(np, nelems * sizeof (int)); + return (ret); + +errout: + e_pm_destroy_components(dip); + *errp = 1; /* signal failure */ + cmn_err(CE_CONT, "!pm: %s property ", pmcompstr); + for (i = 0; i < nelems - 1; i++) + cmn_err(CE_CONT, "!'%s', ", pp[i]); + if (nelems != 0) + cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]); + cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip)); + for (p = phead; p; ) { + pm_comp_t *pp; + int n; + + ptail = p; + /* + * Free component data structures + */ + pp = p->comp; + n = pp->pmc_numlevels; + if (pp->pmc_name_sz) { + kmem_free(pp->pmc_name, pp->pmc_name_sz); + } + if (pp->pmc_lnames_sz) { + kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz); + } + if (pp->pmc_lnames) { + kmem_free(pp->pmc_lnames, n * (sizeof (char *))); + } + if (pp->pmc_thresh) { + kmem_free(pp->pmc_thresh, n * (sizeof (int))); + } + if (pp->pmc_lvals) { + kmem_free(pp->pmc_lvals, n * (sizeof (int))); + } + p = ptail->next; + kmem_free(ptail, sizeof (*ptail)); + } + if (ret != NULL) + kmem_free(ret, components * sizeof (pm_comp_t)); + ret = NULL; + goto out; +} + +/* + * Set threshold values for a devices components by dividing the target + * threshold (base) by the number of transitions and assign each transition + * that threshold. This will get the entire device down in the target time if + * all components are idle and even if there are dependencies among components. + * + * Devices may well get powered all the way down before the target time, but + * at least the EPA will be happy. + */ +void +pm_set_device_threshold(dev_info_t *dip, int base, int flag) +{ + PMD_FUNC(pmf, "set_device_threshold") + int target_threshold = (base * 95) / 100; + int level, comp; /* loop counters */ + int transitions = 0; + int ncomp = PM_NUMCMPTS(dip); + int thresh; + int remainder; + pm_comp_t *pmc; + int i, circ; + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + PM_LOCK_DIP(dip); + /* + * First we handle the easy one. If we're setting the default + * threshold for a node with children, then we set it to the + * default nexus threshold (currently 0) and mark it as default + * nexus threshold instead + */ + if (PM_IS_NEXUS(dip)) { + if (flag == PMC_DEF_THRESH) { + PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf, + PM_DEVICE(dip))) + thresh = pm_default_nexus_threshold; + for (comp = 0; comp < ncomp; comp++) { + pmc = &PM_CP(dip, comp)->pmc_comp; + for (level = 1; level < pmc->pmc_numlevels; + level++) { + pmc->pmc_thresh[level] = thresh; + } + } + DEVI(dip)->devi_pm_dev_thresh = + pm_default_nexus_threshold; + /* + * If the nexus node is being reconfigured back to + * the default threshold, adjust the notlowest count. + */ + if (DEVI(dip)->devi_pm_flags & + (PMC_DEV_THRESH|PMC_COMP_THRESH)) { + PM_LOCK_POWER(dip, &circ); + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + if (PM_CURPOWER(dip, i) == 0) + continue; + mutex_enter(&pm_compcnt_lock); + ASSERT(pm_comps_notlowest); + pm_comps_notlowest--; + PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr " + "notlowest to %d\n", pmf, + PM_DEVICE(dip), pm_comps_notlowest)) + if (pm_comps_notlowest == 0) + pm_ppm_notify_all_lowest(dip, + PM_ALL_LOWEST); + mutex_exit(&pm_compcnt_lock); + } + PM_UNLOCK_POWER(dip, circ); + } + DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE; + DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH; + PM_UNLOCK_DIP(dip); + return; + } else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) { + /* + * If the nexus node is being configured for a + * non-default threshold, include that node in + * the notlowest accounting. + */ + PM_LOCK_POWER(dip, &circ); + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + if (PM_CURPOWER(dip, i) == 0) + continue; + mutex_enter(&pm_compcnt_lock); + if (pm_comps_notlowest == 0) + pm_ppm_notify_all_lowest(dip, + PM_NOT_ALL_LOWEST); + pm_comps_notlowest++; + PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr " + "notlowest to %d\n", pmf, + PM_DEVICE(dip), pm_comps_notlowest)) + mutex_exit(&pm_compcnt_lock); + } + PM_UNLOCK_POWER(dip, circ); + } + } + /* + * Compute the total number of transitions for all components + * of the device. Distribute the threshold evenly over them + */ + for (comp = 0; comp < ncomp; comp++) { + pmc = &PM_CP(dip, comp)->pmc_comp; + ASSERT(pmc->pmc_numlevels > 1); + transitions += pmc->pmc_numlevels - 1; + } + ASSERT(transitions); + thresh = target_threshold / transitions; + + for (comp = 0; comp < ncomp; comp++) { + pmc = &PM_CP(dip, comp)->pmc_comp; + for (level = 1; level < pmc->pmc_numlevels; level++) { + pmc->pmc_thresh[level] = thresh; + } + } + +#ifdef DEBUG + for (comp = 0; comp < ncomp; comp++) { + pmc = &PM_CP(dip, comp)->pmc_comp; + for (level = 1; level < pmc->pmc_numlevels; level++) { + PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) " + "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip), + comp, level, pmc->pmc_thresh[level])) + } + } +#endif + /* + * Distribute any remainder till they are all gone + */ + remainder = target_threshold - thresh * transitions; + level = 1; +#ifdef DEBUG + PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d " + "trans=%d\n", pmf, remainder, target_threshold, thresh, + transitions)) +#endif + while (remainder > 0) { + comp = 0; + while (remainder && (comp < ncomp)) { + pmc = &PM_CP(dip, comp)->pmc_comp; + if (level < pmc->pmc_numlevels) { + pmc->pmc_thresh[level] += 1; + remainder--; + } + comp++; + } + level++; + } +#ifdef DEBUG + for (comp = 0; comp < ncomp; comp++) { + pmc = &PM_CP(dip, comp)->pmc_comp; + for (level = 1; level < pmc->pmc_numlevels; level++) { + PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) " + "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip), + comp, level, pmc->pmc_thresh[level])) + } + } +#endif + ASSERT(PM_IAM_LOCKING_DIP(dip)); + DEVI(dip)->devi_pm_dev_thresh = base; + DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE; + DEVI(dip)->devi_pm_flags |= flag; + PM_UNLOCK_DIP(dip); +} + +/* + * Called when there is no old-style platform power management driver + */ +static int +ddi_no_platform_power(power_req_t *req) +{ + _NOTE(ARGUNUSED(req)) + return (DDI_FAILURE); +} + +/* + * This function calls the entry point supplied by the platform-specific + * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'. + * The use of global for getting the function name from platform-specific + * pm driver is not ideal, but it is simple and efficient. + * The previous property lookup was being done in the idle loop on swift + * systems without pmc chips and hurt deskbench performance as well as + * violating scheduler locking rules + */ +int (*pm_platform_power)(power_req_t *) = ddi_no_platform_power; + +/* + * Old obsolete interface for a device to request a power change (but only + * an increase in power) + */ +int +ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level) +{ + return (pm_raise_power(dip, cmpt, level)); +} + +/* + * The old obsolete interface to platform power management. Only used by + * Gypsy platform and APM on X86. + */ +int +ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level) +{ + power_req_t request; + + request.request_type = PMR_SET_POWER; + request.req.set_power_req.who = dip; + request.req.set_power_req.cmpt = pm_cmpt; + request.req.set_power_req.level = pm_level; + return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL)); +} + +/* + * A driver can invoke this from its detach routine when DDI_SUSPEND is + * passed. Returns true if subsequent processing could result in power being + * removed from the device. The arg is not currently used because it is + * implicit in the operation of cpr/DR. + */ +int +ddi_removing_power(dev_info_t *dip) +{ + _NOTE(ARGUNUSED(dip)) + return (pm_powering_down); +} + +/* + * Returns true if a device indicates that its parent handles suspend/resume + * processing for it. + */ +int +e_ddi_parental_suspend_resume(dev_info_t *dip) +{ + return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR); +} + +/* + * Called for devices which indicate that their parent does suspend/resume + * handling for them + */ +int +e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + power_req_t request; + request.request_type = PMR_SUSPEND; + request.req.suspend_req.who = dip; + request.req.suspend_req.cmd = cmd; + return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL)); +} + +/* + * Called for devices which indicate that their parent does suspend/resume + * handling for them + */ +int +e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + power_req_t request; + request.request_type = PMR_RESUME; + request.req.resume_req.who = dip; + request.req.resume_req.cmd = cmd; + return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL)); +} + +/* + * Old obsolete exported interface for drivers to create components. + * This is now handled by exporting the pm-components property. + */ +int +pm_create_components(dev_info_t *dip, int num_components) +{ + PMD_FUNC(pmf, "pm_create_components") + + if (num_components < 1) + return (DDI_FAILURE); + + if (!DEVI_IS_ATTACHING(dip)) { + return (DDI_FAILURE); + } + + /* don't need to lock dip because attach is single threaded */ + if (DEVI(dip)->devi_pm_components) { + PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf, + PM_DEVICE(dip), PM_NUMCMPTS(dip))) + return (DDI_FAILURE); + } + e_pm_create_components(dip, num_components); + DEVI(dip)->devi_pm_flags |= PMC_BC; + e_pm_default_components(dip, num_components); + return (DDI_SUCCESS); +} + +/* + * Obsolete interface previously called by drivers to destroy their components + * at detach time. This is now done automatically. However, we need to keep + * this for the old drivers. + */ +void +pm_destroy_components(dev_info_t *dip) +{ + PMD_FUNC(pmf, "pm_destroy_components") + dev_info_t *pdip = ddi_get_parent(dip); + + PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + ASSERT(DEVI_IS_DETACHING(dip)); +#ifdef DEBUG + if (!PM_ISBC(dip)) + cmn_err(CE_WARN, "!driver exporting pm-components property " + "(%s@%s) calls pm_destroy_components", PM_NAME(dip), + PM_ADDR(dip)); +#endif + /* + * We ignore this unless this is an old-style driver, except for + * printing the message above + */ + if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) { + PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + return; + } + ASSERT(PM_GET_PM_INFO(dip)); + + /* + * pm_unmanage will clear info pointer later, after dealing with + * dependencies + */ + ASSERT(!PM_GET_PM_SCAN(dip)); /* better be gone already */ + /* + * Now adjust parent's kidsupcnt. We check only comp 0. + * Parents that get notification are not adjusted because their + * kidsupcnt is always 0 (or 1 during probe and attach). + */ + if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip)) + pm_rele_power(pdip); +#ifdef DEBUG + else { + PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n", + pmf, PM_DEVICE(dip))) + } +#endif + e_pm_destroy_components(dip); + /* + * Forget we ever knew anything about the components of this device + */ + DEVI(dip)->devi_pm_flags &= + ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED); +} + +/* + * Exported interface for a driver to set a component busy. + */ +int +pm_busy_component(dev_info_t *dip, int cmpt) +{ + struct pm_component *cp; + + ASSERT(dip != NULL); + if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp)) + return (DDI_FAILURE); + PM_LOCK_BUSY(dip); + cp->pmc_busycount++; + cp->pmc_timestamp = 0; + PM_UNLOCK_BUSY(dip); + return (DDI_SUCCESS); +} + +/* + * Exported interface for a driver to set a component idle. + */ +int +pm_idle_component(dev_info_t *dip, int cmpt) +{ + PMD_FUNC(pmf, "pm_idle_component") + struct pm_component *cp; + pm_scan_t *scanp = PM_GET_PM_SCAN(dip); + + if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp)) + return (DDI_FAILURE); + + PM_LOCK_BUSY(dip); + if (cp->pmc_busycount) { + if (--(cp->pmc_busycount) == 0) + cp->pmc_timestamp = gethrestime_sec(); + } else { + cp->pmc_timestamp = gethrestime_sec(); + } + + PM_UNLOCK_BUSY(dip); + + /* + * if device becomes idle during idle down period, try scan it down + */ + if (scanp && PM_IS_PID(dip)) { + PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf, + PM_DEVICE(dip))) + pm_rescan(dip); + return (DDI_SUCCESS); + } + + /* + * handle scan not running with nexus threshold == 0 + */ + + if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) { + pm_rescan(dip); + } + + return (DDI_SUCCESS); +} + +/* + * This is the old obsolete interface called by drivers to set their normal + * power. Thus we can't fix its behavior or return a value. + * This functionality is replaced by the pm-component property. + * We'll only get components destroyed while no power management is + * going on (and the device is detached), so we don't need a mutex here + */ +void +pm_set_normal_power(dev_info_t *dip, int comp, int level) +{ + PMD_FUNC(pmf, "set_normal_power") +#ifdef DEBUG + if (!PM_ISBC(dip)) + cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s " + "(driver exporting pm-components property) ignored", + PM_NAME(dip), PM_ADDR(dip)); +#endif + if (PM_ISBC(dip)) { + PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, " + "level=%d\n", pmf, PM_DEVICE(dip), comp, level)) + e_pm_set_max_power(dip, comp, level); + e_pm_default_levels(dip, PM_CP(dip, comp), level); + } +} + +/* + * Called on a successfully detached driver to free pm resources + */ +static void +pm_stop(dev_info_t *dip) +{ + PMD_FUNC(pmf, "stop") + dev_info_t *pdip = ddi_get_parent(dip); + + ASSERT(!PM_IAM_LOCKING_DIP(dip)); + /* stopping scan, destroy scan data structure */ + if (!PM_ISBC(dip)) { + pm_scan_stop(dip); + pm_scan_fini(dip); + } + + if (PM_GET_PM_INFO(dip) != NULL) { + if (pm_unmanage(dip) == DDI_SUCCESS) { + /* + * Old style driver may have called + * pm_destroy_components already, but just in case ... + */ + e_pm_destroy_components(dip); + } else { + PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n", + pmf, PM_DEVICE(dip))) + } + } else { + if (PM_NUMCMPTS(dip)) + e_pm_destroy_components(dip); + else { + if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) { + DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID; + if (pdip && !PM_WANTS_NOTIFICATION(pdip)) { + pm_rele_power(pdip); + } else if (pdip && MDI_VHCI(pdip)) { + (void) mdi_power(pdip, + MDI_PM_RELE_POWER, + (void *)dip, NULL, 0); + } + } + } + } +} + +/* + * The node is the subject of a reparse pm props ioctl. Throw away the old + * info and start over. + */ +int +e_new_pm_props(dev_info_t *dip) +{ + if (PM_GET_PM_INFO(dip) != NULL) { + pm_stop(dip); + + if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) { + return (DDI_FAILURE); + } + } + e_pm_props(dip); + return (DDI_SUCCESS); +} + +/* + * Device has been attached, so process its pm properties + */ +void +e_pm_props(dev_info_t *dip) +{ + char *pp; + int len; + int flags = 0; + int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP; + + /* + * It doesn't matter if we do this more than once, we should always + * get the same answers, and if not, then the last one in is the + * best one. + */ + if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state", + (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) { + if (strcmp(pp, "needs-suspend-resume") == 0) { + flags = PMC_NEEDS_SR; + } else if (strcmp(pp, "no-suspend-resume") == 0) { + flags = PMC_NO_SR; + } else if (strcmp(pp, "parental-suspend-resume") == 0) { + flags = PMC_PARENTAL_SR; + } else { + cmn_err(CE_NOTE, "!device %s@%s has unrecognized " + "%s property value '%s'", PM_NAME(dip), + PM_ADDR(dip), "pm-hardware-state", pp); + } + kmem_free(pp, len); + } + /* + * This next segment (PMC_WANTS_NOTIFY) is in + * support of nexus drivers which will want to be involved in + * (or at least notified of) their child node's power level transitions. + * "pm-want-child-notification?" is defined by the parent. + */ + if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag, + "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip)) + flags |= PMC_WANTS_NOTIFY; + ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY, + dip, propflag, "pm-want-child-notification?")); + if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag, + "no-involuntary-power-cycles")) + flags |= PMC_NO_INVOL; + /* devfs single threads us */ + DEVI(dip)->devi_pm_flags |= flags; +} + +/* + * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm + * driver which has claimed a node. + * Sets old_power in arg struct. + */ +static int +pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip, + ddi_ctl_enum_t ctlop, void *arg, void *result) +{ + _NOTE(ARGUNUSED(dip)) + PMD_FUNC(pmf, "ctlops") + power_req_t *reqp = (power_req_t *)arg; + int retval; + dev_info_t *target_dip; + int new_level, old_level, cmpt; +#ifdef DEBUG + char *format; +#endif + + /* + * The interface for doing the actual power level changes is now + * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in + * different platform-specific power control drivers. + * + * This driver implements the "default" version of this interface. + * If no ppm driver has been installed then this interface is called + * instead. + */ + ASSERT(dip == NULL); + switch (ctlop) { + case DDI_CTLOPS_POWER: + switch (reqp->request_type) { + case PMR_PPM_SET_POWER: + { + target_dip = reqp->req.ppm_set_power_req.who; + ASSERT(target_dip == rdip); + new_level = reqp->req.ppm_set_power_req.new_level; + cmpt = reqp->req.ppm_set_power_req.cmpt; + /* pass back old power for the PM_LEVEL_UNKNOWN case */ + old_level = PM_CURPOWER(target_dip, cmpt); + reqp->req.ppm_set_power_req.old_level = old_level; + retval = pm_power(target_dip, cmpt, new_level); + PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->" + "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt, + old_level, new_level, (retval == DDI_SUCCESS ? + "chd" : "no chg"))) + return (retval); + } + + case PMR_PPM_PRE_DETACH: + case PMR_PPM_POST_DETACH: + case PMR_PPM_PRE_ATTACH: + case PMR_PPM_POST_ATTACH: + case PMR_PPM_PRE_PROBE: + case PMR_PPM_POST_PROBE: + case PMR_PPM_PRE_RESUME: + case PMR_PPM_INIT_CHILD: + case PMR_PPM_UNINIT_CHILD: +#ifdef DEBUG + switch (reqp->request_type) { + case PMR_PPM_PRE_DETACH: + format = "%s: PMR_PPM_PRE_DETACH " + "%s@%s(%s#%d)\n"; + break; + case PMR_PPM_POST_DETACH: + format = "%s: PMR_PPM_POST_DETACH " + "%s@%s(%s#%d) rets %d\n"; + break; + case PMR_PPM_PRE_ATTACH: + format = "%s: PMR_PPM_PRE_ATTACH " + "%s@%s(%s#%d)\n"; + break; + case PMR_PPM_POST_ATTACH: + format = "%s: PMR_PPM_POST_ATTACH " + "%s@%s(%s#%d) rets %d\n"; + break; + case PMR_PPM_PRE_PROBE: + format = "%s: PMR_PPM_PRE_PROBE " + "%s@%s(%s#%d)\n"; + break; + case PMR_PPM_POST_PROBE: + format = "%s: PMR_PPM_POST_PROBE " + "%s@%s(%s#%d) rets %d\n"; + break; + case PMR_PPM_PRE_RESUME: + format = "%s: PMR_PPM_PRE_RESUME " + "%s@%s(%s#%d) rets %d\n"; + break; + case PMR_PPM_INIT_CHILD: + format = "%s: PMR_PPM_INIT_CHILD " + "%s@%s(%s#%d)\n"; + break; + case PMR_PPM_UNINIT_CHILD: + format = "%s: PMR_PPM_UNINIT_CHILD " + "%s@%s(%s#%d)\n"; + break; + default: + break; + } + PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip), + reqp->req.ppm_config_req.result)) +#endif + return (DDI_SUCCESS); + + case PMR_PPM_POWER_CHANGE_NOTIFY: + /* + * Nothing for us to do + */ + ASSERT(reqp->req.ppm_notify_level_req.who == rdip); + PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY " + "%s@%s(%s#%d)[%d] %d->%d\n", pmf, + PM_DEVICE(reqp->req.ppm_notify_level_req.who), + reqp->req.ppm_notify_level_req.cmpt, + PM_CURPOWER(reqp->req.ppm_notify_level_req.who, + reqp->req.ppm_notify_level_req.cmpt), + reqp->req.ppm_notify_level_req.new_level)) + return (DDI_SUCCESS); + + case PMR_PPM_UNMANAGE: + PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n", + pmf, PM_DEVICE(rdip))) + return (DDI_SUCCESS); + + case PMR_PPM_LOCK_POWER: + pm_lock_power_single(reqp->req.ppm_lock_power_req.who, + reqp->req.ppm_lock_power_req.circp); + return (DDI_SUCCESS); + + case PMR_PPM_UNLOCK_POWER: + pm_unlock_power_single( + reqp->req.ppm_unlock_power_req.who, + reqp->req.ppm_unlock_power_req.circ); + return (DDI_SUCCESS); + + case PMR_PPM_TRY_LOCK_POWER: + *(int *)result = pm_try_locking_power_single( + reqp->req.ppm_lock_power_req.who, + reqp->req.ppm_lock_power_req.circp); + return (DDI_SUCCESS); + + case PMR_PPM_POWER_LOCK_OWNER: + target_dip = reqp->req.ppm_power_lock_owner_req.who; + ASSERT(target_dip == rdip); + reqp->req.ppm_power_lock_owner_req.owner = + DEVI(rdip)->devi_busy_thread; + return (DDI_SUCCESS); + default: + PMD(PMD_ERROR, ("%s: default!\n", pmf)) + return (DDI_FAILURE); + } + + default: + PMD(PMD_ERROR, ("%s: unknown\n", pmf)) + return (DDI_FAILURE); + } +} + +/* + * We overload the bus_ctl ops here--perhaps we ought to have a distinct + * power_ops struct for this functionality instead? + * However, we only ever do this on a ppm driver. + */ +int +pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v) +{ + int (*fp)(); + + /* if no ppm handler, call the default routine */ + if (d == NULL) { + return (pm_default_ctlops(d, r, op, a, v)); + } + if (!d || !r) + return (DDI_FAILURE); + ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops && + DEVI(d)->devi_ops->devo_bus_ops->bus_ctl); + + fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl; + return ((*fp)(d, r, op, a, v)); +} + +/* + * Called on a node when attach completes or the driver makes its first pm + * call (whichever comes first). + * In the attach case, device may not be power manageable at all. + * Don't need to lock the dip because we're single threaded by the devfs code + */ +static int +pm_start(dev_info_t *dip) +{ + PMD_FUNC(pmf, "start") + int ret; + dev_info_t *pdip = ddi_get_parent(dip); + int e_pm_manage(dev_info_t *, int); + void pm_noinvol_specd(dev_info_t *dip); + + e_pm_props(dip); + pm_noinvol_specd(dip); + /* + * If this dip has already been processed, don't mess with it + * (but decrement the speculative count we did above, as whatever + * code put it under pm already will have dealt with it) + */ + if (PM_GET_PM_INFO(dip)) { + PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n", + pmf, PM_DEVICE(dip))) + return (0); + } + ret = e_pm_manage(dip, PM_STYLE_UNKNOWN); + + if (PM_GET_PM_INFO(dip) == NULL) { + /* + * keep the kidsupcount increment as is + */ + DEVI(dip)->devi_pm_flags |= PMC_NOPMKID; + if (pdip && !PM_WANTS_NOTIFICATION(pdip)) { + pm_hold_power(pdip); + } else if (pdip && MDI_VHCI(pdip)) { + (void) mdi_power(pdip, MDI_PM_HOLD_POWER, + (void *)dip, NULL, 0); + } + + PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent " + "left up\n", pmf, PM_DEVICE(dip))) + } + + return (ret); +} + +/* + * Keep a list of recorded thresholds. For now we just keep a list and + * search it linearly. We don't expect too many entries. Can always hash it + * later if we need to. + */ +void +pm_record_thresh(pm_thresh_rec_t *rp) +{ + pm_thresh_rec_t *pptr, *ptr; + + ASSERT(*rp->ptr_physpath); + rw_enter(&pm_thresh_rwlock, RW_WRITER); + for (pptr = NULL, ptr = pm_thresh_head; + ptr; pptr = ptr, ptr = ptr->ptr_next) { + if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) { + /* replace this one */ + rp->ptr_next = ptr->ptr_next; + if (pptr) { + pptr->ptr_next = rp; + } else { + pm_thresh_head = rp; + } + rw_exit(&pm_thresh_rwlock); + kmem_free(ptr, ptr->ptr_size); + return; + } + continue; + } + /* + * There was not a match in the list, insert this one in front + */ + if (pm_thresh_head) { + rp->ptr_next = pm_thresh_head; + pm_thresh_head = rp; + } else { + rp->ptr_next = NULL; + pm_thresh_head = rp; + } + rw_exit(&pm_thresh_rwlock); +} + +/* + * Create a new dependency record and hang a new dependency entry off of it + */ +pm_pdr_t * +newpdr(char *kept, char *keeps, int isprop) +{ + size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t); + pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP); + p->pdr_size = size; + p->pdr_isprop = isprop; + p->pdr_kept_paths = NULL; + p->pdr_kept_count = 0; + p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t)); + (void) strcpy(p->pdr_kept, kept); + p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1); + (void) strcpy(p->pdr_keeper, keeps); + ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <= + (intptr_t)p + size); + ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <= + (intptr_t)p + size); + return (p); +} + +/* + * Keep a list of recorded dependencies. We only keep the + * keeper -> kept list for simplification. At this point We do not + * care about whether the devices are attached or not yet, + * this would be done in pm_keeper() and pm_kept(). + * If a PM_RESET_PM happens, then we tear down and forget the dependencies, + * and it is up to the user to issue the ioctl again if they want it + * (e.g. pmconfig) + * Returns true if dependency already exists in the list. + */ +int +pm_record_keeper(char *kept, char *keeper, int isprop) +{ + PMD_FUNC(pmf, "record_keeper") + pm_pdr_t *npdr, *ppdr, *pdr; + + PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper)) + ASSERT(kept && keeper); +#ifdef DEBUG + if (pm_debug & PMD_KEEPS) + prdeps("pm_record_keeper entry"); +#endif + for (ppdr = NULL, pdr = pm_dep_head; pdr; + ppdr = pdr, pdr = pdr->pdr_next) { + PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept, + pdr->pdr_keeper)) + if (strcmp(kept, pdr->pdr_kept) == 0 && + strcmp(keeper, pdr->pdr_keeper) == 0) { + PMD(PMD_KEEPS, ("%s: match\n", pmf)) + return (1); + } + } + /* + * We did not find any match, so we have to make an entry + */ + npdr = newpdr(kept, keeper, isprop); + if (ppdr) { + ASSERT(ppdr->pdr_next == NULL); + ppdr->pdr_next = npdr; + } else { + ASSERT(pm_dep_head == NULL); + pm_dep_head = npdr; + } +#ifdef DEBUG + if (pm_debug & PMD_KEEPS) + prdeps("pm_record_keeper after new record"); +#endif + if (!isprop) + pm_unresolved_deps++; + else + pm_prop_deps++; + return (0); +} + +/* + * Look up this device in the set of devices we've seen ioctls for + * to see if we are holding a threshold spec for it. If so, make it so. + * At ioctl time, we were given the physical path of the device. + */ +int +pm_thresh_specd(dev_info_t *dip) +{ + void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *); + char *path = 0; + char pathbuf[MAXNAMELEN]; + pm_thresh_rec_t *rp; + + path = ddi_pathname(dip, pathbuf); + + rw_enter(&pm_thresh_rwlock, RW_READER); + for (rp = pm_thresh_head; rp; rp = rp->ptr_next) { + if (strcmp(rp->ptr_physpath, path) != 0) + continue; + pm_apply_recorded_thresh(dip, rp); + rw_exit(&pm_thresh_rwlock); + return (1); + } + rw_exit(&pm_thresh_rwlock); + return (0); +} + +static int +pm_set_keeping(dev_info_t *keeper, dev_info_t *kept) +{ + PMD_FUNC(pmf, "set_keeping") + pm_info_t *kept_info; + int j, up = 0, circ; + void prdeps(char *); + + PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf, + PM_DEVICE(keeper), PM_DEVICE(kept))) +#ifdef DEBUG + if (pm_debug & PMD_KEEPS) + prdeps("Before PAD\n"); +#endif + ASSERT(keeper != kept); + if (PM_GET_PM_INFO(keeper) == NULL) { + cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device " + "%s@%s(%s#%d), but the latter is not power managed", + PM_DEVICE(keeper), PM_DEVICE(kept)); + PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not" + "power managed\n", pmf, PM_DEVICE(keeper))) + return (0); + } + kept_info = PM_GET_PM_INFO(kept); + ASSERT(kept_info); + PM_LOCK_POWER(keeper, &circ); + for (j = 0; j < PM_NUMCMPTS(keeper); j++) { + if (PM_CURPOWER(keeper, j)) { + up++; + break; + } + } + if (up) { + /* Bringup and maintain a hold on the kept */ + PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf, + PM_DEVICE(kept))) + bring_pmdep_up(kept, 1); + } + PM_UNLOCK_POWER(keeper, circ); +#ifdef DEBUG + if (pm_debug & PMD_KEEPS) + prdeps("After PAD\n"); +#endif + return (1); +} + +/* + * Should this device keep up another device? + * Look up this device in the set of devices we've seen ioctls for + * to see if we are holding a dependency spec for it. If so, make it so. + * Because we require the kept device to be attached already in order to + * make the list entry (and hold it), we only need to look for keepers. + * At ioctl time, we were given the physical path of the device. + */ +int +pm_keeper(char *keeper) +{ + PMD_FUNC(pmf, "keeper") + int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *); + dev_info_t *dip; + pm_pdr_t *dp; + dev_info_t *kept = NULL; + int ret = 0; + int i; + + if (!pm_unresolved_deps && !pm_prop_deps) + return (0); + ASSERT(keeper != NULL); + dip = pm_name_to_dip(keeper, 1); + if (dip == NULL) + return (0); + PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper)) + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (!dp->pdr_isprop) { + if (!pm_unresolved_deps) + continue; + PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper)) + if (dp->pdr_satisfied) { + PMD(PMD_KEEPS, ("%s: satisfied\n", pmf)) + continue; + } + if (strcmp(dp->pdr_keeper, keeper) == 0) { + ret += pm_apply_recorded_dep(dip, dp); + } + } else { + if (strcmp(dp->pdr_keeper, keeper) != 0) + continue; + for (i = 0; i < dp->pdr_kept_count; i++) { + if (dp->pdr_kept_paths[i] == NULL) + continue; + kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1); + if (kept == NULL) + continue; + ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept, + DDI_PROP_DONTPASS, dp->pdr_kept)); + PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), " + "kept=%s@%s(%s#%d) keptcnt=%d\n", + pmf, PM_DEVICE(dip), PM_DEVICE(kept), + dp->pdr_kept_count)) + if (kept != dip) { + ret += pm_set_keeping(dip, kept); + } + ddi_release_devi(kept); + } + + } + } + ddi_release_devi(dip); + return (ret); +} + +/* + * Should this device be kept up by another device? + * Look up all dependency recorded from PM_ADD_DEPENDENT and + * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's + * kept device lists. + */ +static int +pm_kept(char *keptp) +{ + PMD_FUNC(pmf, "kept") + pm_pdr_t *dp; + int found = 0; + int ret = 0; + dev_info_t *keeper; + dev_info_t *kept; + size_t length; + int i; + char **paths; + char *path; + + ASSERT(keptp != NULL); + kept = pm_name_to_dip(keptp, 1); + if (kept == NULL) + return (0); + PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept))) + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (dp->pdr_isprop) { + PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept)) + if (ddi_prop_exists(DDI_DEV_T_ANY, kept, + DDI_PROP_DONTPASS, dp->pdr_kept)) { + /* + * Dont allow self dependency. + */ + if (strcmp(dp->pdr_keeper, keptp) == 0) + continue; + keeper = pm_name_to_dip(dp->pdr_keeper, 1); + if (keeper == NULL) + continue; + PMD(PMD_KEEPS, ("%s: adding to kepts path list " + "%p\n", pmf, (void *)kept)) +#ifdef DEBUG + if (pm_debug & PMD_DEP) + prdeps("Before Adding from pm_kept\n"); +#endif + /* + * Add ourselves to the dip list. + */ + if (dp->pdr_kept_count == 0) { + length = strlen(keptp) + 1; + path = + kmem_alloc(length, KM_SLEEP); + paths = kmem_alloc(sizeof (char **), + KM_SLEEP); + (void) strcpy(path, keptp); + paths[0] = path; + dp->pdr_kept_paths = paths; + dp->pdr_kept_count++; + } else { + /* Check to see if already on list */ + for (i = 0; i < dp->pdr_kept_count; + i++) { + if (strcmp(keptp, + dp->pdr_kept_paths[i]) + == 0) { + found++; + break; + } + } + if (found) { + ddi_release_devi(keeper); + continue; + } + length = dp->pdr_kept_count * + sizeof (char **); + paths = kmem_alloc( + length + sizeof (char **), + KM_SLEEP); + if (dp->pdr_kept_count) { + bcopy(dp->pdr_kept_paths, + paths, length); + kmem_free(dp->pdr_kept_paths, + length); + } + dp->pdr_kept_paths = paths; + length = strlen(keptp) + 1; + path = + kmem_alloc(length, KM_SLEEP); + (void) strcpy(path, keptp); + dp->pdr_kept_paths[i] = path; + dp->pdr_kept_count++; + } +#ifdef DEBUG + if (pm_debug & PMD_DEP) + prdeps("After from pm_kept\n"); +#endif + if (keeper) { + ret += pm_set_keeping(keeper, kept); + ddi_release_devi(keeper); + } + } + } else { + /* + * pm_keeper would be called later to do + * the actual pm_set_keeping. + */ + PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n", + pmf, (void *)kept)) +#ifdef DEBUG + if (pm_debug & PMD_DEP) + prdeps("Before Adding from pm_kept\n"); +#endif + if (strcmp(keptp, dp->pdr_kept) == 0) { + if (dp->pdr_kept_paths == NULL) { + length = strlen(keptp) + 1; + path = + kmem_alloc(length, KM_SLEEP); + paths = kmem_alloc(sizeof (char **), + KM_SLEEP); + (void) strcpy(path, keptp); + paths[0] = path; + dp->pdr_kept_paths = paths; + dp->pdr_kept_count++; + } + } +#ifdef DEBUG + if (pm_debug & PMD_DEP) + prdeps("After from pm_kept\n"); +#endif + } + } + ddi_release_devi(kept); + return (ret); +} + +/* + * Apply a recorded dependency. dp specifies the dependency, and + * keeper is already known to be the device that keeps up the other (kept) one. + * We have to the whole tree for the "kept" device, then apply + * the dependency (which may already be applied). + */ +int +pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp) +{ + PMD_FUNC(pmf, "apply_recorded_dep") + dev_info_t *kept = NULL; + int ret = 0; + char *keptp = NULL; + + /* + * Device to Device dependency can only be 1 to 1. + */ + if (dp->pdr_kept_paths == NULL) + return (0); + keptp = dp->pdr_kept_paths[0]; + if (keptp == NULL) + return (0); + ASSERT(*keptp != '\0'); + kept = pm_name_to_dip(keptp, 1); + if (kept == NULL) + return (0); + if (kept) { + PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, + dp->pdr_keeper, keptp)) + if (pm_set_keeping(keeper, kept)) { + ASSERT(dp->pdr_satisfied == 0); + dp->pdr_satisfied = 1; + ASSERT(pm_unresolved_deps); + pm_unresolved_deps--; + ret++; + } + } + ddi_release_devi(kept); + + return (ret); +} + +/* + * Called from common/io/pm.c + */ +int +pm_cur_power(pm_component_t *cp) +{ + return (cur_power(cp)); +} + +/* + * External interface to sanity-check a power level. + */ +int +pm_valid_power(dev_info_t *dip, int comp, int level) +{ + PMD_FUNC(pmf, "valid_power") + + if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0) + return (e_pm_valid_power(dip, comp, level)); + else { + PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n", + pmf, comp, PM_NUMCMPTS(dip), level)) + return (0); + } +} + +/* + * Called when a device that is direct power managed needs to change state. + * This routine arranges to block the request until the process managing + * the device makes the change (or some other incompatible change) or + * the process closes /dev/pm. + */ +static int +pm_block(dev_info_t *dip, int comp, int newpower, int oldpower) +{ + pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP); + int ret = 0; + void pm_dequeue_blocked(pm_rsvp_t *); + void pm_enqueue_blocked(pm_rsvp_t *); + + ASSERT(!pm_processes_stopped); + ASSERT(PM_IAM_LOCKING_DIP(dip)); + new->pr_dip = dip; + new->pr_comp = comp; + new->pr_newlevel = newpower; + new->pr_oldlevel = oldpower; + cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL); + mutex_enter(&pm_rsvp_lock); + pm_enqueue_blocked(new); + pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower, + PM_CANBLOCK_BLOCK); + PM_UNLOCK_DIP(dip); + /* + * truss may make the cv_wait_sig return prematurely + */ + while (ret == 0) { + /* + * Normally there will be no user context involved, but if + * there is (e.g. we are here via an ioctl call to a driver) + * then we should allow the process to abort the request, + * or we get an unkillable process if the same thread does + * PM_DIRECT_PM and pm_raise_power + */ + if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) { + ret = PMP_FAIL; + } else { + ret = new->pr_retval; + } + } + pm_dequeue_blocked(new); + mutex_exit(&pm_rsvp_lock); + cv_destroy(&new->pr_cv); + kmem_free(new, sizeof (*new)); + return (ret); +} + +/* + * Returns true if the process is interested in power level changes (has issued + * PM_GET_STATE_CHANGE ioctl). + */ +int +pm_interest_registered(int clone) +{ + ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1); + return (pm_interest[clone]); +} + +/* + * Process with clone has just done PM_DIRECT_PM on dip, or has asked to + * watch all state transitions (dip == NULL). Set up data + * structs to communicate with process about state changes. + */ +void +pm_register_watcher(int clone, dev_info_t *dip) +{ + pscc_t *p; + psce_t *psce; + static void pm_enqueue_pscc(pscc_t *, pscc_t **); + + /* + * We definitely need a control struct, then we have to search to see + * there is already an entries struct (in the dip != NULL case). + */ + pscc_t *pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP); + pscc->pscc_clone = clone; + pscc->pscc_dip = dip; + + if (dip) { + int found = 0; + rw_enter(&pm_pscc_direct_rwlock, RW_WRITER); + for (p = pm_pscc_direct; p; p = p->pscc_next) { + /* + * Already an entry for this clone, so just use it + * for the new one (for the case where a single + * process is watching multiple devices) + */ + if (p->pscc_clone == clone) { + ASSERT(p->pscc_dip != dip); + pscc->pscc_entries = p->pscc_entries; + pscc->pscc_entries->psce_references++; + found++; + } + } + if (!found) { /* create a new one */ + psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP); + mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL); + psce->psce_first = + kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT, + KM_SLEEP); + psce->psce_in = psce->psce_out = psce->psce_first; + psce->psce_last = &psce->psce_first[PSCCOUNT - 1]; + psce->psce_references = 1; + pscc->pscc_entries = psce; + } + pm_enqueue_pscc(pscc, &pm_pscc_direct); + rw_exit(&pm_pscc_direct_rwlock); + } else { + ASSERT(!pm_interest_registered(clone)); + rw_enter(&pm_pscc_interest_rwlock, RW_WRITER); +#ifdef DEBUG + for (p = pm_pscc_interest; p; p = p->pscc_next) { + /* + * Should not be an entry for this clone! + */ + ASSERT(p->pscc_clone != clone); + } +#endif + psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP); + psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) * + PSCCOUNT, KM_SLEEP); + psce->psce_in = psce->psce_out = psce->psce_first; + psce->psce_last = &psce->psce_first[PSCCOUNT - 1]; + psce->psce_references = 1; + pscc->pscc_entries = psce; + pm_enqueue_pscc(pscc, &pm_pscc_interest); + pm_interest[clone] = 1; + rw_exit(&pm_pscc_interest_rwlock); + } +} + +/* + * Remove the given entry from the blocked list + */ +void +pm_dequeue_blocked(pm_rsvp_t *p) +{ + ASSERT(MUTEX_HELD(&pm_rsvp_lock)); + if (pm_blocked_list == p) { + ASSERT(p->pr_prev == NULL); + if (p->pr_next != NULL) + p->pr_next->pr_prev = NULL; + pm_blocked_list = p->pr_next; + } else { + ASSERT(p->pr_prev != NULL); + p->pr_prev->pr_next = p->pr_next; + if (p->pr_next != NULL) + p->pr_next->pr_prev = p->pr_prev; + } +} + +/* + * Remove the given control struct from the given list + */ +static void +pm_dequeue_pscc(pscc_t *p, pscc_t **list) +{ + if (*list == p) { + ASSERT(p->pscc_prev == NULL); + if (p->pscc_next != NULL) + p->pscc_next->pscc_prev = NULL; + *list = p->pscc_next; + } else { + ASSERT(p->pscc_prev != NULL); + p->pscc_prev->pscc_next = p->pscc_next; + if (p->pscc_next != NULL) + p->pscc_next->pscc_prev = p->pscc_prev; + } +} + +/* + * Stick the control struct specified on the front of the list + */ +static void +pm_enqueue_pscc(pscc_t *p, pscc_t **list) +{ + pscc_t *h; /* entry at head of list */ + if ((h = *list) == NULL) { + *list = p; + ASSERT(p->pscc_next == NULL); + ASSERT(p->pscc_prev == NULL); + } else { + p->pscc_next = h; + ASSERT(h->pscc_prev == NULL); + h->pscc_prev = p; + ASSERT(p->pscc_prev == NULL); + *list = p; + } +} + +/* + * If dip is NULL, process is closing "clone" clean up all its registrations. + * Otherwise only clean up those for dip because process is just giving up + * control of a direct device. + */ +void +pm_deregister_watcher(int clone, dev_info_t *dip) +{ + pscc_t *p, *pn; + psce_t *psce; + int found = 0; + + if (dip == NULL) { + rw_enter(&pm_pscc_interest_rwlock, RW_WRITER); + for (p = pm_pscc_interest; p; p = pn) { + pn = p->pscc_next; + if (p->pscc_clone == clone) { + pm_dequeue_pscc(p, &pm_pscc_interest); + psce = p->pscc_entries; + ASSERT(psce->psce_references == 1); + mutex_destroy(&psce->psce_lock); + kmem_free(psce->psce_first, + sizeof (pm_state_change_t) * PSCCOUNT); + kmem_free(psce, sizeof (*psce)); + kmem_free(p, sizeof (*p)); + } + } + pm_interest[clone] = 0; + rw_exit(&pm_pscc_interest_rwlock); + } + found = 0; + rw_enter(&pm_pscc_direct_rwlock, RW_WRITER); + for (p = pm_pscc_direct; p; p = pn) { + pn = p->pscc_next; + if ((dip && p->pscc_dip == dip) || + (dip == NULL && clone == p->pscc_clone)) { + ASSERT(clone == p->pscc_clone); + found++; + /* + * Remove from control list + */ + pm_dequeue_pscc(p, &pm_pscc_direct); + /* + * If we're the last reference, free the + * entries struct. + */ + psce = p->pscc_entries; + ASSERT(psce); + if (psce->psce_references == 1) { + kmem_free(psce->psce_first, + PSCCOUNT * sizeof (pm_state_change_t)); + kmem_free(psce, sizeof (*psce)); + } else { + psce->psce_references--; + } + kmem_free(p, sizeof (*p)); + } + } + ASSERT(dip == NULL || found); + rw_exit(&pm_pscc_direct_rwlock); +} + +/* + * Search the indicated list for an entry that matches clone, and return a + * pointer to it. To be interesting, the entry must have something ready to + * be passed up to the controlling process. + * The returned entry will be locked upon return from this call. + */ +static psce_t * +pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock) +{ + pscc_t *p; + psce_t *psce; + rw_enter(lock, RW_READER); + for (p = *list; p; p = p->pscc_next) { + if (clone == p->pscc_clone) { + psce = p->pscc_entries; + mutex_enter(&psce->psce_lock); + if (psce->psce_out->size) { + rw_exit(lock); + return (psce); + } else { + mutex_exit(&psce->psce_lock); + } + } + } + rw_exit(lock); + return (NULL); +} + +/* + * Find an entry for a particular clone in the direct list. + */ +psce_t * +pm_psc_clone_to_direct(int clone) +{ + static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *); + return (pm_psc_find_clone(clone, &pm_pscc_direct, + &pm_pscc_direct_rwlock)); +} + +/* + * Find an entry for a particular clone in the interest list. + */ +psce_t * +pm_psc_clone_to_interest(int clone) +{ + static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *); + return (pm_psc_find_clone(clone, &pm_pscc_interest, + &pm_pscc_interest_rwlock)); +} + +/* + * Put the given entry at the head of the blocked list + */ +void +pm_enqueue_blocked(pm_rsvp_t *p) +{ + ASSERT(MUTEX_HELD(&pm_rsvp_lock)); + ASSERT(p->pr_next == NULL); + ASSERT(p->pr_prev == NULL); + if (pm_blocked_list != NULL) { + p->pr_next = pm_blocked_list; + ASSERT(pm_blocked_list->pr_prev == NULL); + pm_blocked_list->pr_prev = p; + pm_blocked_list = p; + } else { + pm_blocked_list = p; + } +} + +/* + * Sets every power managed device back to its default threshold + */ +void +pm_all_to_default_thresholds(void) +{ + ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk, + (void *) &pm_system_idle_threshold); +} + +static int +pm_set_dev_thr_walk(dev_info_t *dip, void *arg) +{ + int thr = (int)(*(int *)arg); + + if (!PM_GET_PM_INFO(dip)) + return (DDI_WALK_CONTINUE); + pm_set_device_threshold(dip, thr, PMC_DEF_THRESH); + return (DDI_WALK_CONTINUE); +} + +/* + * Returns the current threshold value (in seconds) for the indicated component + */ +int +pm_current_threshold(dev_info_t *dip, int comp, int *threshp) +{ + if (comp < 0 || comp >= PM_NUMCMPTS(dip)) { + return (DDI_FAILURE); + } else { + *threshp = cur_threshold(dip, comp); + return (DDI_SUCCESS); + } +} + +/* + * To be called when changing the power level of a component of a device. + * On some platforms, changing power on one device may require that power + * be changed on other, related devices in the same transaction. Thus, we + * always pass this request to the platform power manager so that all the + * affected devices will be locked. + */ +void +pm_lock_power(dev_info_t *dip, int *circp) +{ + power_req_t power_req; + int result; + + power_req.request_type = PMR_PPM_LOCK_POWER; + power_req.req.ppm_lock_power_req.who = dip; + power_req.req.ppm_lock_power_req.circp = circp; + (void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result); +} + +/* + * Release the lock (or locks) acquired to change the power of a device. + * See comments for pm_lock_power. + */ +void +pm_unlock_power(dev_info_t *dip, int circ) +{ + power_req_t power_req; + int result; + + power_req.request_type = PMR_PPM_UNLOCK_POWER; + power_req.req.ppm_unlock_power_req.who = dip; + power_req.req.ppm_unlock_power_req.circ = circ; + (void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result); +} + + +/* + * Attempt (without blocking) to acquire the lock(s) needed to change the + * power of a component of a device. See comments for pm_lock_power. + * + * Return: 1 if lock(s) acquired, 0 if not. + */ +int +pm_try_locking_power(dev_info_t *dip, int *circp) +{ + power_req_t power_req; + int result; + + power_req.request_type = PMR_PPM_TRY_LOCK_POWER; + power_req.req.ppm_lock_power_req.who = dip; + power_req.req.ppm_lock_power_req.circp = circp; + (void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result); + return (result); +} + + +/* + * Lock power state of a device. + * + * The implementation handles a special case where another thread may have + * acquired the lock and created/launched this thread to do the work. If + * the lock cannot be acquired immediately, we check to see if this thread + * is registered as a borrower of the lock. If so, we may proceed without + * the lock. This assumes that the lending thread blocks on the completion + * of this thread. + * + * Note 1: for use by ppm only. + * + * Note 2: On failing to get the lock immediately, we search lock_loan list + * for curthread (as borrower of the lock). On a hit, we check that the + * lending thread already owns the lock we want. It is safe to compare + * devi_busy_thread and thread id of the lender because in the == case (the + * only one we care about) we know that the owner is blocked. Similarly, + * If we find that curthread isn't registered as a lock borrower, it is safe + * to use the blocking call (ndi_devi_enter) because we know that if we + * weren't already listed as a borrower (upstream on the call stack) we won't + * become one. + */ +void +pm_lock_power_single(dev_info_t *dip, int *circp) +{ + lock_loan_t *cur; + + /* if the lock is available, we are done. */ + if (ndi_devi_tryenter(dip, circp)) + return; + + mutex_enter(&pm_loan_lock); + /* see if our thread is registered as a lock borrower. */ + for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next) + if (cur->pmlk_borrower == curthread) + break; + mutex_exit(&pm_loan_lock); + + /* if this thread not already registered, it is safe to block */ + if (cur == NULL) + ndi_devi_enter(dip, circp); + else { + /* registered: does lender own the lock we want? */ + if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) { + ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip); + cur->pmlk_dip = dip; + } else /* no: just block for it */ + ndi_devi_enter(dip, circp); + + } +} + +/* + * Drop the lock on the device's power state. See comment for + * pm_lock_power_single() for special implementation considerations. + * + * Note: for use by ppm only. + */ +void +pm_unlock_power_single(dev_info_t *dip, int circ) +{ + lock_loan_t *cur; + + /* optimization: mutex not needed to check empty list */ + if (lock_loan_head.pmlk_next == NULL) { + ndi_devi_exit(dip, circ); + return; + } + + mutex_enter(&pm_loan_lock); + /* see if our thread is registered as a lock borrower. */ + for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next) + if (cur->pmlk_borrower == curthread) + break; + mutex_exit(&pm_loan_lock); + + if (cur == NULL || cur->pmlk_dip != dip) + /* we acquired the lock directly, so return it */ + ndi_devi_exit(dip, circ); +} + +/* + * Try to take the lock for changing the power level of a component. + * + * Note: for use by ppm only. + */ +int +pm_try_locking_power_single(dev_info_t *dip, int *circp) +{ + return (ndi_devi_tryenter(dip, circp)); +} + +#ifdef DEBUG +/* + * The following are used only to print out data structures for debugging + */ +void +prdeps(char *msg) +{ + + pm_pdr_t *rp; + int i; + + pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head); + for (rp = pm_dep_head; rp; rp = rp->pdr_next) { + pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n", + (void *)rp, (rp->pdr_isprop ? "property" : "device"), + rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count, + (void *)rp->pdr_next); + if (rp->pdr_kept_count != 0) { + pm_log("kept list = "); + i = 0; + while (i < rp->pdr_kept_count) { + pm_log("%s ", rp->pdr_kept_paths[i]); + i++; + } + pm_log("\n"); + } + } +} + +void +pr_noinvol(char *hdr) +{ + pm_noinvol_t *ip; + + pm_log("%s\n", hdr); + rw_enter(&pm_noinvol_rwlock, RW_READER); + for (ip = pm_noinvol_head; ip; ip = ip->ni_next) + pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n", + ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path); + rw_exit(&pm_noinvol_rwlock); +} +#endif + +/* + * Attempt to apply the thresholds indicated by rp to the node specified by + * dip. + */ +void +pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp) +{ + PMD_FUNC(pmf, "apply_recorded_thresh") + int i, j; + int comps = PM_NUMCMPTS(dip); + struct pm_component *cp; + pm_pte_t *ep; + int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *); + + PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf, + PM_DEVICE(dip), (void *)rp, rp->ptr_physpath)) + PM_LOCK_DIP(dip); + if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) { + PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n", + pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip))) + PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n", + pmf, PM_DEVICE(dip), PM_ISBC(dip))) + PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n", + pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp))) + PM_UNLOCK_DIP(dip); + return; + } + + ep = rp->ptr_entries; + /* + * Here we do the special case of a device threshold + */ + if (rp->ptr_numcomps == 0) { /* PM_SET_DEVICE_THRESHOLD product */ + ASSERT(ep && ep->pte_numthresh == 1); + PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n", + pmf, PM_DEVICE(dip), ep->pte_thresh[0])) + PM_UNLOCK_DIP(dip); + pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH); + if (autopm_enabled) + pm_rescan(dip); + return; + } + for (i = 0; i < comps; i++) { + cp = PM_CP(dip, i); + for (j = 0; j < ep->pte_numthresh; j++) { + PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] " + "to %x\n", pmf, j, PM_DEVICE(dip), + i, ep->pte_thresh[j])) + cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j]; + } + ep++; + } + DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE; + DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH; + PM_UNLOCK_DIP(dip); + + if (autopm_enabled) + pm_rescan(dip); +} + +/* + * Returns true if the threshold specified by rp could be applied to dip + * (that is, the number of components and transitions are the same) + */ +int +pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp) +{ + PMD_FUNC(pmf, "valid_thresh") + int comps, i; + pm_component_t *cp; + pm_pte_t *ep; + + if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) { + PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf, + rp->ptr_physpath)) + return (0); + } + /* + * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by + * an entry with numcomps == 0, (since we don't know how many + * components there are in advance). This is always a valid + * spec. + */ + if (rp->ptr_numcomps == 0) { + ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1); + return (1); + } + if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) { + PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n", + pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath)) + return (0); + } + ep = rp->ptr_entries; + for (i = 0; i < comps; i++) { + cp = PM_CP(dip, i); + if ((ep + i)->pte_numthresh != + cp->pmc_comp.pmc_numlevels - 1) { + PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n", + pmf, rp->ptr_physpath, i, + cp->pmc_comp.pmc_numlevels - 1, + (ep + i)->pte_numthresh)) + return (0); + } + } + return (1); +} + +/* + * Remove any recorded threshold for device physpath + * We know there will be at most one. + */ +void +pm_unrecord_threshold(char *physpath) +{ + pm_thresh_rec_t *pptr, *ptr; + + rw_enter(&pm_thresh_rwlock, RW_WRITER); + for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) { + if (strcmp(physpath, ptr->ptr_physpath) == 0) { + if (pptr) { + pptr->ptr_next = ptr->ptr_next; + } else { + ASSERT(pm_thresh_head == ptr); + pm_thresh_head = ptr->ptr_next; + } + kmem_free(ptr, ptr->ptr_size); + break; + } + pptr = ptr; + } + rw_exit(&pm_thresh_rwlock); +} + +/* + * Discard all recorded thresholds. We are returning to the default pm state. + */ +void +pm_discard_thresholds(void) +{ + pm_thresh_rec_t *rp; + rw_enter(&pm_thresh_rwlock, RW_WRITER); + while (pm_thresh_head) { + rp = pm_thresh_head; + pm_thresh_head = rp->ptr_next; + kmem_free(rp, rp->ptr_size); + } + rw_exit(&pm_thresh_rwlock); +} + +/* + * Discard all recorded dependencies. We are returning to the default pm state. + */ +void +pm_discard_dependencies(void) +{ + pm_pdr_t *rp; + int i; + size_t length; + +#ifdef DEBUG + if (pm_debug & PMD_DEP) + prdeps("Before discard\n"); +#endif + ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL); + +#ifdef DEBUG + if (pm_debug & PMD_DEP) + prdeps("After discard\n"); +#endif + while (pm_dep_head) { + rp = pm_dep_head; + if (!rp->pdr_isprop) { + ASSERT(rp->pdr_satisfied == 0); + ASSERT(pm_unresolved_deps); + pm_unresolved_deps--; + } else { + ASSERT(pm_prop_deps); + pm_prop_deps--; + } + pm_dep_head = rp->pdr_next; + if (rp->pdr_kept_count) { + for (i = 0; i < rp->pdr_kept_count; i++) { + length = strlen(rp->pdr_kept_paths[i]) + 1; + kmem_free(rp->pdr_kept_paths[i], length); + } + kmem_free(rp->pdr_kept_paths, + rp->pdr_kept_count * sizeof (char **)); + } + kmem_free(rp, rp->pdr_size); + } +} + + +static int +pm_discard_dep_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + char *pathbuf; + + if (PM_GET_PM_INFO(dip) == NULL) + return (DDI_WALK_CONTINUE); + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + pm_free_keeper(pathbuf, 0); + kmem_free(pathbuf, MAXPATHLEN); + return (DDI_WALK_CONTINUE); +} + +static int +pm_kept_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + char *pathbuf; + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + (void) pm_kept(pathbuf); + kmem_free(pathbuf, MAXPATHLEN); + + return (DDI_WALK_CONTINUE); +} + +static int +pm_keeper_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + char *pathbuf; + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + (void) pm_keeper(pathbuf); + kmem_free(pathbuf, MAXPATHLEN); + + return (DDI_WALK_CONTINUE); +} + +static char * +pdw_type_decode(int type) +{ + switch (type) { + case PM_DEP_WK_POWER_ON: + return ("power on"); + case PM_DEP_WK_POWER_OFF: + return ("power off"); + case PM_DEP_WK_DETACH: + return ("detach"); + case PM_DEP_WK_REMOVE_DEP: + return ("remove dep"); + case PM_DEP_WK_BRINGUP_SELF: + return ("bringup self"); + case PM_DEP_WK_RECORD_KEEPER: + return ("add dependent"); + case PM_DEP_WK_RECORD_KEEPER_PROP: + return ("add dependent property"); + case PM_DEP_WK_KEPT: + return ("kept"); + case PM_DEP_WK_KEEPER: + return ("keeper"); + case PM_DEP_WK_ATTACH: + return ("attach"); + case PM_DEP_WK_CHECK_KEPT: + return ("check kept"); + case PM_DEP_WK_CPR_SUSPEND: + return ("suspend"); + case PM_DEP_WK_CPR_RESUME: + return ("resume"); + default: + return ("unknown"); + } + +} + +static void +pm_rele_dep(char *keeper) +{ + PMD_FUNC(pmf, "rele_dep") + pm_pdr_t *dp; + char *kept_path = NULL; + dev_info_t *kept = NULL; + int count = 0; + + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (strcmp(dp->pdr_keeper, keeper) != 0) + continue; + for (count = 0; count < dp->pdr_kept_count; count++) { + kept_path = dp->pdr_kept_paths[count]; + if (kept_path == NULL) + continue; + kept = pm_name_to_dip(kept_path, 1); + if (kept) { + PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) " + "of keeper=%s\n", pmf, PM_DEVICE(kept), + keeper)) + ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0); + pm_rele_power(kept); + ddi_release_devi(kept); + } + } + } +} + +/* + * Called when we are just released from direct PM. Bring ourself up + * if our keeper is up since dependency is not honored while a kept + * device is under direct PM. + */ +static void +pm_bring_self_up(char *keptpath) +{ + PMD_FUNC(pmf, "bring_self_up") + dev_info_t *kept; + dev_info_t *keeper; + pm_pdr_t *dp; + int i, j; + int up = 0, circ; + + kept = pm_name_to_dip(keptpath, 1); + if (kept == NULL) + return; + PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept))) + for (dp = pm_dep_head; dp; dp = dp->pdr_next) { + if (dp->pdr_kept_count == 0) + continue; + for (i = 0; i < dp->pdr_kept_count; i++) { + if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0) + continue; + keeper = pm_name_to_dip(dp->pdr_keeper, 1); + if (keeper) { + PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n", + pmf, PM_DEVICE(keeper))) + PM_LOCK_POWER(keeper, &circ); + for (j = 0; j < PM_NUMCMPTS(keeper); + j++) { + if (PM_CURPOWER(keeper, j)) { + PMD(PMD_KEEPS, ("%s: comp=" + "%d is up\n", pmf, j)) + up++; + } + } + if (up) { + if (PM_SKBU(kept)) + DEVI(kept)->devi_pm_flags &= + ~PMC_SKIP_BRINGUP; + bring_pmdep_up(kept, 1); + } + PM_UNLOCK_POWER(keeper, circ); + ddi_release_devi(keeper); + } + } + } + ddi_release_devi(kept); +} + +static void +pm_process_dep_request(pm_dep_wk_t *work) +{ + PMD_FUNC(pmf, "dep_req") + int ret; + + PMD(PMD_DEP, ("%s: work=%s\n", pmf, + pdw_type_decode(work->pdw_type))) + PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf, + (work->pdw_keeper ? work->pdw_keeper : "NULL"), + (work->pdw_kept ? work->pdw_kept : "NULL"))) + + switch (work->pdw_type) { + case PM_DEP_WK_POWER_ON: + /* Bring up the kept devices and put a hold on them */ + bring_wekeeps_up(work->pdw_keeper); + break; + case PM_DEP_WK_POWER_OFF: + /* Release the kept devices */ + pm_rele_dep(work->pdw_keeper); + break; + case PM_DEP_WK_DETACH: + pm_free_keeps(work->pdw_keeper, work->pdw_pwr); + break; + case PM_DEP_WK_REMOVE_DEP: + pm_discard_dependencies(); + break; + case PM_DEP_WK_BRINGUP_SELF: + /* + * We deferred satisfying our dependency till now, so satisfy + * it again and bring ourselves up. + */ + pm_bring_self_up(work->pdw_kept); + break; + case PM_DEP_WK_RECORD_KEEPER: + (void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0); + ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL); + ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL); + break; + case PM_DEP_WK_RECORD_KEEPER_PROP: + (void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1); + ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL); + ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL); + break; + case PM_DEP_WK_KEPT: + ret = pm_kept(work->pdw_kept); + PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf, + ret)) + break; + case PM_DEP_WK_KEEPER: + ret = pm_keeper(work->pdw_keeper); + PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n", + pmf, ret)) + break; + case PM_DEP_WK_ATTACH: + ret = pm_keeper(work->pdw_keeper); + PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n", + pmf, ret)) + ret = pm_kept(work->pdw_kept); + PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n", + pmf, ret)) + break; + case PM_DEP_WK_CHECK_KEPT: + ret = pm_is_kept(work->pdw_kept); + PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n", + pmf, work->pdw_kept, ret)) + break; + case PM_DEP_WK_CPR_SUSPEND: + pm_discard_dependencies(); + break; + case PM_DEP_WK_CPR_RESUME: + ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL); + ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL); + break; + default: + ASSERT(0); + break; + } + /* + * Free the work structure if the requester is not waiting + * Otherwise it is the requester's responsiblity to free it. + */ + if (!work->pdw_wait) { + if (work->pdw_keeper) + kmem_free(work->pdw_keeper, + strlen(work->pdw_keeper) + 1); + if (work->pdw_kept) + kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1); + kmem_free(work, sizeof (pm_dep_wk_t)); + } else { + /* + * Notify requester if it is waiting for it. + */ + work->pdw_ret = ret; + work->pdw_done = 1; + cv_signal(&work->pdw_cv); + } +} + +/* + * Process PM dependency requests. + */ +static void +pm_dep_thread(void) +{ + pm_dep_wk_t *work; + callb_cpr_t cprinfo; + + CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr, + "pm_dep_thread"); + for (;;) { + mutex_enter(&pm_dep_thread_lock); + if (pm_dep_thread_workq == NULL) { + CALLB_CPR_SAFE_BEGIN(&cprinfo); + cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock); + CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock); + } + work = pm_dep_thread_workq; + pm_dep_thread_workq = work->pdw_next; + if (pm_dep_thread_tail == work) + pm_dep_thread_tail = work->pdw_next; + mutex_exit(&pm_dep_thread_lock); + pm_process_dep_request(work); + + } + /*NOTREACHED*/ +} + +/* + * Set the power level of the indicated device to unknown (if it is not a + * backwards compatible device), as it has just been resumed, and it won't + * know if the power was removed or not. Adjust parent's kidsupcnt if necessary. + */ +void +pm_forget_power_level(dev_info_t *dip) +{ + dev_info_t *pdip = ddi_get_parent(dip); + int i, count = 0; + + if (!PM_ISBC(dip)) { + for (i = 0; i < PM_NUMCMPTS(dip); i++) + count += (PM_CURPOWER(dip, i) == 0); + + if (count && pdip && !PM_WANTS_NOTIFICATION(pdip)) + e_pm_hold_rele_power(pdip, count); + + /* + * Count this as a power cycle if we care + */ + if (DEVI(dip)->devi_pm_volpmd && + PM_CP(dip, 0)->pmc_cur_pwr == 0) + DEVI(dip)->devi_pm_volpmd = 0; + for (i = 0; i < PM_NUMCMPTS(dip); i++) + e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN); + } +} + +/* + * This function advises the caller whether it should make a power-off + * transition at this time or not. If the transition is not advised + * at this time, the time that the next power-off transition can + * be made from now is returned through "intervalp" pointer. + * This function returns: + * + * 1 power-off advised + * 0 power-off not advised, intervalp will point to seconds from + * now that a power-off is advised. If it is passed the number + * of years that policy specifies the device should last, + * a large number is returned as the time interval. + * -1 error + */ +int +pm_trans_check(struct pm_trans_data *datap, time_t *intervalp) +{ + PMD_FUNC(pmf, "pm_trans_check") + char dbuf[DC_SCSI_MFR_LEN]; + struct pm_scsi_cycles *scp; + int service_years, service_weeks, full_years; + time_t now, service_seconds, tdiff; + time_t within_year, when_allowed; + char *ptr; + int lower_bound_cycles, upper_bound_cycles, cycles_allowed; + int cycles_diff, cycles_over; + + if (datap == NULL) { + PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf)) + return (-1); + } + + if (datap->format == DC_SCSI_FORMAT) { + /* + * Power cycles of the scsi drives are distributed + * over 5 years with the following percentage ratio: + * + * 30%, 25%, 20%, 15%, and 10% + * + * The power cycle quota for each year is distributed + * linearly through out the year. The equation for + * determining the expected cycles is: + * + * e = a * (n / y) + * + * e = expected cycles + * a = allocated cycles for this year + * n = number of seconds since beginning of this year + * y = number of seconds in a year + * + * Note that beginning of the year starts the day that + * the drive has been put on service. + * + * If the drive has passed its expected cycles, we + * can determine when it can start to power cycle + * again to keep it on track to meet the 5-year + * life expectancy. The equation for determining + * when to power cycle is: + * + * w = y * (c / a) + * + * w = when it can power cycle again + * y = number of seconds in a year + * c = current number of cycles + * a = allocated cycles for the year + * + */ + char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 }; + + scp = &datap->un.scsi_cycles; + PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, " + "svc_date=%s, svc_flag=%d\n", pmf, datap->format, + scp->lifemax, scp->ncycles, scp->svc_date, scp->flag)) + if (scp->ncycles < 0 || scp->flag != 0) { + PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf)) + return (-1); + } + + if (scp->ncycles > scp->lifemax) { + *intervalp = (LONG_MAX / hz); + return (0); + } + + /* + * convert service date to time_t + */ + bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN); + dbuf[DC_SCSI_YEAR_LEN] = '\0'; + ptr = dbuf; + service_years = stoi(&ptr) - EPOCH_YEAR; + bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf, + DC_SCSI_WEEK_LEN); + dbuf[DC_SCSI_WEEK_LEN] = '\0'; + + /* + * scsi standard does not specify WW data, + * could be (00-51) or (01-52) + */ + ptr = dbuf; + service_weeks = stoi(&ptr); + if (service_years < 0 || + service_weeks < 0 || service_weeks > 52) { + PMD(PMD_TCHECK, ("%s: service year %d and week %d\n", + pmf, service_years, service_weeks)) + return (-1); + } + + /* + * calculate service date in seconds-since-epoch, + * adding one day for each leap-year. + * + * (years-since-epoch + 2) fixes integer truncation, + * example: (8) leap-years during [1972, 2000] + * (2000 - 1970) = 30; and (30 + 2) / 4 = 8; + */ + service_seconds = (service_years * DC_SPY) + + (service_weeks * DC_SPW) + + (((service_years + 2) / 4) * DC_SPD); + + now = gethrestime_sec(); + /* + * since the granularity of 'svc_date' is day not second, + * 'now' should be rounded up to full day. + */ + now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD; + if (service_seconds > now) { + PMD(PMD_TCHECK, ("%s: service date (%ld) later " + "than now (%ld)!\n", pmf, service_seconds, now)) + return (-1); + } + + tdiff = now - service_seconds; + PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff)) + + /* + * NOTE - Leap years are not considered in the calculations + * below. + */ + full_years = (tdiff / DC_SPY); + if ((full_years >= DC_SCSI_NPY) && + (scp->ncycles <= scp->lifemax)) + return (1); + + /* + * Determine what is the normal cycle usage for the + * device at the beginning and the end of this year. + */ + lower_bound_cycles = (!full_years) ? 0 : + ((scp->lifemax * pcnt[full_years - 1]) / 100); + upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100; + + if (scp->ncycles <= lower_bound_cycles) + return (1); + + /* + * The linear slope that determines how many cycles + * are allowed this year is number of seconds + * passed this year over total number of seconds in a year. + */ + cycles_diff = (upper_bound_cycles - lower_bound_cycles); + within_year = (tdiff % DC_SPY); + cycles_allowed = lower_bound_cycles + + (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY); + PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf, + full_years, within_year)) + PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf, + cycles_allowed)) + + if (scp->ncycles <= cycles_allowed) + return (1); + + /* + * The transition is not advised now but we can + * determine when the next transition can be made. + * + * Depending on how many cycles the device has been + * over-used, we may need to skip years with + * different percentage quota in order to determine + * when the next transition can be made. + */ + cycles_over = (scp->ncycles - lower_bound_cycles); + while (cycles_over > cycles_diff) { + full_years++; + if (full_years >= DC_SCSI_NPY) { + *intervalp = (LONG_MAX / hz); + return (0); + } + cycles_over -= cycles_diff; + lower_bound_cycles = upper_bound_cycles; + upper_bound_cycles = + (scp->lifemax * pcnt[full_years]) / 100; + cycles_diff = (upper_bound_cycles - lower_bound_cycles); + } + + /* + * The linear slope that determines when the next transition + * can be made is the relative position of used cycles within a + * year over total number of cycles within that year. + */ + when_allowed = service_seconds + (full_years * DC_SPY) + + (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff); + *intervalp = (when_allowed - now); + if (*intervalp > (LONG_MAX / hz)) + *intervalp = (LONG_MAX / hz); + PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf, + *intervalp)) + return (0); + } + + PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf)) + return (-1); +} + +/* + * Nexus drivers call into pm framework to indicate which child driver is about + * to be installed. In some platforms, ppm may need to configure the hardware + * for successful installation of a driver. + */ +int +pm_init_child(dev_info_t *dip) +{ + power_req_t power_req; + + ASSERT(ddi_binding_name(dip)); + ASSERT(ddi_get_name_addr(dip)); + pm_ppm_claim(dip); + if (pm_ppm_claimed(dip)) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_INIT_CHILD; + power_req.req.ppm_config_req.who = dip; + ASSERT(PPM(dip) != NULL); + return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, + NULL)); + } else { +#ifdef DEBUG + /* pass it to the default handler so we can debug things */ + power_req.request_type = PMR_PPM_INIT_CHILD; + power_req.req.ppm_config_req.who = dip; + (void) pm_ctlops(NULL, dip, + DDI_CTLOPS_POWER, &power_req, NULL); +#endif + } + return (DDI_SUCCESS); +} + +/* + * Bring parent of a node that is about to be probed up to full power, and + * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide + * it is time to let it go down again + */ +void +pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp) +{ + int result; + power_req_t power_req; + + bzero(cp, sizeof (*cp)); + cp->ppc_dip = dip; + + pm_ppm_claim(dip); + if (pm_ppm_claimed(dip)) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_PRE_PROBE; + power_req.req.ppm_config_req.who = dip; + ASSERT(PPM(dip) != NULL); + (void) pm_ctlops(PPM(dip), dip, + DDI_CTLOPS_POWER, &power_req, &result); + cp->ppc_ppm = PPM(dip); + } else { +#ifdef DEBUG + /* pass it to the default handler so we can debug things */ + power_req.request_type = PMR_PPM_PRE_PROBE; + power_req.req.ppm_config_req.who = dip; + (void) pm_ctlops(NULL, dip, + DDI_CTLOPS_POWER, &power_req, &result); +#endif + cp->ppc_ppm = NULL; + } +} + +int +pm_pre_config(dev_info_t *dip, char *devnm) +{ + PMD_FUNC(pmf, "pre_config") + int ret; + + if (MDI_VHCI(dip)) { + PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0); + return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE); + } else if (!PM_GET_PM_INFO(dip)) + return (DDI_SUCCESS); + + PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + pm_hold_power(dip); + ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK); + if (ret != DDI_SUCCESS) + pm_rele_power(dip); + return (ret); +} + +/* + * This routine is called by devfs during its walk to unconfigue a node. + * If the call is due to auto mod_unloads and the dip is not at its + * full power, we return DDI_FAILURE to terminate the walk, otherwise + * return DDI_SUCCESS. + */ +int +pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm) +{ + PMD_FUNC(pmf, "pre_unconfig") + int ret; + + if (MDI_VHCI(dip)) { + PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, + PM_DEVICE(dip), flags)) + ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags); + return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE); + } else if (!PM_GET_PM_INFO(dip)) + return (DDI_SUCCESS); + + PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip), + flags)) + *held = 0; + + /* + * If the dip is a leaf node, don't power it up. + */ + if (!ddi_get_child(dip)) + return (DDI_SUCCESS); + + /* + * Do not power up the node if it is called due to auto-modunload. + */ + if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip)) + return (DDI_FAILURE); + + pm_hold_power(dip); + *held = 1; + ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK); + if (ret != DDI_SUCCESS) { + pm_rele_power(dip); + *held = 0; + } + return (ret); +} + +/* + * Notify ppm of attach action. Parent is already held at full power by + * probe action. + */ +void +pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd) +{ + static char *me = "pm_pre_attach"; + power_req_t power_req; + int result; + + /* + * Initialize and fill in the PPM cookie + */ + bzero(cp, sizeof (*cp)); + cp->ppc_cmd = (int)cmd; + cp->ppc_ppm = PPM(dip); + cp->ppc_dip = dip; + + /* + * DDI_ATTACH and DDI_RESUME cmds need to call platform specific + * Power Management stuff. DDI_RESUME also has to purge it's + * powerlevel information. + */ + switch (cmd) { + case DDI_ATTACH: + if (cp->ppc_ppm) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_PRE_ATTACH; + power_req.req.ppm_config_req.who = dip; + ASSERT(PPM(dip)); + (void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER, + &power_req, &result); + } +#ifdef DEBUG + else { + power_req.request_type = PMR_PPM_PRE_ATTACH; + power_req.req.ppm_config_req.who = dip; + (void) pm_ctlops(NULL, dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#endif + break; + case DDI_RESUME: + pm_forget_power_level(dip); + + if (cp->ppc_ppm) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_PRE_RESUME; + power_req.req.resume_req.who = cp->ppc_dip; + power_req.req.resume_req.cmd = + (ddi_attach_cmd_t)cp->ppc_cmd; + ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm); + (void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#ifdef DEBUG + else { + power_req.request_type = PMR_PPM_PRE_RESUME; + power_req.req.resume_req.who = cp->ppc_dip; + power_req.req.resume_req.cmd = + (ddi_attach_cmd_t)cp->ppc_cmd; + (void) pm_ctlops(NULL, cp->ppc_dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#endif + break; + + case DDI_PM_RESUME: + break; + + default: + panic(me); + } +} + +/* + * Nexus drivers call into pm framework to indicate which child driver is + * being uninstalled. In some platforms, ppm may need to reconfigure the + * hardware since the device driver is no longer installed. + */ +int +pm_uninit_child(dev_info_t *dip) +{ + power_req_t power_req; + + ASSERT(ddi_binding_name(dip)); + ASSERT(ddi_get_name_addr(dip)); + pm_ppm_claim(dip); + if (pm_ppm_claimed(dip)) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_UNINIT_CHILD; + power_req.req.ppm_config_req.who = dip; + ASSERT(PPM(dip)); + return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, + NULL)); + } else { +#ifdef DEBUG + /* pass it to the default handler so we can debug things */ + power_req.request_type = PMR_PPM_UNINIT_CHILD; + power_req.req.ppm_config_req.who = dip; + (void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL); +#endif + } + return (DDI_SUCCESS); +} +/* + * Decrement kidsupcnt so scan can turn the parent back off if it is idle + * Also notify ppm of result of probe if there is a ppm that cares + */ +void +pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed) +{ + _NOTE(ARGUNUSED(probe_failed)) + int result; + power_req_t power_req; + + if (cp->ppc_ppm) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_POST_PROBE; + power_req.req.ppm_config_req.who = cp->ppc_dip; + power_req.req.ppm_config_req.result = ret; + ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm); + (void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER, + &power_req, &result); + } +#ifdef DEBUG + else { + power_req.request_type = PMR_PPM_POST_PROBE; + power_req.req.ppm_config_req.who = cp->ppc_dip; + power_req.req.ppm_config_req.result = ret; + (void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER, + &power_req, &result); + } +#endif +} + +void +pm_post_config(dev_info_t *dip, char *devnm) +{ + PMD_FUNC(pmf, "post_config") + + if (MDI_VHCI(dip)) { + PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + (void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0); + return; + } else if (!PM_GET_PM_INFO(dip)) + return; + + PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + pm_rele_power(dip); +} + +void +pm_post_unconfig(dev_info_t *dip, int held, char *devnm) +{ + PMD_FUNC(pmf, "post_unconfig") + + if (MDI_VHCI(dip)) { + PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, + PM_DEVICE(dip), held)) + (void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0); + return; + } else if (!PM_GET_PM_INFO(dip)) + return; + + PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip), + held)) + if (!held) + return; + /* + * We have held power in pre_unconfig, release it here. + */ + pm_rele_power(dip); +} + +/* + * Notify ppm of result of attach if there is a ppm that cares + */ +void +pm_post_attach(pm_ppm_cookie_t *cp, int ret) +{ + int result; + power_req_t power_req; + dev_info_t *dip; + + if (cp->ppc_cmd != DDI_ATTACH) + return; + + dip = cp->ppc_dip; + + if (ret == DDI_SUCCESS) { + /* + * Attach succeeded, so proceed to doing post-attach pm tasks + */ + if (PM_GET_PM_INFO(dip) == NULL) + (void) pm_start(dip); + } else { + /* + * Attach may have got pm started before failing + */ + pm_stop(dip); + } + + if (cp->ppc_ppm) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_POST_ATTACH; + power_req.req.ppm_config_req.who = cp->ppc_dip; + power_req.req.ppm_config_req.result = ret; + ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm); + (void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#ifdef DEBUG + else { + power_req.request_type = PMR_PPM_POST_ATTACH; + power_req.req.ppm_config_req.who = cp->ppc_dip; + power_req.req.ppm_config_req.result = ret; + (void) pm_ctlops(NULL, cp->ppc_dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#endif +} + +/* + * Notify ppm of attach action. Parent is already held at full power by + * probe action. + */ +void +pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp) +{ + int result; + power_req_t power_req; + + bzero(cp, sizeof (*cp)); + cp->ppc_dip = dip; + cp->ppc_cmd = (int)cmd; + + switch (cmd) { + case DDI_DETACH: + pm_detaching(dip); /* suspend pm while detaching */ + if (pm_ppm_claimed(dip)) { /* if ppm driver claims node */ + power_req.request_type = PMR_PPM_PRE_DETACH; + power_req.req.ppm_config_req.who = dip; + ASSERT(PPM(dip)); + (void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, + &power_req, &result); + cp->ppc_ppm = PPM(dip); + } else { +#ifdef DEBUG + /* pass to the default handler so we can debug things */ + power_req.request_type = PMR_PPM_PRE_DETACH; + power_req.req.ppm_config_req.who = dip; + (void) pm_ctlops(NULL, dip, + DDI_CTLOPS_POWER, &power_req, &result); +#endif + cp->ppc_ppm = NULL; + } + break; + + default: + break; + } +} + +/* + * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop., + * (if devi_pm_noinvol count is 0) or an ancestor of such a node. We need to + * make an entry to record the details, which includes certain flag settings. + */ +static void +pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd, + int wasvolpmd, major_t major) +{ + PMD_FUNC(pmf, "record_invol_path") + major_t pm_path_to_major(char *); + size_t plen; + pm_noinvol_t *ip, *np, *pp; + pp = NULL; + + plen = strlen(path) + 1; + np = kmem_zalloc(sizeof (*np), KM_SLEEP); + np->ni_size = plen; + np->ni_path = kmem_alloc(plen, KM_SLEEP); + np->ni_noinvolpm = noinvolpm; + np->ni_volpmd = volpmd; + np->ni_wasvolpmd = wasvolpmd; + np->ni_flags = flags; + (void) strcpy(np->ni_path, path); + /* + * If we haven't actually seen the node attached, it is hard to figure + * out its major. If we could hold the node by path, we would be much + * happier here. + */ + if (major == (major_t)-1) { + np->ni_major = pm_path_to_major(path); + } else { + np->ni_major = major; + } + rw_enter(&pm_noinvol_rwlock, RW_WRITER); + for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) { + int comp = strcmp(path, ip->ni_path); + if (comp < 0) { + PMD(PMD_NOINVOL, ("%s: %s insert before %s\n", + pmf, path, ip->ni_path)) + /* insert before current entry */ + np->ni_next = ip; + if (pp) { + pp->ni_next = np; + } else { + pm_noinvol_head = np; + } + rw_exit(&pm_noinvol_rwlock); +#ifdef DEBUG + if (pm_debug & PMD_NOINVOL) + pr_noinvol("record_invol_path exit0"); +#endif + return; + } else if (comp == 0) { + panic("%s already in pm_noinvol list", path); + } + } + /* + * If we did not find an entry in the list that this should go before, + * then it must go at the end + */ + if (pp) { + PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path, + pp->ni_path)) + ASSERT(pp->ni_next == 0); + pp->ni_next = np; + } else { + PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path)) + ASSERT(!pm_noinvol_head); + pm_noinvol_head = np; + } + rw_exit(&pm_noinvol_rwlock); +#ifdef DEBUG + if (pm_debug & PMD_NOINVOL) + pr_noinvol("record_invol_path exit"); +#endif +} + +void +pm_record_invol(dev_info_t *dip) +{ + char *pathbuf; + int pm_all_components_off(dev_info_t *); + int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip); + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + + pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags & + (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm, + DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip)); + + /* + * If this child's detach will be holding up its ancestors, then we + * allow for an exception to that if all children of this type have + * gone down voluntarily. + * Now walk down the tree incrementing devi_pm_noinvolpm + */ + (void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf, + dip); + kmem_free(pathbuf, MAXPATHLEN); +} + +void +pm_post_detach(pm_ppm_cookie_t *cp, int ret) +{ + dev_info_t *dip = cp->ppc_dip; + int result; + power_req_t power_req; + + switch (cp->ppc_cmd) { + case DDI_DETACH: + if (cp->ppc_ppm) { /* if ppm driver claims the node */ + power_req.request_type = PMR_PPM_POST_DETACH; + power_req.req.ppm_config_req.who = cp->ppc_dip; + power_req.req.ppm_config_req.result = ret; + ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm); + (void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#ifdef DEBUG + else { + power_req.request_type = PMR_PPM_POST_DETACH; + power_req.req.ppm_config_req.who = cp->ppc_dip; + power_req.req.ppm_config_req.result = ret; + (void) pm_ctlops(NULL, cp->ppc_dip, + DDI_CTLOPS_POWER, &power_req, &result); + } +#endif + if (ret == DDI_SUCCESS) { + /* + * For hotplug detach we assume it is *really* gone + */ + if (cp->ppc_cmd == DDI_DETACH && + ((DEVI(dip)->devi_pm_flags & + (PMC_NO_INVOL | PMC_CONSOLE_FB)) || + DEVI(dip)->devi_pm_noinvolpm)) + pm_record_invol(dip); + DEVI(dip)->devi_pm_flags &= + ~(PMC_NO_INVOL | PMC_NOINVOL_DONE); + + /* + * If console fb is detaching, then we don't need to + * worry any more about it going off (pm_detaching has + * brought up all components) + */ + if (PM_IS_CFB(dip)) { + mutex_enter(&pm_cfb_lock); + ASSERT(cfb_dip_detaching); + ASSERT(cfb_dip == NULL); + ASSERT(pm_cfb_comps_off == 0); + cfb_dip_detaching = NULL; + mutex_exit(&pm_cfb_lock); + } + pm_stop(dip); /* make it permanent */ + } else { + if (PM_IS_CFB(dip)) { + mutex_enter(&pm_cfb_lock); + ASSERT(cfb_dip_detaching); + ASSERT(cfb_dip == NULL); + ASSERT(pm_cfb_comps_off == 0); + cfb_dip = cfb_dip_detaching; + cfb_dip_detaching = NULL; + mutex_exit(&pm_cfb_lock); + } + pm_detach_failed(dip); /* resume power management */ + } + break; + case DDI_PM_SUSPEND: + break; + case DDI_SUSPEND: + break; /* legal, but nothing to do */ + default: +#ifdef DEBUG + panic("pm_post_detach: unrecognized cmd %d for detach", + cp->ppc_cmd); + /*NOTREACHED*/ +#else + break; +#endif + } +} + +/* + * Called after vfs_mountroot has got the clock started to fix up timestamps + * that were set when root bush drivers attached. hresttime was 0 then, so the + * devices look busy but have a 0 busycnt + */ +int +pm_adjust_timestamps(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + + pm_info_t *info = PM_GET_PM_INFO(dip); + struct pm_component *cp; + int i; + + if (!info) + return (DDI_WALK_CONTINUE); + PM_LOCK_BUSY(dip); + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + cp = PM_CP(dip, i); + if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0) + cp->pmc_timestamp = gethrestime_sec(); + } + PM_UNLOCK_BUSY(dip); + return (DDI_WALK_CONTINUE); +} + +/* + * Called at attach time to see if the device being attached has a record in + * the no involuntary power cycles list. If so, we do some bookkeeping on the + * parents and set a flag in the dip + */ +void +pm_noinvol_specd(dev_info_t *dip) +{ + PMD_FUNC(pmf, "noinvol_specd") + char *pathbuf; + pm_noinvol_t *ip, *pp = NULL; + int wasvolpmd; + int found = 0; + + if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE) + return; + DEVI(dip)->devi_pm_flags |= PMC_NOINVOL_DONE; + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, pathbuf); + + PM_LOCK_DIP(dip); + DEVI(dip)->devi_pm_volpmd = 0; + DEVI(dip)->devi_pm_noinvolpm = 0; + rw_enter(&pm_noinvol_rwlock, RW_READER); + for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) { + PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n", + pmf, pathbuf, ip->ni_path)) + if (strcmp(pathbuf, ip->ni_path) == 0) { + found++; + break; + } + } + rw_exit(&pm_noinvol_rwlock); + if (!found) { + PM_UNLOCK_DIP(dip); + kmem_free(pathbuf, MAXPATHLEN); + return; + } + rw_enter(&pm_noinvol_rwlock, RW_WRITER); + pp = NULL; + for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) { + PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n", + pmf, pathbuf, ip->ni_path)) + if (strcmp(pathbuf, ip->ni_path) == 0) { + ip->ni_flags &= ~PMC_DRIVER_REMOVED; + DEVI(dip)->devi_pm_flags |= ip->ni_flags; + /* + * Handle special case of console fb + */ + if (PM_IS_CFB(dip)) { + mutex_enter(&pm_cfb_lock); + cfb_dip = dip; + PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting " + "cfb_dip\n", pmf, PM_DEVICE(dip))) + mutex_exit(&pm_cfb_lock); + } + DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm; + ASSERT((DEVI(dip)->devi_pm_flags & + (PMC_NO_INVOL | PMC_CONSOLE_FB)) || + DEVI(dip)->devi_pm_noinvolpm); + DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd; + PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, " + "wasvolpmd=%d, flags=%x, path=%s\n", pmf, + ip->ni_noinvolpm, ip->ni_volpmd, + ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path)) + /* + * free the entry in hopes the list will now be empty + * and we won't have to search it any more until the + * device detaches + */ + if (pp) { + PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n", + pmf, ip->ni_path, pp->ni_path)) + pp->ni_next = ip->ni_next; + } else { + PMD(PMD_NOINVOL, ("%s: free %s head\n", + pmf, ip->ni_path)) + ASSERT(pm_noinvol_head == ip); + pm_noinvol_head = ip->ni_next; + } + PM_UNLOCK_DIP(dip); + wasvolpmd = ip->ni_wasvolpmd; + rw_exit(&pm_noinvol_rwlock); + kmem_free(ip->ni_path, ip->ni_size); + kmem_free(ip, sizeof (*ip)); + /* + * Now walk up the tree decrementing devi_pm_noinvolpm + * (and volpmd if appropriate) + */ + (void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0, + wasvolpmd, pathbuf, dip); +#ifdef DEBUG + if (pm_debug & PMD_NOINVOL) + pr_noinvol("noinvol_specd exit"); +#endif + kmem_free(pathbuf, MAXPATHLEN); + return; + } + } + kmem_free(pathbuf, MAXPATHLEN); + rw_exit(&pm_noinvol_rwlock); + PM_UNLOCK_DIP(dip); +} + +int +pm_all_components_off(dev_info_t *dip) +{ + int i; + pm_component_t *cp; + + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + cp = PM_CP(dip, i); + if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN || + cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]) + return (0); + } + return (1); /* all off */ +} + +/* + * Make sure that all "no involuntary power cycles" devices are attached. + * Called before doing a cpr suspend to make sure the driver has a say about + * the power cycle + */ +int +pm_reattach_noinvol(void) +{ + PMD_FUNC(pmf, "reattach_noinvol") + pm_noinvol_t *ip; + char *path; + dev_info_t *dip; + + /* + * Prevent the modunload thread from unloading any modules until we + * have completely stopped all kernel threads. + */ + modunload_disable(); + for (ip = pm_noinvol_head; ip; ip = ip->ni_next) { + /* + * Forget we'v ever seen any entry + */ + ip->ni_persistent = 0; + } +restart: + rw_enter(&pm_noinvol_rwlock, RW_READER); + for (ip = pm_noinvol_head; ip; ip = ip->ni_next) { + major_t maj; + maj = ip->ni_major; + path = ip->ni_path; + if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) { + if (ip->ni_persistent) { + /* + * If we weren't able to make this entry + * go away, then we give up, as + * holding/attaching the driver ought to have + * resulted in this entry being deleted + */ + PMD(PMD_NOINVOL, ("%s: can't reattach %s " + "(%s|%d)\n", pmf, ip->ni_path, + ddi_major_to_name(maj), (int)maj)) + cmn_err(CE_WARN, "cpr: unable to reattach %s ", + ip->ni_path); + modunload_enable(); + rw_exit(&pm_noinvol_rwlock); + return (0); + } + ip->ni_persistent++; + rw_exit(&pm_noinvol_rwlock); + PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path)) + dip = e_ddi_hold_devi_by_path(path, 0); + if (dip == NULL) { + PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n", + pmf, path, (int)maj)) + cmn_err(CE_WARN, "cpr: unable to hold %s " + "driver", path); + modunload_enable(); + return (0); + } else { + PMD(PMD_DHR, ("%s: release %s\n", pmf, path)) + /* + * Since the modunload thread is stopped, we + * don't have to keep the driver held, which + * saves a ton of bookkeeping + */ + ddi_release_devi(dip); + goto restart; + } + } else { + PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n", + pmf, ip->ni_path)) + continue; + } + } + rw_exit(&pm_noinvol_rwlock); + return (1); +} + +void +pm_reattach_noinvol_fini(void) +{ + modunload_enable(); +} + +/* + * Display pm support code + */ + + +/* + * console frame-buffer power-mgmt gets enabled when debugging + * services are not present or console fbpm override is set + */ +void +pm_cfb_setup(const char *stdout_path) +{ + PMD_FUNC(pmf, "cfb_setup") + extern int obpdebug; + char *devname; + dev_info_t *dip; + int devname_len; + extern dev_info_t *fbdip; + + /* + * By virtue of this function being called (from consconfig), + * we know stdout is a framebuffer. + */ + stdout_is_framebuffer = 1; + + if (obpdebug || (boothowto & RB_DEBUG)) { + if (pm_cfb_override == 0) { + /* + * Console is frame buffer, but we want to suppress + * pm on it because of debugging setup + */ + pm_cfb_enabled = 0; + cmn_err(CE_NOTE, "Kernel debugger present: disabling " + "console power management."); + /* + * however, we still need to know which is the console + * fb in order to suppress pm on it + */ + } else { + cmn_err(CE_WARN, "Kernel debugger present: see " + "kmdb(1M) for interaction with power management."); + } + } +#ifdef DEBUG + /* + * IF console is fb and is power managed, don't do prom_printfs from + * pm debug macro + */ + if (pm_cfb_enabled) { + if (pm_debug) + prom_printf("pm debug output will be to log only\n"); + pm_divertdebug++; + } +#endif + devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP); + devname_len = strlen(devname) + 1; + PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname)) + /* if the driver is attached */ + if ((dip = fbdip) != NULL) { + PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + /* + * We set up here as if the driver were power manageable in case + * we get a later attach of a pm'able driver (which would result + * in a panic later) + */ + cfb_dip = dip; + DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL); + PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) +#ifdef DEBUG + if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) { + PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n", + pmf, PM_DEVICE(dip))) + } +#endif + } else { + char *ep; + PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname)) + pm_record_invol_path(devname, + (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0, + (major_t)-1); + for (ep = strrchr(devname, '/'); ep != devname; + ep = strrchr(devname, '/')) { + PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname)) + *ep = '\0'; + dip = pm_name_to_dip(devname, 0); + if (dip != NULL) { + /* + * Walk up the tree incrementing + * devi_pm_noinvolpm + */ + (void) pm_noinvol_update(PM_BP_NOINVOL_CFB, + 0, 0, devname, dip); + break; + } else { + pm_record_invol_path(devname, + PMC_NO_INVOL, 1, 0, 0, (major_t)-1); + } + } + } + kmem_free(devname, devname_len); +} + +void +pm_cfb_rele(void) +{ + mutex_enter(&pm_cfb_lock); + /* + * this call isn't using the console any more, it is ok to take it + * down if the count goes to 0 + */ + cfb_inuse--; + mutex_exit(&pm_cfb_lock); +} + +/* + * software interrupt handler for fbpm; this function exists because we can't + * bring up the frame buffer power from above lock level. So if we need to, + * we instead schedule a softint that runs this routine and takes us into + * debug_enter (a bit delayed from the original request, but avoiding a panic). + */ +static uint_t +pm_cfb_softint(caddr_t int_handler_arg) +{ + _NOTE(ARGUNUSED(int_handler_arg)) + int rval = DDI_INTR_UNCLAIMED; + + mutex_enter(&pm_cfb_lock); + if (pm_soft_pending) { + mutex_exit(&pm_cfb_lock); + debug_enter((char *)NULL); + /* acquired in debug_enter before calling pm_cfb_trigger */ + pm_cfb_rele(); + mutex_enter(&pm_cfb_lock); + pm_soft_pending = 0; + mutex_exit(&pm_cfb_lock); + rval = DDI_INTR_CLAIMED; + } else + mutex_exit(&pm_cfb_lock); + + return (rval); +} + +void +pm_cfb_setup_intr(void) +{ + PMD_FUNC(pmf, "cfb_setup_intr") + extern void prom_set_outfuncs(void (*)(void), void (*)(void)); + void pm_cfb_check_and_powerup(void); + + if (!stdout_is_framebuffer) { + PMD(PMD_CFB, ("%s: console not fb\n", pmf)) + return; + } + mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8)); +#ifdef DEBUG + mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8)); +#endif + /* + * setup software interrupt handler + */ + if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id, + NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS) + panic("pm: unable to register soft intr."); + + prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele); +} + +/* + * Checks to see if it is safe to write to the console wrt power management + * (i.e. if the console is a framebuffer, then it must be at full power) + * returns 1 when power is off (power-up is needed) + * returns 0 when power is on (power-up not needed) + */ +int +pm_cfb_check_and_hold(void) +{ + /* + * cfb_dip is set iff console is a power manageable frame buffer + * device + */ + extern int modrootloaded; + + mutex_enter(&pm_cfb_lock); + cfb_inuse++; + ASSERT(cfb_inuse); /* wrap? */ + if (modrootloaded && cfb_dip) { + /* + * don't power down the frame buffer, the prom is using it + */ + if (pm_cfb_comps_off) { + mutex_exit(&pm_cfb_lock); + return (1); + } + } + mutex_exit(&pm_cfb_lock); + return (0); +} + +/* + * turn on cfb power (which is known to be off). + * Must be called below lock level! + */ +void +pm_cfb_powerup(void) +{ + pm_info_t *info; + int norm; + int ccount, ci; + int unused; +#ifdef DEBUG + /* + * Can't reenter prom_prekern, so suppress pm debug messages + * (still go to circular buffer). + */ + mutex_enter(&pm_debug_lock); + pm_divertdebug++; + mutex_exit(&pm_debug_lock); +#endif + info = PM_GET_PM_INFO(cfb_dip); + ASSERT(info); + + ccount = PM_NUMCMPTS(cfb_dip); + for (ci = 0; ci < ccount; ci++) { + norm = pm_get_normal_power(cfb_dip, ci); + (void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY, + PM_CANBLOCK_BYPASS, 0, &unused); + } +#ifdef DEBUG + mutex_enter(&pm_debug_lock); + pm_divertdebug--; + mutex_exit(&pm_debug_lock); +#endif +} + +/* + * Check if the console framebuffer is powered up. If not power it up. + * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which + * must be released by calling pm_cfb_rele when the console fb operation + * is completed. + */ +void +pm_cfb_check_and_powerup(void) +{ + if (pm_cfb_check_and_hold()) + pm_cfb_powerup(); +} + +/* + * Trigger a low level interrupt to power up console frame buffer. + */ +void +pm_cfb_trigger(void) +{ + if (cfb_dip == NULL) + return; + + mutex_enter(&pm_cfb_lock); + /* + * If machine appears to be hung, pulling the keyboard connector of + * the console will cause a high level interrupt and go to debug_enter. + * But, if the fb is powered down, this routine will be called to bring + * it up (by generating a softint to do the work). If soft interrupts + * are not running, and the keyboard connector is pulled again, the + * following code detects this condition and calls panic which allows + * the fb to be brought up from high level. + * + * If two nearly simultaneous calls to debug_enter occur (both from + * high level) the code described above will cause a panic. + */ + if (lbolt <= pm_soft_pending) { + panicstr = "pm_cfb_trigger: lbolt not advancing"; + panic(panicstr); /* does a power up at any intr level */ + /* NOTREACHED */ + } + pm_soft_pending = lbolt; + mutex_exit(&pm_cfb_lock); + ddi_trigger_softintr(pm_soft_id); +} + +major_t +pm_path_to_major(char *path) +{ + PMD_FUNC(pmf, "path_to_major") + char *np, *ap, *bp; + major_t ret; + size_t len; + static major_t i_path_to_major(char *, char *); + + PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path)) + + np = strrchr(path, '/'); + if (np != NULL) + np++; + else + np = path; + len = strlen(np) + 1; + bp = kmem_alloc(len, KM_SLEEP); + (void) strcpy(bp, np); + if ((ap = strchr(bp, '@')) != NULL) { + *ap = '\0'; + } + PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np))) + ret = i_path_to_major(path, np); + kmem_free(bp, len); + return (ret); +} + +#ifdef DEBUG + +char *pm_msgp; +char *pm_bufend; +char *pm_msgbuf = NULL; +int pm_logpages = 2; + +#define PMLOGPGS pm_logpages + +/*PRINTFLIKE1*/ +void +pm_log(const char *fmt, ...) +{ + va_list adx; + size_t size; + + mutex_enter(&pm_debug_lock); + if (pm_msgbuf == NULL) { + pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP); + pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1; + pm_msgp = pm_msgbuf; + } + va_start(adx, fmt); + size = vsnprintf(NULL, 0, fmt, adx) + 1; + va_end(adx); + va_start(adx, fmt); + if (size > (pm_bufend - pm_msgp)) { /* wraps */ + bzero(pm_msgp, pm_bufend - pm_msgp); + (void) vsnprintf(pm_msgbuf, size, fmt, adx); + if (!pm_divertdebug) + prom_printf("%s", pm_msgp); + pm_msgp = pm_msgbuf + size; + } else { + (void) vsnprintf(pm_msgp, size, fmt, adx); + if (!pm_divertdebug) + prom_printf("%s", pm_msgp); + pm_msgp += size; + } + va_end(adx); + mutex_exit(&pm_debug_lock); +} +#endif /* DEBUG */ + +/* + * We want to save the state of any directly pm'd devices over the suspend/ + * resume process so that we can put them back the way the controlling + * process left them. + */ +void +pm_save_direct_levels(void) +{ + pm_processes_stopped = 1; + ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0); +} + +static int +pm_save_direct_lvl_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + int i; + int *ip; + pm_info_t *info = PM_GET_PM_INFO(dip); + + if (!info) + return (DDI_WALK_CONTINUE); + + if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) { + if (PM_NUMCMPTS(dip) > 2) { + info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) * + sizeof (int), KM_SLEEP); + ip = info->pmi_lp; + } else { + ip = info->pmi_levels; + } + /* autopm and processes are stopped, ok not to lock power */ + for (i = 0; i < PM_NUMCMPTS(dip); i++) + *ip++ = PM_CURPOWER(dip, i); + /* + * There is a small window between stopping the + * processes and setting pm_processes_stopped where + * a driver could get hung up in a pm_raise_power() + * call. Free any such driver now. + */ + pm_proceed(dip, PMP_RELEASE, -1, -1); + } + + return (DDI_WALK_CONTINUE); +} + +void +pm_restore_direct_levels(void) +{ + /* + * If cpr didn't call pm_save_direct_levels, (because stopping user + * threads failed) then we don't want to try to restore them + */ + if (!pm_processes_stopped) + return; + + ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0); + pm_processes_stopped = 0; +} + +static int +pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + PMD_FUNC(pmf, "restore_direct_lvl_walk") + int i, nc, result; + int *ip; + + pm_info_t *info = PM_GET_PM_INFO(dip); + if (!info) + return (DDI_WALK_CONTINUE); + + if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) { + if ((nc = PM_NUMCMPTS(dip)) > 2) { + ip = &info->pmi_lp[nc - 1]; + } else { + ip = &info->pmi_levels[nc - 1]; + } + /* + * Because fb drivers fail attempts to turn off the + * fb when the monitor is on, but treat a request to + * turn on the monitor as a request to turn on the + * fb too, we process components in descending order + * Because autopm is disabled and processes aren't + * running, it is ok to examine current power outside + * of the power lock + */ + for (i = nc - 1; i >= 0; i--, ip--) { + if (PM_CURPOWER(dip, i) == *ip) + continue; + if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT, + PM_CANBLOCK_BYPASS, 0, &result) != + DDI_SUCCESS) { + cmn_err(CE_WARN, "cpr: unable " + "to restore power level of " + "component %d of directly " + "power manged device %s@%s" + " to %d", + i, PM_NAME(dip), + PM_ADDR(dip), *ip); + PMD(PMD_FAIL, ("%s: failed to restore " + "%s@%s(%s#%d)[%d] exact(%d)->%d, " + "errno %d\n", pmf, PM_DEVICE(dip), i, + PM_CURPOWER(dip, i), *ip, result)) + } + } + if (nc > 2) { + kmem_free(info->pmi_lp, nc * sizeof (int)); + info->pmi_lp = NULL; + } + } + return (DDI_WALK_CONTINUE); +} + +/* + * Stolen from the bootdev module + * attempt to convert a path to a major number + */ +static major_t +i_path_to_major(char *path, char *leaf_name) +{ + extern major_t path_to_major(char *pathname); + major_t maj; + + if ((maj = path_to_major(path)) == (major_t)-1) { + maj = ddi_name_to_major(leaf_name); + } + + return (maj); +} + +/* + * When user calls rem_drv, we need to forget no-involuntary-power-cycles state + * An entry in the list means that the device is detached, so we need to + * adjust its ancestors as if they had just seen this attach, and any detached + * ancestors need to have their list entries adjusted. + */ +void +pm_driver_removed(major_t major) +{ + static void i_pm_driver_removed(major_t major); + + /* + * Serialize removal of drivers. This is to keep ancestors of + * a node that is being deleted from getting deleted and added back + * with different counters. + */ + mutex_enter(&pm_remdrv_lock); + i_pm_driver_removed(major); + mutex_exit(&pm_remdrv_lock); +} + +/* + * This routine is called recursively by pm_noinvol_process_ancestors() + */ +static void +i_pm_driver_removed(major_t major) +{ + PMD_FUNC(pmf, "driver_removed") + static void adjust_ancestors(char *, int); + static int pm_is_noinvol_ancestor(pm_noinvol_t *); + static void pm_noinvol_process_ancestors(char *); + pm_noinvol_t *ip, *pp = NULL; + int wasvolpmd; + ASSERT(major != (major_t)-1); + PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major))) +again: + rw_enter(&pm_noinvol_rwlock, RW_WRITER); + for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) { + if (major != ip->ni_major) + continue; + /* + * If it is an ancestor of no-invol node, which is + * not removed, skip it. This is to cover the case of + * ancestor removed without removing its descendants. + */ + if (pm_is_noinvol_ancestor(ip)) { + ip->ni_flags |= PMC_DRIVER_REMOVED; + continue; + } + wasvolpmd = ip->ni_wasvolpmd; + /* + * remove the entry from the list + */ + if (pp) { + PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n", + pmf, ip->ni_path, pp->ni_path)) + pp->ni_next = ip->ni_next; + } else { + PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf, + ip->ni_path)) + ASSERT(pm_noinvol_head == ip); + pm_noinvol_head = ip->ni_next; + } + rw_exit(&pm_noinvol_rwlock); + adjust_ancestors(ip->ni_path, wasvolpmd); + /* + * Had an ancestor been removed before this node, it would have + * been skipped. Adjust the no-invol counters for such skipped + * ancestors. + */ + pm_noinvol_process_ancestors(ip->ni_path); + kmem_free(ip->ni_path, ip->ni_size); + kmem_free(ip, sizeof (*ip)); + goto again; + } + rw_exit(&pm_noinvol_rwlock); +} + +/* + * returns 1, if *aip is a ancestor of a no-invol node + * 0, otherwise + */ +static int +pm_is_noinvol_ancestor(pm_noinvol_t *aip) +{ + pm_noinvol_t *ip; + + ASSERT(strlen(aip->ni_path) != 0); + for (ip = pm_noinvol_head; ip; ip = ip->ni_next) { + if (ip == aip) + continue; + /* + * To be an ancestor, the path must be an initial substring of + * the descendent, and end just before a '/' in the + * descendent's path. + */ + if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) && + (ip->ni_path[strlen(aip->ni_path)] == '/')) + return (1); + } + return (0); +} + +#define PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip)) +/* + * scan through the pm_noinvolpm list adjusting ancestors of the current + * node; Modifies string *path. + */ +static void +adjust_ancestors(char *path, int wasvolpmd) +{ + PMD_FUNC(pmf, "adjust_ancestors") + char *cp; + pm_noinvol_t *lp; + pm_noinvol_t *pp = NULL; + major_t locked = (major_t)UINT_MAX; + dev_info_t *dip; + char *pathbuf; + size_t pathbuflen = strlen(path) + 1; + + /* + * First we look up the ancestor's dip. If we find it, then we + * adjust counts up the tree + */ + PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd)) + pathbuf = kmem_alloc(pathbuflen, KM_SLEEP); + (void) strcpy(pathbuf, path); + cp = strrchr(pathbuf, '/'); + if (cp == NULL) { + /* if no ancestors, then nothing to do */ + kmem_free(pathbuf, pathbuflen); + return; + } + *cp = '\0'; + dip = pm_name_to_dip(pathbuf, 1); + if (dip != NULL) { + locked = PM_MAJOR(dip); + + (void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd, + path, dip); + + if (locked != (major_t)UINT_MAX) + ddi_release_devi(dip); + } else { + char *apath; + size_t len = strlen(pathbuf) + 1; + int lock_held = 1; + + /* + * Now check for ancestors that exist only in the list + */ + apath = kmem_alloc(len, KM_SLEEP); + (void) strcpy(apath, pathbuf); + rw_enter(&pm_noinvol_rwlock, RW_WRITER); + for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) { + /* + * This can only happen once. Since we have to drop + * the lock, we need to extract the relevant info. + */ + if (strcmp(pathbuf, lp->ni_path) == 0) { + PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf, + lp->ni_path, lp->ni_noinvolpm, + lp->ni_noinvolpm - 1)) + lp->ni_noinvolpm--; + if (wasvolpmd && lp->ni_volpmd) { + PMD(PMD_NOINVOL, ("%s: %s vol %d -> " + "%d\n", pmf, lp->ni_path, + lp->ni_volpmd, lp->ni_volpmd - 1)) + lp->ni_volpmd--; + } + /* + * remove the entry from the list, if there + * are no more no-invol descendants and node + * itself is not a no-invol node. + */ + if (!(lp->ni_noinvolpm || + (lp->ni_flags & PMC_NO_INVOL))) { + ASSERT(lp->ni_volpmd == 0); + if (pp) { + PMD(PMD_NOINVOL, ("%s: freeing " + "%s, prev is %s\n", pmf, + lp->ni_path, pp->ni_path)) + pp->ni_next = lp->ni_next; + } else { + PMD(PMD_NOINVOL, ("%s: free %s " + "head\n", pmf, lp->ni_path)) + ASSERT(pm_noinvol_head == lp); + pm_noinvol_head = lp->ni_next; + } + lock_held = 0; + rw_exit(&pm_noinvol_rwlock); + adjust_ancestors(apath, wasvolpmd); + /* restore apath */ + (void) strcpy(apath, pathbuf); + kmem_free(lp->ni_path, lp->ni_size); + kmem_free(lp, sizeof (*lp)); + } + break; + } + } + if (lock_held) + rw_exit(&pm_noinvol_rwlock); + adjust_ancestors(apath, wasvolpmd); + kmem_free(apath, len); + } + kmem_free(pathbuf, pathbuflen); +} + +/* + * Do no-invol processing for any ancestors i.e. adjust counters of ancestors, + * which were skipped even though their drivers were removed. + */ +static void +pm_noinvol_process_ancestors(char *path) +{ + pm_noinvol_t *lp; + + rw_enter(&pm_noinvol_rwlock, RW_READER); + for (lp = pm_noinvol_head; lp; lp = lp->ni_next) { + if (strstr(path, lp->ni_path) && + (lp->ni_flags & PMC_DRIVER_REMOVED)) { + rw_exit(&pm_noinvol_rwlock); + i_pm_driver_removed(lp->ni_major); + return; + } + } + rw_exit(&pm_noinvol_rwlock); +} + +/* + * Returns true if (detached) device needs to be kept up because it exported the + * "no-involuntary-power-cycles" property or we're pretending it did (console + * fb case) or it is an ancestor of such a device and has used up the "one + * free cycle" allowed when all such leaf nodes have voluntarily powered down + * upon detach. In any event, we need an exact hit on the path or we return + * false. + */ +int +pm_noinvol_detached(char *path) +{ + PMD_FUNC(pmf, "noinvol_detached") + pm_noinvol_t *ip; + int ret = 0; + + rw_enter(&pm_noinvol_rwlock, RW_READER); + for (ip = pm_noinvol_head; ip; ip = ip->ni_next) { + if (strcmp(path, ip->ni_path) == 0) { + if (ip->ni_flags & PMC_CONSOLE_FB) { + PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB " + "%s\n", pmf, path)) + ret = 1; + break; + } +#ifdef DEBUG + if (ip->ni_noinvolpm != ip->ni_volpmd) + PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s" + "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd, + path)) +#endif + ret = (ip->ni_noinvolpm != ip->ni_volpmd); + break; + } + } + rw_exit(&pm_noinvol_rwlock); + return (ret); +} + +int +pm_is_cfb(dev_info_t *dip) +{ + return (dip == cfb_dip); +} + +#ifdef DEBUG +/* + * Return true if all components of the console frame buffer are at + * "normal" power, i.e., fully on. For the case where the console is not + * a framebuffer, we also return true + */ +int +pm_cfb_is_up(void) +{ + return (pm_cfb_comps_off == 0); +} +#endif + +/* + * Preventing scan from powering down the node by incrementing the + * kidsupcnt. + */ +void +pm_hold_power(dev_info_t *dip) +{ + e_pm_hold_rele_power(dip, 1); +} + +/* + * Releasing the hold by decrementing the kidsupcnt allowing scan + * to power down the node if all conditions are met. + */ +void +pm_rele_power(dev_info_t *dip) +{ + e_pm_hold_rele_power(dip, -1); +} + +/* + * A wrapper of pm_all_to_normal() to power up a dip + * to its normal level + */ +int +pm_powerup(dev_info_t *dip) +{ + PMD_FUNC(pmf, "pm_powerup") + + PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + ASSERT(!(servicing_interrupt())); + + /* + * in case this node is not already participating pm + */ + if (!PM_GET_PM_INFO(dip)) { + if (!DEVI_IS_ATTACHING(dip)) + return (DDI_SUCCESS); + if (pm_start(dip) != DDI_SUCCESS) + return (DDI_FAILURE); + if (!PM_GET_PM_INFO(dip)) + return (DDI_SUCCESS); + } + + return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK)); +} + +int +pm_rescan_walk(dev_info_t *dip, void *arg) +{ + _NOTE(ARGUNUSED(arg)) + + if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) + return (DDI_WALK_CONTINUE); + + /* + * Currently pm_cpr_callb/resume code is the only caller + * and it needs to make sure that stopped scan get + * reactivated. Otherwise, rescan walk needn't reactive + * stopped scan. + */ + pm_scan_init(dip); + + (void) pm_rescan(dip); + return (DDI_WALK_CONTINUE); +} + +static dev_info_t * +pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip) +{ + dev_info_t *wdip, *pdip; + + for (wdip = tdip; wdip != dip; wdip = pdip) { + pdip = ddi_get_parent(wdip); + if (pdip == dip) + return (wdip); + } + return (NULL); +} + +int +pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, + void *arg, void *result) +{ + PMD_FUNC(pmf, "bp_bus_power") + dev_info_t *cdip; + pm_info_t *cinfo; + pm_bp_child_pwrchg_t *bpc; + pm_sp_misc_t *pspm; + pm_bp_nexus_pwrup_t *bpn; + pm_bp_child_pwrchg_t new_bpc; + pm_bp_noinvol_t *bpi; + dev_info_t *tdip; + char *pathbuf; + int ret = DDI_SUCCESS; + int errno = 0; + pm_component_t *cp; + + PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip), + pm_decode_op(op))) + switch (op) { + case BUS_POWER_CHILD_PWRCHG: + bpc = (pm_bp_child_pwrchg_t *)arg; + pspm = (pm_sp_misc_t *)bpc->bpc_private; + tdip = bpc->bpc_dip; + cdip = pm_get_next_descendent(dip, tdip); + cinfo = PM_GET_PM_INFO(cdip); + if (cdip != tdip) { + /* + * If the node is an involved parent, it needs to + * power up the node as it is needed. There is nothing + * else the framework can do here. + */ + if (PM_WANTS_NOTIFICATION(cdip)) { + PMD(PMD_SET, ("%s: call bus_power for " + "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip))) + return ((*PM_BUS_POWER_FUNC(cdip))(cdip, + impl_arg, op, arg, result)); + } + ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY || + pspm->pspm_direction == PM_LEVEL_DOWNONLY || + pspm->pspm_direction == PM_LEVEL_EXACT); + /* + * we presume that the parent needs to be up in + * order for the child to change state (either + * because it must already be on if the child is on + * (and the pm_all_to_normal_nexus() will be a nop) + * or because it will need to be on for the child + * to come on; so we make the call regardless + */ + pm_hold_power(cdip); + if (cinfo) { + pm_canblock_t canblock = pspm->pspm_canblock; + ret = pm_all_to_normal_nexus(cdip, canblock); + if (ret != DDI_SUCCESS) { + pm_rele_power(cdip); + return (ret); + } + } + PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf, + PM_DEVICE(cdip))) + ret = pm_busop_bus_power(cdip, impl_arg, op, arg, + result); + pm_rele_power(cdip); + } else { + ret = pm_busop_set_power(cdip, impl_arg, op, arg, + result); + } + return (ret); + + case BUS_POWER_NEXUS_PWRUP: + bpn = (pm_bp_nexus_pwrup_t *)arg; + pspm = (pm_sp_misc_t *)bpn->bpn_private; + + if (!e_pm_valid_info(dip, NULL) || + !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) || + !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) { + PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n", + pmf, PM_DEVICE(dip))) + *pspm->pspm_errnop = EIO; + *(int *)result = DDI_FAILURE; + return (DDI_FAILURE); + } + + ASSERT(bpn->bpn_dip == dip); + PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf, + PM_DEVICE(dip))) + new_bpc.bpc_dip = dip; + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + new_bpc.bpc_path = ddi_pathname(dip, pathbuf); + new_bpc.bpc_comp = bpn->bpn_comp; + new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp); + new_bpc.bpc_nlevel = bpn->bpn_level; + new_bpc.bpc_private = bpn->bpn_private; + ((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction = + PM_LEVEL_UPONLY; + ((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop = + &errno; + ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG, + (void *)&new_bpc, result); + kmem_free(pathbuf, MAXPATHLEN); + return (ret); + + case BUS_POWER_NOINVOL: + bpi = (pm_bp_noinvol_t *)arg; + tdip = bpi->bpni_dip; + cdip = pm_get_next_descendent(dip, tdip); + + /* In case of rem_drv, the leaf node has been removed */ + if (cdip == NULL) + return (DDI_SUCCESS); + + cinfo = PM_GET_PM_INFO(cdip); + if (cdip != tdip) { + if (PM_WANTS_NOTIFICATION(cdip)) { + PMD(PMD_NOINVOL, + ("%s: call bus_power for %s@%s(%s#%d)\n", + pmf, PM_DEVICE(cdip))) + ret = (*PM_BUS_POWER_FUNC(cdip)) + (cdip, NULL, op, arg, result); + if ((cinfo) && (ret == DDI_SUCCESS)) + (void) pm_noinvol_update_node(cdip, + bpi); + return (ret); + } else { + PMD(PMD_NOINVOL, + ("%s: walk down to %s@%s(%s#%d)\n", pmf, + PM_DEVICE(cdip))) + ret = pm_busop_bus_power(cdip, NULL, op, + arg, result); + /* + * Update the current node. + */ + if ((cinfo) && (ret == DDI_SUCCESS)) + (void) pm_noinvol_update_node(cdip, + bpi); + return (ret); + } + } else { + /* + * For attach, detach, power up: + * Do nothing for leaf node since its + * counts are already updated. + * For CFB and driver removal, since the + * path and the target dip passed in is up to and incl. + * the immediate ancestor, need to do the update. + */ + PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is " + "reached\n", pmf, PM_DEVICE(cdip))) + if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) || + (bpi->bpni_cmd == PM_BP_NOINVOL_CFB))) + (void) pm_noinvol_update_node(cdip, bpi); + return (DDI_SUCCESS); + } + + default: + PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op)) + return (DDI_FAILURE); + } +} + +static int +pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op, + void *arg, void *resultp) +{ + _NOTE(ARGUNUSED(impl_arg)) + PMD_FUNC(pmf, "bp_set_power") + pm_ppm_devlist_t *devl; + int clevel, circ; +#ifdef DEBUG + int circ_db, ccirc_db; +#endif + int ret = DDI_SUCCESS; + dev_info_t *cdip; + pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg; + pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private; + pm_canblock_t canblock = pspm->pspm_canblock; + int scan = pspm->pspm_scan; + int comp = bpc->bpc_comp; + int olevel = bpc->bpc_olevel; + int nlevel = bpc->bpc_nlevel; + int comps_off_incr = 0; + dev_info_t *pdip = ddi_get_parent(dip); + int dodeps; + int direction = pspm->pspm_direction; + int *errnop = pspm->pspm_errnop; + char *dir = pm_decode_direction(direction); + int *iresp = (int *)resultp; + time_t idletime, thresh; + pm_component_t *cp = PM_CP(dip, comp); + int work_type; + + *iresp = DDI_SUCCESS; + *errnop = 0; + ASSERT(op == BUS_POWER_CHILD_PWRCHG); + PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip), + pm_decode_op(op))) + + /* + * The following set of conditions indicate we are here to handle a + * driver's pm_[raise|lower]_power request, but the device is being + * power managed (PM_DIRECT_PM) by a user process. For that case + * we want to pm_block and pass a status back to the caller based + * on whether the controlling process's next activity on the device + * matches the current request or not. This distinction tells + * downstream functions to avoid calling into a driver or changing + * the framework's power state. To actually block, we need: + * + * PM_ISDIRECT(dip) + * no reason to block unless a process is directly controlling dev + * direction != PM_LEVEL_EXACT + * EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl + * !pm_processes_stopped + * don't block if controlling proc already be stopped for cpr + * canblock != PM_CANBLOCK_BYPASS + * our caller must not have explicitly prevented blocking + */ + if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) { + PM_LOCK_DIP(dip); + while (PM_ISDIRECT(dip) && !pm_processes_stopped) { + /* releases dip lock */ + ret = pm_busop_match_request(dip, bpc); + if (ret == EAGAIN) { + PM_LOCK_DIP(dip); + continue; + } + return (*iresp = ret); + } + PM_UNLOCK_DIP(dip); + } + /* BC device is never scanned, so power will stick until we are done */ + if (PM_ISBC(dip) && comp != 0 && nlevel != 0 && + direction != PM_LEVEL_DOWNONLY) { + int nrmpwr0 = pm_get_normal_power(dip, 0); + if (pm_set_power(dip, 0, nrmpwr0, direction, + canblock, 0, resultp) != DDI_SUCCESS) { + /* *resultp set by pm_set_power */ + return (DDI_FAILURE); + } + } + if (PM_WANTS_NOTIFICATION(pdip)) { + PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child " + "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip))) + ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL, + BUS_POWER_PRE_NOTIFICATION, bpc, resultp); + if (ret != DDI_SUCCESS) { + PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n", + pmf, PM_DEVICE(pdip))) + return (DDI_FAILURE); + } + } else { + /* + * Since we don't know what the actual power level is, + * we place a power hold on the parent no matter what + * component and level is changing. + */ + pm_hold_power(pdip); + } + PM_LOCK_POWER(dip, &circ); + clevel = PM_CURPOWER(dip, comp); + PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, " + "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel, + clevel, dir)) + switch (direction) { + case PM_LEVEL_UPONLY: + /* Powering up */ + if (clevel >= nlevel) { + PMD(PMD_SET, ("%s: current level is already " + "at or above the requested level.\n", pmf)) + *iresp = DDI_SUCCESS; + ret = DDI_SUCCESS; + goto post_notify; + } + break; + case PM_LEVEL_EXACT: + /* specific level request */ + if (clevel == nlevel && !PM_ISBC(dip)) { + PMD(PMD_SET, ("%s: current level is already " + "at the requested level.\n", pmf)) + *iresp = DDI_SUCCESS; + ret = DDI_SUCCESS; + goto post_notify; + } else if (PM_IS_CFB(dip) && (nlevel < clevel)) { + PMD(PMD_CFB, ("%s: powerdown of console\n", pmf)) + if (!pm_cfb_enabled) { + PMD(PMD_ERROR | PMD_CFB, + ("%s: !pm_cfb_enabled, fails\n", pmf)) + *errnop = EINVAL; + *iresp = DDI_FAILURE; + ret = DDI_FAILURE; + goto post_notify; + } + mutex_enter(&pm_cfb_lock); + while (cfb_inuse) { + mutex_exit(&pm_cfb_lock); + if (delay_sig(1) == EINTR) { + ret = DDI_FAILURE; + *iresp = DDI_FAILURE; + *errnop = EINTR; + goto post_notify; + } + mutex_enter(&pm_cfb_lock); + } + mutex_exit(&pm_cfb_lock); + } + break; + case PM_LEVEL_DOWNONLY: + /* Powering down */ + thresh = cur_threshold(dip, comp); + idletime = gethrestime_sec() - cp->pmc_timestamp; + if (scan && ((PM_KUC(dip) != 0) || + (cp->pmc_busycount > 0) || (idletime < thresh))) { +#ifdef DEBUG + if (DEVI(dip)->devi_pm_kidsupcnt != 0) + PMD(PMD_SET, ("%s: scan failed: " + "kidsupcnt != 0\n", pmf)) + if (cp->pmc_busycount > 0) + PMD(PMD_SET, ("%s: scan failed: " + "device become busy\n", pmf)) + if (idletime < thresh) + PMD(PMD_SET, ("%s: scan failed: device " + "hasn't been idle long enough\n", pmf)) +#endif + *iresp = DDI_FAILURE; + *errnop = EBUSY; + ret = DDI_FAILURE; + goto post_notify; + } else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) { + PMD(PMD_SET, ("%s: current level is already at " + "or below the requested level.\n", pmf)) + *iresp = DDI_SUCCESS; + ret = DDI_SUCCESS; + goto post_notify; + } + break; + } + + if (PM_IS_CFB(dip) && (comps_off_incr = + calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) { + /* + * Pre-adjust pm_cfb_comps_off if lowering a console fb + * component from full power. Remember that we tried to + * lower power in case it fails and we need to back out + * the adjustment. + */ + update_comps_off(comps_off_incr, dip); + PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n", + pmf, PM_DEVICE(dip), comp, clevel, nlevel, + pm_cfb_comps_off)) + } + + if ((*iresp = power_dev(dip, + comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) { +#ifdef DEBUG + /* + * All descendents of this node should already be powered off. + */ + if (PM_CURPOWER(dip, comp) == 0) { + pm_desc_pwrchk_t pdpchk; + pdpchk.pdpc_dip = dip; + pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip); + ndi_devi_enter(dip, &circ_db); + for (cdip = ddi_get_child(dip); cdip != NULL; + cdip = ddi_get_next_sibling(cdip)) { + ndi_devi_enter(cdip, &ccirc_db); + ddi_walk_devs(cdip, pm_desc_pwrchk_walk, + (void *)&pdpchk); + ndi_devi_exit(cdip, ccirc_db); + } + ndi_devi_exit(dip, circ_db); + } +#endif + /* + * Post-adjust pm_cfb_comps_off if we brought an fb component + * back up to full power. + */ + if (PM_IS_CFB(dip) && comps_off_incr < 0) { + update_comps_off(comps_off_incr, dip); + PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d " + "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip), + comp, clevel, nlevel, pm_cfb_comps_off)) + } + dodeps = 0; + if (POWERING_OFF(clevel, nlevel)) { + if (PM_ISBC(dip)) { + dodeps = (comp == 0); + } else { + int i; + dodeps = 1; + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + /* if some component still on */ + if (PM_CURPOWER(dip, i)) { + dodeps = 0; + break; + } + } + } + if (dodeps) + work_type = PM_DEP_WK_POWER_OFF; + } else if (POWERING_ON(clevel, nlevel)) { + if (PM_ISBC(dip)) { + dodeps = (comp == 0); + } else { + int i; + dodeps = 1; + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + if (i == comp) + continue; + if (PM_CURPOWER(dip, i) > 0) { + dodeps = 0; + break; + } + } + } + if (dodeps) + work_type = PM_DEP_WK_POWER_ON; + } + + if (dodeps) { + char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) ddi_pathname(dip, pathbuf); + pm_dispatch_to_dep_thread(work_type, pathbuf, NULL, + PM_DEP_NOWAIT, NULL, 0); + kmem_free(pathbuf, MAXPATHLEN); + } + if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) { + int old; + + /* If old power cached during deadlock, use it. */ + old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ? + cp->pmc_phc_pwr : olevel); + mutex_enter(&pm_rsvp_lock); + pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel, + old, canblock); + pm_enqueue_notify_others(&devl, canblock); + mutex_exit(&pm_rsvp_lock); + } + + /* + * If we are coming from a scan, don't do it again, + * else we can have infinite loops. + */ + if (!scan) + pm_rescan(dip); + } else { + /* if we incremented pm_comps_off_count, but failed */ + if (comps_off_incr > 0) { + update_comps_off(-comps_off_incr, dip); + PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d " + "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip), + comp, clevel, nlevel, pm_cfb_comps_off)) + } + *errnop = EIO; + } + +post_notify: + /* + * This thread may have been in deadlock with pm_power_has_changed. + * Before releasing power lock, clear the flag which marks this + * condition. + */ + cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER; + + /* + * Update the old power level in the bus power structure with the + * actual power level before the transition was made to the new level. + * Some involved parents depend on this information to keep track of + * their children's power transition. + */ + if (*iresp != DDI_FAILURE) + bpc->bpc_olevel = clevel; + + if (PM_WANTS_NOTIFICATION(pdip)) { + ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL, + BUS_POWER_POST_NOTIFICATION, bpc, resultp); + PM_UNLOCK_POWER(dip, circ); + PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for " + "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip), + PM_DEVICE(dip), ret)) + } else { + nlevel = cur_power(cp); /* in case phc deadlock updated pwr */ + PM_UNLOCK_POWER(dip, circ); + /* + * Now that we know what power transition has occurred + * (if any), release the power hold. Leave the hold + * in effect in the case of OFF->ON transition. + */ + if (!(clevel == 0 && nlevel > 0 && + (!PM_ISBC(dip) || comp == 0))) + pm_rele_power(pdip); + /* + * If the power transition was an ON->OFF transition, + * remove the power hold from the parent. + */ + if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) && + nlevel == 0 && (!PM_ISBC(dip) || comp == 0)) + pm_rele_power(pdip); + } + if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS) + return (DDI_FAILURE); + else + return (DDI_SUCCESS); +} + +/* + * If an app (SunVTS or Xsun) has taken control, then block until it + * gives it up or makes the requested power level change, unless + * we have other instructions about blocking. Returns DDI_SUCCESS, + * DDI_FAILURE or EAGAIN (owner released device from directpm). + */ +static int +pm_busop_match_request(dev_info_t *dip, void *arg) +{ + PMD_FUNC(pmf, "bp_match_request") + pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg; + pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private; + int comp = bpc->bpc_comp; + int nlevel = bpc->bpc_nlevel; + pm_canblock_t canblock = pspm->pspm_canblock; + int direction = pspm->pspm_direction; + int clevel, circ; + + ASSERT(PM_IAM_LOCKING_DIP(dip)); + PM_LOCK_POWER(dip, &circ); + clevel = PM_CURPOWER(dip, comp); + PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n", + pmf, PM_DEVICE(dip), comp, nlevel, clevel)) + if (direction == PM_LEVEL_UPONLY) { + if (clevel >= nlevel) { + PM_UNLOCK_POWER(dip, circ); + PM_UNLOCK_DIP(dip); + return (DDI_SUCCESS); + } + } else if (clevel == nlevel) { + PM_UNLOCK_POWER(dip, circ); + PM_UNLOCK_DIP(dip); + return (DDI_SUCCESS); + } + if (canblock == PM_CANBLOCK_FAIL) { + PM_UNLOCK_POWER(dip, circ); + PM_UNLOCK_DIP(dip); + return (DDI_FAILURE); + } + if (canblock == PM_CANBLOCK_BLOCK) { + /* + * To avoid a deadlock, we must not hold the + * power lock when we pm_block. + */ + PM_UNLOCK_POWER(dip, circ); + PMD(PMD_SET, ("%s: blocking\n", pmf)) + /* pm_block releases dip lock */ + switch (pm_block(dip, comp, nlevel, clevel)) { + case PMP_RELEASE: + return (EAGAIN); + case PMP_SUCCEED: + return (DDI_SUCCESS); + case PMP_FAIL: + return (DDI_FAILURE); + } + } else { + ASSERT(0); + } + _NOTE(NOTREACHED); + return (DDI_FAILURE); /* keep gcc happy */ +} + +static int +pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock) +{ + PMD_FUNC(pmf, "all_to_normal_nexus") + int *normal; + int i, ncomps; + size_t size; + int changefailed = 0; + int ret, result = DDI_SUCCESS; + pm_bp_nexus_pwrup_t bpn; + pm_sp_misc_t pspm; + + ASSERT(PM_GET_PM_INFO(dip)); + PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) { + PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf)) + return (DDI_FAILURE); + } + ncomps = PM_NUMCMPTS(dip); + for (i = 0; i < ncomps; i++) { + bpn.bpn_dip = dip; + bpn.bpn_comp = i; + bpn.bpn_level = normal[i]; + pspm.pspm_canblock = canblock; + pspm.pspm_scan = 0; + bpn.bpn_private = &pspm; + ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP, + (void *)&bpn, (void *)&result); + if (ret != DDI_SUCCESS || result != DDI_SUCCESS) { + PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] " + "->%d failure result %d\n", pmf, PM_DEVICE(dip), + i, normal[i], result)) + changefailed++; + } + } + kmem_free(normal, size); + if (changefailed) { + PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) " + "full power\n", pmf, changefailed, PM_DEVICE(dip))) + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +int +pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path, + dev_info_t *tdip) +{ + PMD_FUNC(pmf, "noinvol_update") + pm_bp_noinvol_t args; + int ret; + int result = DDI_SUCCESS; + + args.bpni_path = path; + args.bpni_dip = tdip; + args.bpni_cmd = subcmd; + args.bpni_wasvolpmd = wasvolpmd; + args.bpni_volpmd = volpmd; + PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d " + "volpmd %d wasvolpmd %d\n", pmf, + path, (void *)tdip, subcmd, wasvolpmd, volpmd)) + ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL, + &args, &result); + return (ret); +} + +void +pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req) +{ + PMD_FUNC(pmf, "noinvol_update_node") + + PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + switch (req->bpni_cmd) { + case PM_BP_NOINVOL_ATTACH: + PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) " + "noinvol %d->%d\n", pmf, PM_DEVICE(dip), + DEVI(dip)->devi_pm_noinvolpm, + DEVI(dip)->devi_pm_noinvolpm - 1)) + ASSERT(DEVI(dip)->devi_pm_noinvolpm); + PM_LOCK_DIP(dip); + DEVI(dip)->devi_pm_noinvolpm--; + if (req->bpni_wasvolpmd) { + PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH " + "%s@%s(%s#%d) volpmd %d->%d\n", pmf, + PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd, + DEVI(dip)->devi_pm_volpmd - 1)) + if (DEVI(dip)->devi_pm_volpmd) + DEVI(dip)->devi_pm_volpmd--; + } + PM_UNLOCK_DIP(dip); + break; + + case PM_BP_NOINVOL_DETACH: + PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) " + "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip), + DEVI(dip)->devi_pm_noinvolpm, + DEVI(dip)->devi_pm_noinvolpm + 1)) + PM_LOCK_DIP(dip); + DEVI(dip)->devi_pm_noinvolpm++; + if (req->bpni_wasvolpmd) { + PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH " + "%s@%s(%s#%d) volpmd %d->%d\n", pmf, + PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd, + DEVI(dip)->devi_pm_volpmd + 1)) + DEVI(dip)->devi_pm_volpmd++; + } + PM_UNLOCK_DIP(dip); + break; + + case PM_BP_NOINVOL_REMDRV: + PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) " + "noinvol %d->%d\n", pmf, PM_DEVICE(dip), + DEVI(dip)->devi_pm_noinvolpm, + DEVI(dip)->devi_pm_noinvolpm - 1)) + ASSERT(DEVI(dip)->devi_pm_noinvolpm); + PM_LOCK_DIP(dip); + DEVI(dip)->devi_pm_noinvolpm--; + if (req->bpni_wasvolpmd) { + PMD(PMD_NOINVOL, + ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) " + "volpmd %d->%d\n", pmf, PM_DEVICE(dip), + DEVI(dip)->devi_pm_volpmd, + DEVI(dip)->devi_pm_volpmd - 1)) + /* + * A power up could come in between and + * clear the volpmd, if that's the case, + * volpmd would be clear. + */ + if (DEVI(dip)->devi_pm_volpmd) + DEVI(dip)->devi_pm_volpmd--; + } + PM_UNLOCK_DIP(dip); + break; + + case PM_BP_NOINVOL_CFB: + PMD(PMD_NOINVOL, + ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n", + pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm, + DEVI(dip)->devi_pm_noinvolpm + 1)) + PM_LOCK_DIP(dip); + DEVI(dip)->devi_pm_noinvolpm++; + PM_UNLOCK_DIP(dip); + break; + + case PM_BP_NOINVOL_POWER: + PMD(PMD_NOINVOL, + ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n", + pmf, PM_DEVICE(dip), + DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd - + req->bpni_volpmd)) + PM_LOCK_DIP(dip); + DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd; + PM_UNLOCK_DIP(dip); + break; + + default: + break; + } + +} + +#ifdef DEBUG +static int +pm_desc_pwrchk_walk(dev_info_t *dip, void *arg) +{ + PMD_FUNC(pmf, "desc_pwrchk") + pm_desc_pwrchk_t *pdpchk = (pm_desc_pwrchk_t *)arg; + pm_info_t *info = PM_GET_PM_INFO(dip); + int i, curpwr, ce_level; + + if (!info) + return (DDI_WALK_CONTINUE); + + PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip))) + for (i = 0; i < PM_NUMCMPTS(dip); i++) { + if ((curpwr = PM_CURPOWER(dip, i)) == 0) + continue; + ce_level = (pdpchk->pdpc_par_involved == 0) ? CE_PANIC : + CE_WARN; + PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while desc " + "%s@%s(%s#%d)[%d] is at %d\n", pmf, + PM_DEVICE(pdpchk->pdpc_dip), PM_DEVICE(dip), i, curpwr)) + cmn_err(ce_level, "!device %s@%s(%s#%d) is powered on, " + "while its ancestor, %s@%s(%s#%d), is powering off!", + PM_DEVICE(dip), PM_DEVICE(pdpchk->pdpc_dip)); + } + return (DDI_WALK_CONTINUE); +} +#endif + +/* + * Record the fact that one thread is borrowing the lock on a device node. + * Use is restricted to the case where the lending thread will block until + * the borrowing thread (always curthread) completes. + */ +void +pm_borrow_lock(kthread_t *lender) +{ + lock_loan_t *prev = &lock_loan_head; + lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP); + + cur->pmlk_borrower = curthread; + cur->pmlk_lender = lender; + mutex_enter(&pm_loan_lock); + cur->pmlk_next = prev->pmlk_next; + prev->pmlk_next = cur; + mutex_exit(&pm_loan_lock); +} + +/* + * Return the borrowed lock. A thread can borrow only one. + */ +void +pm_return_lock(void) +{ + lock_loan_t *cur; + lock_loan_t *prev = &lock_loan_head; + + mutex_enter(&pm_loan_lock); + ASSERT(prev->pmlk_next != NULL); + for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next) + if (cur->pmlk_borrower == curthread) + break; + + ASSERT(cur != NULL); + prev->pmlk_next = cur->pmlk_next; + mutex_exit(&pm_loan_lock); + kmem_free(cur, sizeof (*cur)); +} |