Diffstat (limited to 'usr/src/uts/i86pc/os/mp_machdep.c')
-rw-r--r--	usr/src/uts/i86pc/os/mp_machdep.c | 166
1 file changed, 118 insertions(+), 48 deletions(-)
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index e27b45d709..1954dfb81c 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -45,6 +45,7 @@
 #include <sys/memlist.h>
 #include <sys/param.h>
 #include <sys/promif.h>
+#include <sys/cpu_pm.h>
 #if defined(__xpv)
 #include <sys/hypervisor.h>
 #endif
@@ -52,6 +53,7 @@
 #include <vm/hat_i86.h>
 #include <sys/kdi_machimpl.h>
 #include <sys/sdt.h>
+#include <sys/hpet.h>
 
 #define	OFFSETOF(s, m)	(size_t)(&(((s *)0)->m))
 
@@ -76,10 +78,10 @@ static int mach_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
 static void mach_notify_error(int level, char *errmsg);
 static hrtime_t dummy_hrtime(void);
 static void dummy_scalehrtime(hrtime_t *);
-static void cpu_idle(void);
+void cpu_idle(void);
 static void cpu_wakeup(cpu_t *, int);
 #ifndef __xpv
-static void cpu_idle_mwait(void);
+void cpu_idle_mwait(void);
 static void cpu_wakeup_mwait(cpu_t *, int);
 #endif
 /*
@@ -184,7 +186,23 @@ int idle_cpu_prefer_mwait = 1;
  */
 int idle_cpu_assert_cflush_monitor = 1;
 
-#endif
+/*
+ * If non-zero, idle cpus will not use power saving Deep C-States idle loop.
+ */
+int idle_cpu_no_deep_c = 0;
+/*
+ * Non-power saving idle loop and wakeup pointers.
+ * Allows user to toggle Deep Idle power saving feature on/off.
+ */
+void (*non_deep_idle_cpu)() = cpu_idle;
+void (*non_deep_idle_disp_enq_thread)(cpu_t *, int);
+
+/*
+ * Object for the kernel to access the HPET.
+ */
+hpet_t hpet;
+
+#endif	/* ifndef __xpv */
 
 /*ARGSUSED*/
 int
@@ -210,6 +228,16 @@ pg_plat_hw_shared(cpu_t *cp, pghw_type_t hw)
 			return (1);
 		else
 			return (0);
+	case PGHW_POW_ACTIVE:
+		if (cpupm_domain_id(cp, CPUPM_DTYPE_ACTIVE) != (id_t)-1)
+			return (1);
+		else
+			return (0);
+	case PGHW_POW_IDLE:
+		if (cpupm_domain_id(cp, CPUPM_DTYPE_IDLE) != (id_t)-1)
+			return (1);
+		else
+			return (0);
 	default:
 		return (0);
 	}
@@ -247,58 +275,63 @@ pg_plat_hw_instance_id(cpu_t *cpu, pghw_type_t hw)
 		return (cpuid_get_last_lvl_cacheid(cpu));
 	case PGHW_CHIP:
 		return (cpuid_get_chipid(cpu));
+	case PGHW_POW_ACTIVE:
+		return (cpupm_domain_id(cpu, CPUPM_DTYPE_ACTIVE));
+	case PGHW_POW_IDLE:
+		return (cpupm_domain_id(cpu, CPUPM_DTYPE_IDLE));
 	default:
 		return (-1);
 	}
 }
 
-int
-pg_plat_hw_level(pghw_type_t hw)
+/*
+ * Express preference for optimizing for sharing relationship
+ * hw1 vs hw2
+ */
+pghw_type_t
+pg_plat_hw_rank(pghw_type_t hw1, pghw_type_t hw2)
 {
-	int i;
+	int i, rank1, rank2;
+
 	static pghw_type_t hw_hier[] = {
 		PGHW_IPIPE,
 		PGHW_CACHE,
 		PGHW_CHIP,
+		PGHW_POW_IDLE,
+		PGHW_POW_ACTIVE,
 		PGHW_NUM_COMPONENTS
 	};
 
 	for (i = 0; hw_hier[i] != PGHW_NUM_COMPONENTS; i++) {
-		if (hw_hier[i] == hw)
-			return (i);
+		if (hw_hier[i] == hw1)
+			rank1 = i;
+		if (hw_hier[i] == hw2)
+			rank2 = i;
 	}
-	return (-1);
-}
 
-/*
- * Return 1 if CMT load balancing policies should be
- * implemented across instances of the specified hardware
- * sharing relationship.
- */
-int
-pg_plat_cmt_load_bal_hw(pghw_type_t hw)
-{
-	if (hw == PGHW_IPIPE ||
-	    hw == PGHW_FPU ||
-	    hw == PGHW_CHIP ||
-	    hw == PGHW_CACHE)
-		return (1);
+	if (rank1 > rank2)
+		return (hw1);
 	else
-		return (0);
+		return (hw2);
 }
 
-
 /*
- * Return 1 if thread affinity polices should be implemented
- * for instances of the specifed hardware sharing relationship.
+ * Override the default CMT dispatcher policy for the specified
+ * hardware sharing relationship
  */
-int
-pg_plat_cmt_affinity_hw(pghw_type_t hw)
+pg_cmt_policy_t
+pg_plat_cmt_policy(pghw_type_t hw)
 {
-	if (hw == PGHW_CACHE)
-		return (1);
-	else
-		return (0);
+	/*
+	 * For shared caches, also load balance across them to
+	 * maximize aggregate cache capacity
+	 */
+	switch (hw) {
+	case PGHW_CACHE:
+		return (CMT_BALANCE|CMT_AFFINITY);
+	default:
+		return (CMT_NO_POLICY);
+	}
 }
 
 id_t
@@ -329,9 +362,28 @@ dummy_scalehrtime(hrtime_t *ticks)
 {}
 
 /*
+ * Supports Deep C-State power saving idle loop.
+ */
+void
+cpu_idle_adaptive(void)
+{
+	(*CPU->cpu_m.mcpu_idle_cpu)();
+}
+
+void
+cpu_dtrace_idle_probe(uint_t cstate)
+{
+	cpu_t *cpup = CPU;
+	struct machcpu *mcpu = &(cpup->cpu_m);
+
+	mcpu->curr_cstate = cstate;
+	DTRACE_PROBE1(idle__state__transition, uint_t, cstate);
+}
+
+/*
  * Idle the present CPU until awoken via an interrupt
  */
-static void
+void
 cpu_idle(void)
 {
 	cpu_t *cpup = CPU;
@@ -427,11 +479,11 @@ cpu_idle(void)
 		return;
 	}
 
-	DTRACE_PROBE1(idle__state__transition, uint_t, IDLE_STATE_C1);
+	cpu_dtrace_idle_probe(IDLE_STATE_C1);
 
 	mach_cpu_idle();
 
-	DTRACE_PROBE1(idle__state__transition, uint_t, IDLE_STATE_C0);
+	cpu_dtrace_idle_probe(IDLE_STATE_C0);
 
 	/*
 	 * We're no longer halted
@@ -510,7 +562,7 @@ cpu_wakeup(cpu_t *cpu, int bound)
 /*
  * Idle the present CPU until awoken via touching its monitored line
  */
-static void
+void
 cpu_idle_mwait(void)
 {
 	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
@@ -520,7 +572,7 @@ cpu_idle_mwait(void)
 	int hset_update = 1;
 
 	/*
-	 * Set our mcpu_mwait here, so we can tell if anyone trys to
+	 * Set our mcpu_mwait here, so we can tell if anyone tries to
 	 * wake us between now and when we call mwait.  No other cpu will
 	 * attempt to set our mcpu_mwait until we add ourself to the halted
 	 * CPU bitmap.
@@ -529,7 +581,7 @@ cpu_idle_mwait(void)
 
 	/*
 	 * If this CPU is online, and there's multiple CPUs
-	 * in the system, then we should notate our halting
+	 * in the system, then we should note our halting
 	 * by adding ourselves to the partition's halted CPU
 	 * bitmap. This allows other CPUs to find/awaken us when
 	 * work becomes available.
@@ -543,7 +595,7 @@ cpu_idle_mwait(void)
 	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted CPU bitmap is checked to determine who
-	 * (if anyone) should be awoken. We therefore need to first
+	 * (if anyone) should be awakened. We therefore need to first
	 * add ourselves to the bitmap, and and then check if there
	 * is any work available.
	 *
@@ -580,13 +632,13 @@ cpu_idle_mwait(void)
	 */
	i86_monitor(mcpu_mwait, 0, 0);
	if (*mcpu_mwait == MWAIT_HALTED) {
-		DTRACE_PROBE1(idle__state__transition, uint_t, IDLE_STATE_C1);
+		cpu_dtrace_idle_probe(IDLE_STATE_C1);
 
		tlb_going_idle();
		i86_mwait(0, 0);
		tlb_service();
 
-		DTRACE_PROBE1(idle__state__transition, uint_t, IDLE_STATE_C0);
+		cpu_dtrace_idle_probe(IDLE_STATE_C0);
	}
 
	/*
@@ -858,14 +910,23 @@ mach_init()
	(*pops->psm_softinit)();
 
	/*
-	 * Initialize the dispatcher's function hooks
-	 * to enable CPU halting when idle.
+	 * Initialize the dispatcher's function hooks to enable CPU halting
+	 * when idle.  Set both the deep-idle and non-deep-idle hooks.
+	 *
+	 * Assume we can use power saving deep-idle loop cpu_idle_adaptive.
+	 * Platform deep-idle driver will reset our idle loop to
+	 * non_deep_idle_cpu if power saving deep-idle feature is not available.
+	 *
	 * Do not use monitor/mwait if idle_cpu_use_hlt is not set(spin idle)
	 * or idle_cpu_prefer_mwait is not set.
	 * Allocate monitor/mwait buffer for cpu0.
	 */
+#ifndef __xpv
+	non_deep_idle_disp_enq_thread = disp_enq_thread;
+#endif
	if (idle_cpu_use_hlt) {
-		idle_cpu = cpu_idle;
+		idle_cpu = cpu_idle_adaptive;
+		CPU->cpu_m.mcpu_idle_cpu = cpu_idle;
 #ifndef __xpv
		if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
			CPU->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
@@ -878,12 +939,20 @@ mach_init()
				    "handle cpu 0 mwait size.");
 #endif
				idle_cpu_prefer_mwait = 0;
-				idle_cpu = cpu_idle;
+				CPU->cpu_m.mcpu_idle_cpu = cpu_idle;
			} else {
-				idle_cpu = cpu_idle_mwait;
+				CPU->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
			}
		} else {
-			idle_cpu = cpu_idle;
+			CPU->cpu_m.mcpu_idle_cpu = cpu_idle;
+		}
+		non_deep_idle_cpu = CPU->cpu_m.mcpu_idle_cpu;
+
+		/*
+		 * Disable power saving deep idle loop?
+		 */
+		if (idle_cpu_no_deep_c) {
+			idle_cpu = non_deep_idle_cpu;
		}
 #endif
	}
@@ -970,6 +1039,7 @@ mach_smpinit(void)
 
 #ifndef __xpv
	if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait)
		disp_enq_thread = cpu_wakeup_mwait;
+	non_deep_idle_disp_enq_thread = disp_enq_thread;
 #endif
 }
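Note on the idle-path indirection introduced above: the dispatcher's idle_cpu hook now points at cpu_idle_adaptive(), which simply calls through the per-CPU mcpu_idle_cpu pointer, so a platform power-management driver can later install a Deep C-State idle routine per CPU (or the idle_cpu_no_deep_c tunable can pin the non-deep loop) without retargeting the dispatcher hook itself. The following is a minimal user-space sketch of that dispatch pattern, not kernel code; struct machcpu, cpu0, and the two idle stubs here are hypothetical stand-ins for the kernel's types and routines.

#include <stdio.h>

/* Hypothetical stand-in for the kernel's per-CPU machcpu state. */
struct machcpu {
	void (*mcpu_idle_cpu)(void);	/* per-CPU idle routine */
};

static struct machcpu cpu0;		/* stands in for CPU->cpu_m */

static void
halt_idle(void)				/* stub for the non-deep halt loop */
{
	(void) printf("halt-based idle\n");
}

static void
deep_cstate_idle(void)			/* loop a PM driver might install */
{
	(void) printf("deep C-state idle\n");
}

/* Mirrors cpu_idle_adaptive(): one stable entry point, per-CPU dispatch. */
static void
cpu_idle_adaptive(void)
{
	(*cpu0.mcpu_idle_cpu)();
}

int
main(void)
{
	void (*idle_cpu)(void) = cpu_idle_adaptive;	/* dispatcher hook */

	cpu0.mcpu_idle_cpu = halt_idle;		/* boot-time default */
	idle_cpu();				/* prints "halt-based idle" */

	cpu0.mcpu_idle_cpu = deep_cstate_idle;	/* PM driver retargets one CPU */
	idle_cpu();				/* prints "deep C-state idle" */
	return (0);
}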

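Note on pg_plat_hw_rank(): it replaces pg_plat_hw_level() and, together with pg_plat_cmt_policy(), subsumes the old pg_plat_cmt_load_bal_hw()/pg_plat_cmt_affinity_hw() predicates. A sharing relationship's rank is simply its position in hw_hier[], and the later-ranked of the two arguments wins. A self-contained sketch of that comparison follows, with a hypothetical pghw_type_t enum standing in for the kernel's.

#include <stdio.h>

/* Hypothetical stand-in for the kernel's pghw_type_t. */
typedef enum {
	PGHW_IPIPE,
	PGHW_CACHE,
	PGHW_CHIP,
	PGHW_POW_IDLE,
	PGHW_POW_ACTIVE,
	PGHW_NUM_COMPONENTS
} pghw_type_t;

/*
 * Mirrors pg_plat_hw_rank(): position in hw_hier[] is the rank.
 * rank1/rank2 are initialized here for safety; the kernel version
 * assumes both arguments appear in the hierarchy.
 */
static pghw_type_t
pg_plat_hw_rank(pghw_type_t hw1, pghw_type_t hw2)
{
	int i, rank1 = 0, rank2 = 0;
	static pghw_type_t hw_hier[] = {
		PGHW_IPIPE,
		PGHW_CACHE,
		PGHW_CHIP,
		PGHW_POW_IDLE,
		PGHW_POW_ACTIVE,
		PGHW_NUM_COMPONENTS
	};

	for (i = 0; hw_hier[i] != PGHW_NUM_COMPONENTS; i++) {
		if (hw_hier[i] == hw1)
			rank1 = i;
		if (hw_hier[i] == hw2)
			rank2 = i;
	}
	return (rank1 > rank2 ? hw1 : hw2);
}

int
main(void)
{
	/* Power domains outrank shared caches in this hierarchy: prints 4. */
	(void) printf("%d\n", (int)pg_plat_hw_rank(PGHW_CACHE, PGHW_POW_ACTIVE));
	return (0);
}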