diff options
author | Haik Aftandilian <Haik.Aftandilian@Sun.COM> | 2009-11-23 16:18:43 -0800 |
---|---|---|
committer | Haik Aftandilian <Haik.Aftandilian@Sun.COM> | 2009-11-23 16:18:43 -0800 |
commit | 023e71de9e5670cebc23dd51162833661d3d2d3b (patch) | |
tree | 04e1e1fc10664466485a6a33787082596ea0f28a /usr/src/uts | |
parent | f6cf9e5015c0407e421e650c54e2cc47c8ea3546 (diff) | |
download | illumos-gate-023e71de9e5670cebc23dd51162833661d3d2d3b.tar.gz |
6741065 Solaris should support cooperative migration
6873667 a domain running Sun Cluster should notify the cluster framework when it is migrating
FWARC 2009/559 Domain Suspend Domain Service
FWARC 2009/452 HV APIs for cooperative guest migration
PSARC 2009/589 LDOM-SunCluster suspend callbacks
Diffstat (limited to 'usr/src/uts')
40 files changed, 1685 insertions, 395 deletions
diff --git a/usr/src/uts/common/os/cpu.c b/usr/src/uts/common/os/cpu.c index 0e2dea355a..009598f03f 100644 --- a/usr/src/uts/common/os/cpu.c +++ b/usr/src/uts/common/os/cpu.c @@ -1802,7 +1802,7 @@ cpu_del_unit(int cpuid) * Tear down the CPU's physical ID cache, and update any * processor groups */ - pg_cpu_fini(cp); + pg_cpu_fini(cp, NULL); pghw_physid_destroy(cp); /* diff --git a/usr/src/uts/common/os/pg.c b/usr/src/uts/common/os/pg.c index 3de3c02f4b..067670dbbb 100644 --- a/usr/src/uts/common/os/pg.c +++ b/usr/src/uts/common/os/pg.c @@ -256,7 +256,7 @@ pg_cpu0_init(void) */ mutex_enter(&cpu_lock); - pg_cpu_init(CPU); + (void) pg_cpu_init(CPU, B_FALSE); pg_cpupart_in(CPU, &cp_default); pg_cpu_active(CPU); @@ -276,9 +276,9 @@ pg_cpu0_reinit(void) mutex_enter(&cpu_lock); pg_cpu_inactive(CPU); pg_cpupart_out(CPU, &cp_default); - pg_cpu_fini(CPU); + pg_cpu_fini(CPU, NULL); - pg_cpu_init(CPU); + (void) pg_cpu_init(CPU, B_FALSE); pg_cpupart_in(CPU, &cp_default); pg_cpu_active(CPU); mutex_exit(&cpu_lock); @@ -531,14 +531,22 @@ pg_cpu_data_free(cpu_pg_t *pgd) } /* - * A new CPU is coming into the system, either via booting or DR. - * Allocate it's PG data, and notify all registered classes about + * Called when either a new CPU is coming into the system (either + * via booting or DR) or when the CPU's PG data is being recalculated. + * Allocate its PG data, and notify all registered classes about * the new CPU. * + * If "deferred_init" is B_TRUE, the CPU's PG data will be allocated + * and returned, but the "bootstrap" structure will be left in place. + * The deferred_init option is used when all CPUs in the system are + * using the bootstrap structure as part of the process of recalculating + * all PG data. The caller must replace the bootstrap structure with the + * allocated PG data before pg_cpu_active is called. + * * This routine may block. 
*/ -void -pg_cpu_init(cpu_t *cp) +cpu_pg_t * +pg_cpu_init(cpu_t *cp, boolean_t deferred_init) { pg_cid_t i; cpu_pg_t *cpu_pg; @@ -569,35 +577,48 @@ pg_cpu_init(cpu_t *cp) /* * The CPU's PG data is now ready to use. */ - cp->cpu_pg = cpu_pg; + if (deferred_init == B_FALSE) + cp->cpu_pg = cpu_pg; + + return (cpu_pg); } /* - * This CPU is being deleted from the system. Notify the classes - * and free up the CPU's PG data. + * Either this CPU is being deleted from the system or its PG data is + * being recalculated. Notify the classes and free up the CPU's PG data. + * + * If "cpu_pg_deferred" is non-NULL, it points to the CPU's PG data and + * serves to indicate that this CPU is already using the bootstrap + * structure. Used as part of the process to recalculate the PG data for + * all CPUs in the system. */ void -pg_cpu_fini(cpu_t *cp) +pg_cpu_fini(cpu_t *cp, cpu_pg_t *cpu_pg_deferred) { pg_cid_t i; cpu_pg_t *cpu_pg; ASSERT(MUTEX_HELD(&cpu_lock)); - cpu_pg = cp->cpu_pg; + if (cpu_pg_deferred == NULL) { + cpu_pg = cp->cpu_pg; - /* - * This can happen if the CPU coming into the system - * failed to power on. - */ - if (cpu_pg == NULL || pg_cpu_is_bootstrapped(cp)) - return; + /* + * This can happen if the CPU coming into the system + * failed to power on. + */ + if (cpu_pg == NULL || pg_cpu_is_bootstrapped(cp)) + return; - /* - * Have the CPU reference the bootstrap PG data to survive - * the dispatcher should it block from here on out. - */ - pg_cpu_bootstrap(cp); + /* + * Have the CPU reference the bootstrap PG data to survive + * the dispatcher should it block from here on out. 
+ */ + pg_cpu_bootstrap(cp); + } else { + ASSERT(pg_cpu_is_bootstrapped(cp)); + cpu_pg = cpu_pg_deferred; + } for (i = 0; i < pg_nclasses; i++) PG_CPU_FINI(i, cp, cpu_pg); diff --git a/usr/src/uts/common/sys/pg.h b/usr/src/uts/common/sys/pg.h index a643e5b632..0a61530982 100644 --- a/usr/src/uts/common/sys/pg.h +++ b/usr/src/uts/common/sys/pg.h @@ -159,8 +159,8 @@ pg_cid_t pg_class_register(char *, struct pg_ops *, pg_relation_t); * PG CPU reconfiguration hooks */ void pg_cpu0_init(void); -void pg_cpu_init(cpu_t *); -void pg_cpu_fini(cpu_t *); +cpu_pg_t *pg_cpu_init(cpu_t *, boolean_t deferred_init); +void pg_cpu_fini(cpu_t *, cpu_pg_t *cpu_pg_deferred); void pg_cpu_active(cpu_t *); void pg_cpu_inactive(cpu_t *); void pg_cpu_startup(cpu_t *); diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index cd171eca2c..d9369d0b5c 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -1587,7 +1587,7 @@ mp_startup(void) */ mutex_enter(&cpu_lock); pghw_physid_create(cp); - pg_cpu_init(cp); + (void) pg_cpu_init(cp, B_FALSE); pg_cmt_cpu_startup(cp); mutex_exit(&cpu_lock); diff --git a/usr/src/uts/sparc/sys/simulate.h b/usr/src/uts/sparc/sys/simulate.h index 56bc95f7bb..c2ce9377b4 100644 --- a/usr/src/uts/sparc/sys/simulate.h +++ b/usr/src/uts/sparc/sys/simulate.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_SIMULATE_H #define _SYS_SIMULATE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -146,6 +143,7 @@ extern "C" { extern int32_t fetch_user_instr(caddr_t); extern int simulate_unimp(struct regs *, caddr_t *); extern int simulate_lddstd(struct regs *, caddr_t *); +extern int simulate_rdtick(struct regs *); extern int do_unaligned(struct regs *, caddr_t *); extern int calc_memaddr(struct regs *, caddr_t *); extern int is_atomic(struct regs *); diff --git a/usr/src/uts/sparc/v9/ml/sparcv9_subr.s b/usr/src/uts/sparc/v9/ml/sparcv9_subr.s index cb23034e4b..a4c0eac3c8 100644 --- a/usr/src/uts/sparc/v9/ml/sparcv9_subr.s +++ b/usr/src/uts/sparc/v9/ml/sparcv9_subr.s @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * General assembly language routines. * It is the intent of this file to contain routines that are @@ -1275,6 +1273,13 @@ sulword_noerr(void *addr, ulong_t valuep) #endif /* lint */ +/* + * We define rdtick here, but not for sun4v. On sun4v systems, the %tick + * and %stick should not be read directly without considering the tick + * and stick offset kernel variables introduced to support sun4v OS + * suspension. + */ +#if !defined (sun4v) #if defined (lint) @@ -1282,12 +1287,16 @@ hrtime_t rdtick() { return (0); } -#else +#else /* lint */ + ENTRY(rdtick) retl rd %tick, %o0 SET_SIZE(rdtick) -#endif + +#endif /* lint */ + +#endif /* !sun4v */ /* * Set tba to given address, no side effects. 
diff --git a/usr/src/uts/sparc/v9/ml/syscall_trap.s b/usr/src/uts/sparc/v9/ml/syscall_trap.s index 72c131afb6..abd40db34a 100644 --- a/usr/src/uts/sparc/v9/ml/syscall_trap.s +++ b/usr/src/uts/sparc/v9/ml/syscall_trap.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * System call trap handler. */ @@ -112,7 +109,7 @@ syscall_trap(struct regs *rp) /* for tags only; not called from C */ andn %l3, PSTATE_IE | PSTATE_AM, %g3 wrpr %g0, %g3, %pstate ! disable interrupt TRACE_PTR(%g3, %g2) ! get trace pointer - GET_TRACE_TICK(%g1) + GET_TRACE_TICK(%g1, %g2) stxa %g1, [%g3 + TRAP_ENT_TICK]%asi ldx [%l1 + G1_OFF], %g1 ! get syscall code TRACE_SAVE_TL_VAL(%g3, %g1) @@ -195,7 +192,7 @@ syscall_trap(struct regs *rp) /* for tags only; not called from C */ andn %g5, PSTATE_IE | PSTATE_AM, %g4 wrpr %g0, %g4, %pstate ! disable interrupt TRACE_PTR(%g4, %g2) ! get trace pointer - GET_TRACE_TICK(%g2) + GET_TRACE_TICK(%g2, %g3) stxa %g2, [%g4 + TRAP_ENT_TICK]%asi lduh [THREAD_REG + T_SYSNUM], %g2 TRACE_SAVE_TL_VAL(%g4, %g2) @@ -357,7 +354,7 @@ syscall_trap32(void) /* for tags only - trap handler - not called from C */ andn %l3, PSTATE_IE | PSTATE_AM, %g3 wrpr %g0, %g3, %pstate ! disable interrupt TRACE_PTR(%g3, %g2) ! 
get trace pointer - GET_TRACE_TICK(%g1) + GET_TRACE_TICK(%g1, %g2) stxa %g1, [%g3 + TRAP_ENT_TICK]%asi ldx [%l1 + G1_OFF], %g1 ! get syscall code TRACE_SAVE_TL_VAL(%g3, %g1) @@ -453,7 +450,7 @@ syscall_trap32(void) /* for tags only - trap handler - not called from C */ andn %g5, PSTATE_IE | PSTATE_AM, %g4 wrpr %g0, %g4, %pstate ! disable interrupt TRACE_PTR(%g4, %g2) ! get trace pointer - GET_TRACE_TICK(%g2) + GET_TRACE_TICK(%g2, %g3) stxa %g2, [%g4 + TRAP_ENT_TICK]%asi lduh [THREAD_REG + T_SYSNUM], %g2 TRACE_SAVE_TL_VAL(%g4, %g2) diff --git a/usr/src/uts/sparc/v9/os/simulator.c b/usr/src/uts/sparc/v9/os/simulator.c index 5fdee6efbb..8897ea8706 100644 --- a/usr/src/uts/sparc/v9/os/simulator.c +++ b/usr/src/uts/sparc/v9/os/simulator.c @@ -51,6 +51,7 @@ #include <sys/model.h> #include <vm/seg_vn.h> #include <sys/byteorder.h> +#include <sys/time.h> #define IS_IBIT_SET(x) (x & 0x2000) #define IS_VIS1(op, op3)(op == 2 && op3 == 0x36) @@ -1138,6 +1139,46 @@ simulate_unimp(struct regs *rp, caddr_t *badaddr) } /* + * Simulate a "rd %tick" or "rd %stick" (%asr24) instruction. + */ +int +simulate_rdtick(struct regs *rp) +{ + uint_t inst, op, op3, rd, rs1, i; + caddr_t badaddr; + + inst = fetch_user_instr((caddr_t)rp->r_pc); + op = (inst >> 30) & 0x3; + rd = (inst >> 25) & 0x1F; + op3 = (inst >> 19) & 0x3F; + i = (inst >> 13) & 0x1; + + /* + * Make sure this is either a %tick read (rs1 == 0x4) or + * a %stick read (rs1 == 0x18) instruction. 
+ */ + if (op == 2 && op3 == 0x28 && i == 0) { + rs1 = (inst >> 14) & 0x1F; + + if (rs1 == 0x4) { + uint64_t tick; + (void) flush_user_windows_to_stack(NULL); + tick = gettick_counter(); + if (putreg(&tick, rp, rd, &badaddr) == 0) + return (SIMU_SUCCESS); + } else if (rs1 == 0x18) { + uint64_t stick; + (void) flush_user_windows_to_stack(NULL); + stick = gethrtime_unscaled(); + if (putreg(&stick, rp, rd, &badaddr) == 0) + return (SIMU_SUCCESS); + } + } + + return (SIMU_FAULT); +} + +/* * Get the value of a register for instruction simulation * by using the regs or window structure pointers. * Return 0 for success, and -1 for failure. If there is a failure, diff --git a/usr/src/uts/sun4/cpu/cpu_module.c b/usr/src/uts/sun4/cpu/cpu_module.c index e026efa8de..a0fa2c037b 100644 --- a/usr/src/uts/sun4/cpu/cpu_module.c +++ b/usr/src/uts/sun4/cpu/cpu_module.c @@ -43,6 +43,8 @@ uint_t adj_shift; hrtime_t hrtime_base; int traptrace_use_stick; uint_t cpu_impl_dual_pgsz; +uint64_t native_tick_offset; +uint64_t native_stick_offset; void cpu_setup(void) diff --git a/usr/src/uts/sun4/ml/cpc_hwreg.s b/usr/src/uts/sun4/ml/cpc_hwreg.s index 45686899b6..b357183d72 100644 --- a/usr/src/uts/sun4/ml/cpc_hwreg.s +++ b/usr/src/uts/sun4/ml/cpc_hwreg.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 1999-2001,2003 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Routines for manipulating the UltraSPARC performance * counter registers (%pcr and %pic) @@ -56,10 +53,6 @@ uint64_t ultra_getpic(void) { return (0); } -uint64_t -ultra_gettick(void) -{ return (0); } - #else /* lint || __lint */ ENTRY(ultra_setpcr) @@ -92,17 +85,4 @@ ultra_gettick(void) rd %pic, %o0 SET_SIZE(ultra_getpic) -/* - * This isn't the routine you're looking for. - * - * The routine simply returns the value of %tick on the *current* processor. - * Most of the time, gettick() [which in turn maps to %stick on platforms - * that have different CPU %tick rates] is what you want. - */ - - ENTRY(ultra_gettick) - retl - rdpr %tick, %o0 - SET_SIZE(ultra_gettick) - #endif /* lint || __lint */ diff --git a/usr/src/uts/sun4/ml/interrupt.s b/usr/src/uts/sun4/ml/interrupt.s index 302dbc0777..3d7ff2de29 100644 --- a/usr/src/uts/sun4/ml/interrupt.s +++ b/usr/src/uts/sun4/ml/interrupt.s @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #if defined(lint) #include <sys/types.h> #include <sys/thread.h> @@ -103,8 +101,6 @@ pil_interrupt(int level) 2: #ifdef TRAPTRACE TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) - stxa %g6, [%g5 + TRAP_ENT_TICK]%asi ! trap_tick = %tick TRACE_SAVE_TL_GL_REGS(%g5, %g6) rdpr %tt, %g6 stha %g6, [%g5 + TRAP_ENT_TT]%asi ! trap_type = %tt @@ -115,6 +111,8 @@ pil_interrupt(int level) stna %sp, [%g5 + TRAP_ENT_SP]%asi ! trap_sp = %sp stna %g2, [%g5 + TRAP_ENT_TR]%asi ! trap_tr = first intr_vec stna %g3, [%g5 + TRAP_ENT_F1]%asi ! trap_f1 = next intr_vec + GET_TRACE_TICK(%g6, %g3) + stxa %g6, [%g5 + TRAP_ENT_TICK]%asi ! trap_tick = %tick sll %g4, CPTRSHIFT, %g3 add %g1, INTR_HEAD, %g6 ldn [%g6 + %g3], %g6 ! 
%g6=cpu->m_cpu.intr_head[pil] @@ -276,13 +274,13 @@ _spurious: rdpr %pstate, os3; \ andn os3, PSTATE_IE | PSTATE_AM, os2; \ wrpr %g0, os2, %pstate; \ - TRACE_PTR(os1, os2); \ + TRACE_PTR(os1, os2); \ ldn [os4 + PC_OFF], os2; \ stna os2, [os1 + TRAP_ENT_TPC]%asi; \ ldx [os4 + TSTATE_OFF], os2; \ stxa os2, [os1 + TRAP_ENT_TSTATE]%asi; \ mov os3, os4; \ - GET_TRACE_TICK(os2); \ + GET_TRACE_TICK(os2, os3); \ stxa os2, [os1 + TRAP_ENT_TICK]%asi; \ TRACE_SAVE_TL_GL_REGS(os1, os2); \ set TT_SERVE_INTR, os2; \ @@ -309,11 +307,11 @@ _spurious: rdpr %pstate, os3; \ andn os3, PSTATE_IE | PSTATE_AM, os2; \ wrpr %g0, os2, %pstate; \ - TRACE_PTR(os1, os2); \ + TRACE_PTR(os1, os2); \ stna %g0, [os1 + TRAP_ENT_TPC]%asi; \ stxa %g0, [os1 + TRAP_ENT_TSTATE]%asi; \ mov os3, os4; \ - GET_TRACE_TICK(os2); \ + GET_TRACE_TICK(os2, os3); \ stxa os2, [os1 + TRAP_ENT_TICK]%asi; \ TRACE_SAVE_TL_GL_REGS(os1, os2); \ set TT_SERVE_INTR, os2; \ @@ -391,9 +389,7 @@ intr_thread(struct regs *regs, uint64_t iv_p, uint_t pil) ! resume() hasn't yet stored a timestamp for it. Or, it could be in ! swtch() after its slice has been accounted for. ! Only account for the time slice if the starting timestamp is non-zero. - rdpr %tick, %o4 ! delay - sllx %o4, 1, %o4 ! shift off NPT bit - srlx %o4, 1, %o4 + RD_TICK(%o4,%l2,%l3,__LINE__) sub %o4, %o3, %o4 ! o4 has interval ! A high-level interrupt in current_thread() interrupting here @@ -522,9 +518,7 @@ intr_thread(struct regs *regs, uint64_t iv_p, uint_t pil) add THREAD_REG, T_INTR_START, %o3 1: ldx [%o3], %o5 - rdpr %tick, %o4 - sllx %o4, 1, %o4 - srlx %o4, 1, %o4 ! shift off NPT bit + RD_TICK(%o4,%l2,%l3,__LINE__) casx [%o3], %o5, %o4 cmp %o4, %o5 ! If a high-level interrupt occurred while we were attempting to store @@ -589,9 +583,7 @@ intr_thread(struct regs *regs, uint64_t iv_p, uint_t pil) or %o0, %lo(intr_thread_t_intr_start_zero), %o0 9: #endif /* DEBUG */ - rdpr %tick, %o1 - sllx %o1, 1, %o1 - srlx %o1, 1, %o1 ! 
shift off NPT bit + RD_TICK(%o1,%l2,%l3,__LINE__) sub %o1, %o0, %l2 ! l2 has interval ! ! The general outline of what the code here does is: @@ -781,9 +773,7 @@ intr_thread(struct regs *regs, uint64_t iv_p, uint_t pil) add THREAD_REG, T_INTR_START, %o3 ! o3 has &curthread->t_intr_star 0: ldx [%o3], %o4 ! o4 = t_intr_start before - rdpr %tick, %o5 - sllx %o5, 1, %o5 - srlx %o5, 1, %o5 ! shift off NPT bit + RD_TICK(%o5,%l2,%l3,__LINE__) casx [%o3], %o4, %o5 ! put o5 in ts if o4 == ts after cmp %o4, %o5 ! If a high-level interrupt occurred while we were attempting to store @@ -824,7 +814,7 @@ intr_thread(struct regs *regs, uint64_t iv_p, uint_t pil) andn %l2, PSTATE_IE | PSTATE_AM, %o4 wrpr %g0, %o4, %pstate ! cpu to known state TRACE_PTR(%o4, %o5) - GET_TRACE_TICK(%o5) + GET_TRACE_TICK(%o5, %o0) stxa %o5, [%o4 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%o4, %o5) set TT_INTR_EXIT, %o5 @@ -1056,9 +1046,7 @@ no_onfault: or %o0, %lo(current_thread_nested_pil_zero), %o0 9: #endif /* DEBUG */ - rdpr %tick, %l1 - sllx %l1, 1, %l1 - srlx %l1, 1, %l1 ! shake off NPT bit + RD_TICK_NO_SUSPEND_CHECK(%l1, %l2) sub %l1, %l3, %l3 ! interval in %l3 ! ! Check for Energy Star mode @@ -1113,9 +1101,7 @@ no_onfault: nop stx %g0, [THREAD_REG + T_INTR_START] - rdpr %tick, %o4 - sllx %o4, 1, %o4 - srlx %o4, 1, %o4 ! shake off NPT bit + RD_TICK_NO_SUSPEND_CHECK(%o4, %l2) sub %o4, %o5, %o5 ! o5 has the interval ! Check for Energy Star mode @@ -1168,9 +1154,7 @@ no_onfault: sllx %o4, 3, %o4 ! index to byte offset add %o4, CPU_MCPU, %o4 ! CPU_PIL_HIGH_START is too large add %o4, MCPU_PIL_HIGH_START, %o4 - rdpr %tick, %o5 - sllx %o5, 1, %o5 - srlx %o5, 1, %o5 + RD_TICK_NO_SUSPEND_CHECK(%o5, %l2) stx %o5, [%o3 + %o4] wrpr %g0, %o2, %pil ! enable interrupts @@ -1260,9 +1244,7 @@ current_thread_complete: sllx %o4, 3, %o4 ! index to byte offset add %o4, CPU_MCPU, %o4 ! 
CPU_PIL_HIGH_START is too large add %o4, MCPU_PIL_HIGH_START, %o4 - rdpr %tick, %o5 - sllx %o5, 1, %o5 - srlx %o5, 1, %o5 + RD_TICK_NO_SUSPEND_CHECK(%o5, %o0) ldx [%o3 + %o4], %o0 #ifdef DEBUG ! ASSERT(cpu.cpu_m.pil_high_start[pil - (LOCK_LEVEL + 1)] != 0) @@ -1352,9 +1334,7 @@ current_thread_complete: sll %o5, 3, %o5 ! convert array index to byte offset add %o5, CPU_MCPU, %o5 ! CPU_PIL_HIGH_START is too large add %o5, MCPU_PIL_HIGH_START, %o5 - rdpr %tick, %o4 - sllx %o4, 1, %o4 - srlx %o4, 1, %o4 + RD_TICK_NO_SUSPEND_CHECK(%o4, %l2) ! Another high-level interrupt is active below this one, so ! there is no need to check for an interrupt thread. That will be ! done by the lowest priority high-level interrupt active. @@ -1369,9 +1349,7 @@ current_thread_complete: bz,pt %xcc, 7f nop - rdpr %tick, %o4 - sllx %o4, 1, %o4 - srlx %o4, 1, %o4 ! Shake off NPT bit + RD_TICK_NO_SUSPEND_CHECK(%o4, %l2) stx %o4, [THREAD_REG + T_INTR_START] 7: @@ -1694,7 +1672,7 @@ setsoftint_tl1(uint64_t iv_p, uint64_t dummy) 2: #ifdef TRAPTRACE TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g5 + TRAP_ENT_TICK]%asi ! trap_tick = %tick TRACE_SAVE_TL_GL_REGS(%g5, %g6) rdpr %tt, %g6 @@ -1829,8 +1807,6 @@ setvecint_tl1(uint64_t inum, uint64_t dummy) 3: #ifdef TRAPTRACE TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) - stxa %g6, [%g5 + TRAP_ENT_TICK]%asi ! trap_tick = %tick TRACE_SAVE_TL_GL_REGS(%g5, %g6) rdpr %tt, %g6 stha %g6, [%g5 + TRAP_ENT_TT]%asi ! trap_type = %tt` @@ -1848,6 +1824,8 @@ setvecint_tl1(uint64_t inum, uint64_t dummy) ldn [%g6 + %g7], %g6 ! %g6=cpu->m_cpu.intr_tail[pil] stna %g6, [%g5 + TRAP_ENT_F3]%asi ! trap_f3 = intr_tail[pil] stna %g2, [%g5 + TRAP_ENT_F4]%asi ! trap_f4 = pil + GET_TRACE_TICK(%g6, %g7) + stxa %g6, [%g5 + TRAP_ENT_TICK]%asi ! trap_tick = %tick TRACE_NEXT(%g5, %g6, %g7) #endif /* TRAPTRACE */ mov 1, %g6 ! %g6 = 1 @@ -2213,9 +2191,7 @@ intr_get_time(void) ! Calculate elapsed time since t_intr_start. Update t_intr_start, ! 
get delta, and multiply by cpu_divisor if necessary. ! - rdpr %tick, %o2 - sllx %o2, 1, %o2 - srlx %o2, 1, %o2 + RD_TICK_NO_SUSPEND_CHECK(%o2, %o0) stx %o2, [THREAD_REG + T_INTR_START] sub %o2, %o3, %o0 diff --git a/usr/src/uts/sun4/ml/swtch.s b/usr/src/uts/sun4/ml/swtch.s index 0714c2a344..d88d2bdc67 100644 --- a/usr/src/uts/sun4/ml/swtch.s +++ b/usr/src/uts/sun4/ml/swtch.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -38,6 +38,7 @@ #include <sys/mmu.h> #include <sys/pcb.h> #include <sys/machthread.h> +#include <sys/machclock.h> #include <sys/privregs.h> #include <sys/vtrace.h> #include <vm/hat_sfmmu.h> @@ -433,9 +434,7 @@ resume(kthread_id_t t) add THREAD_REG, T_INTR_START, %o2 1: ldx [%o2], %o1 - rdpr %tick, %o0 - sllx %o0, 1, %o0 - srlx %o0, 1, %o0 ! shift off NPT bit + RD_TICK(%o0,%o3,%g5,__LINE__) casx [%o2], %o1, %o0 cmp %o0, %o1 be,pt %xcc, 5b @@ -595,9 +594,7 @@ resume_from_intr(kthread_id_t t) add THREAD_REG, T_INTR_START, %o2 2: ldx [%o2], %o1 - rdpr %tick, %o0 - sllx %o0, 1, %o0 - srlx %o0, 1, %o0 ! 
shift off NPT bit + RD_TICK(%o0,%o3,%l1,__LINE__) casx [%o2], %o1, %o0 cmp %o0, %o1 bne,pn %xcc, 2b diff --git a/usr/src/uts/sun4/os/mp_startup.c b/usr/src/uts/sun4/os/mp_startup.c index 49ec390172..842f1ec50d 100644 --- a/usr/src/uts/sun4/os/mp_startup.c +++ b/usr/src/uts/sun4/os/mp_startup.c @@ -405,7 +405,7 @@ setup_cpu_common(int cpuid) * Initialize the CPUs physical ID cache, and processor groups */ pghw_physid_create(cp); - pg_cpu_init(cp); + (void) pg_cpu_init(cp, B_FALSE); if ((rval = cpu_intrq_setup(cp)) != 0) { return (rval); diff --git a/usr/src/uts/sun4/os/trap.c b/usr/src/uts/sun4/os/trap.c index d3a2df1e09..5b69f70ada 100644 --- a/usr/src/uts/sun4/os/trap.c +++ b/usr/src/uts/sun4/os/trap.c @@ -158,6 +158,9 @@ trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t mmu_fsr) int watchpage; extern faultcode_t pagefault(caddr_t, enum fault_type, enum seg_rw, int); +#ifdef sun4v + extern boolean_t tick_stick_emulation_active; +#endif /* sun4v */ CPU_STATS_ADDQ(CPU, sys, trap, 1); @@ -851,7 +854,27 @@ trap(struct regs *rp, caddr_t addr, uint32_t type, uint32_t mmu_fsr) case T_PRIV_INSTR + T_USER: /* privileged instruction fault */ if (tudebug) showregs(type, rp, (caddr_t)0, 0); + bzero(&siginfo, sizeof (siginfo)); +#ifdef sun4v + /* + * If this instruction fault is a non-privileged %tick + * or %stick trap, and %tick/%stick user emulation is + * enabled as a result of an OS suspend, then simulate + * the register read. We rely on simulate_rdtick to fail + * if the instruction is not a %tick or %stick read, + * causing us to fall through to the normal privileged + * instruction handling. 
+ */ + if (tick_stick_emulation_active && + (X_FAULT_TYPE(mmu_fsr) == FT_NEW_PRVACT) && + simulate_rdtick(rp) == SIMU_SUCCESS) { + /* skip the successfully simulated instruction */ + rp->r_pc = rp->r_npc; + rp->r_npc += 4; + goto out; + } +#endif siginfo.si_signo = SIGILL; siginfo.si_code = ILL_PRVOPC; siginfo.si_addr = (caddr_t)rp->r_pc; diff --git a/usr/src/uts/sun4/sys/clock.h b/usr/src/uts/sun4/sys/clock.h index ecb53b4f27..f1e113062c 100644 --- a/usr/src/uts/sun4/sys/clock.h +++ b/usr/src/uts/sun4/sys/clock.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_CLOCK_H #define _SYS_CLOCK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -269,117 +267,11 @@ typedef struct { mov -1, out; \ srlx out, NSEC_SHIFT, out - -/* - * The following macros are only for use in the cpu module. - */ -#if defined(CPU_MODULE) - /* * NSEC_SHIFT and VTRACE_SHIFT constants are defined in * <sys/machclock.h> file. */ - -/* - * NOTE: the macros below assume that the various time-related variables - * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc) - * are all stored together on a 64-byte boundary. The primary motivation - * is cache performance, but we also take advantage of a convenient side - * effect: these variables all have the same high 22 address bits, so only - * one sethi is needed to access them all. - */ - -/* - * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the - * number of nanoseconds since the last clock tick ('nslt'). It also - * sets 'nano' to the value NANOSEC (one billion). - * - * This macro assumes that all registers are globals or outs so they can - * safely contain 64-bit data, and that it's safe to use the label "5:". 
- * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn - * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not - * be used across invocations of this macro. - */ -#define GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \ - gnt1, gnt2) \ -5: sethi %hi(hres_lock), scr; \ - lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ - lduw [scr + %lo(nsec_scale)], nano; /* tick-to-ns factor */ \ - andn hrlock, 1, hrlock; /* see comments above! */ \ - ldx [scr + %lo(hres_last_tick)], nslt; \ - ldn [scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\ - add scr, %lo(hrestime), hrestnsec; \ - ldn [hrestnsec + CLONGSIZE], hrestnsec; \ - GET_NATIVE_TIME(adj, gnt1, gnt2); /* get current %tick */ \ - subcc adj, nslt, nslt; /* nslt = ticks since last clockint */ \ - movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ - ldx [scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */ \ - /* membar #LoadLoad; (see comment (2) above) */ \ - lduw [scr + %lo(hres_lock)], scr; /* load clock lock */ \ - NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT); \ - sethi %hi(NANOSEC), nano; \ - xor hrlock, scr, scr; \ -/* CSTYLED */ \ - brnz,pn scr, 5b; \ - or nano, %lo(NANOSEC), nano; - -/* - * Similar to above, but returns current gethrtime() value in 'base'. - */ -#define GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2) \ -5: sethi %hi(hres_lock), scr; \ - lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ - lduw [scr + %lo(nsec_scale)], scale; /* tick-to-ns factor */ \ - andn hrlock, 1, hrlock; /* see comments above! 
*/ \ - ldx [scr + %lo(hres_last_tick)], nslt; \ - ldx [scr + %lo(hrtime_base)], base; /* load hrtime_base */ \ - GET_NATIVE_TIME(now, gnt1, gnt2); /* get current %tick */ \ - subcc now, nslt, nslt; /* nslt = ticks since last clockint */ \ - movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ - /* membar #LoadLoad; (see comment (2) above) */ \ - ld [scr + %lo(hres_lock)], scr; /* load clock lock */ \ - NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT); \ - xor hrlock, scr, scr; \ -/* CSTYLED */ \ - brnz,pn scr, 5b; \ - add base, nslt, base; - -/* - * Maximum-performance timestamp for kernel tracing. We don't bother - * clearing NPT because vtrace expresses everything in 32-bit deltas, - * so only the low-order 32 bits matter. We do shift down a few bits, - * however, so that the trace framework doesn't emit a ridiculous number - * of 32_bit_elapsed_time records (trace points are more expensive when - * the time since the last trace point doesn't fit in a 16-bit delta). - * We currently shift by 4 (divide by 16) on the grounds that (1) there's - * no point making the timing finer-grained than the trace point latency, - * which exceeds 16 cycles; and (2) the cost and probe effect of many - * 32-bit time records far exceeds the cost of the 'srlx' instruction. - */ -#define GET_VTRACE_TIME(out, scr1, scr2) \ - GET_NATIVE_TIME(out, scr1, scr2); /* get current %tick */ \ - srlx out, VTRACE_SHIFT, out; - -/* - * Full 64-bit version for those truly rare occasions when you need it. - * Currently this is only needed to generate the TR_START_TIME record. - */ -#define GET_VTRACE_TIME_64(out, scr1, scr2) \ - GET_NATIVE_TIME(out, scr1, scr2); /* get current %tick */ \ - add out, out, out; \ - srlx out, VTRACE_SHIFT + 1, out; - -/* - * Return the rate at which the vtrace clock runs. 
- */ -#define GET_VTRACE_FREQUENCY(out, scr1, scr2) \ - sethi %hi(sys_tick_freq), out; \ - ldx [out + %lo(sys_tick_freq)], out; \ - srlx out, VTRACE_SHIFT, out; - -#endif /* CPU_MODULE */ - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/sun4/sys/intreg.h b/usr/src/uts/sun4/sys/intreg.h index f0dec269ee..0f33ecbe81 100644 --- a/usr/src/uts/sun4/sys/intreg.h +++ b/usr/src/uts/sun4/sys/intreg.h @@ -19,17 +19,19 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_INTREG_H #define _SYS_INTREG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/machintreg.h> +#ifndef _ASM +#include <sys/types.h> +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/usr/src/uts/sun4u/ml/mach_interrupt.s b/usr/src/uts/sun4u/ml/mach_interrupt.s index 03ab44a878..61e156d3cc 100644 --- a/usr/src/uts/sun4u/ml/mach_interrupt.s +++ b/usr/src/uts/sun4u/ml/mach_interrupt.s @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #if defined(lint) #include <sys/types.h> #include <sys/thread.h> @@ -130,7 +128,7 @@ vec_interrupt_resume: ldxa [%g2]ASI_INTR_RECEIVE, %g2 #ifdef TRAPTRACE TRACE_PTR(%g4, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g4 + TRAP_ENT_TICK]%asi rdpr %tl, %g6 stha %g6, [%g4 + TRAP_ENT_TL]%asi @@ -292,7 +290,7 @@ vec_intr_spurious(void) ld [%g2 + %lo(vec_spurious_cnt)], %g2 #ifdef TRAPTRACE TRACE_PTR(%g4, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g4 + TRAP_ENT_TICK]%asi rdpr %tl, %g6 stha %g6, [%g4 + TRAP_ENT_TL]%asi diff --git a/usr/src/uts/sun4u/ml/mach_subr_asm.s b/usr/src/uts/sun4u/ml/mach_subr_asm.s index 16ae6bec31..a49efc30f7 100644 --- a/usr/src/uts/sun4u/ml/mach_subr_asm.s +++ b/usr/src/uts/sun4u/ml/mach_subr_asm.s @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * General machine architecture & implementation specific * assembly language routines. @@ -46,6 +44,29 @@ #if defined(lint) +uint64_t +ultra_gettick(void) +{ return (0); } + +#else /* lint */ + +/* + * This isn't the routine you're looking for. + * + * The routine simply returns the value of %tick on the *current* processor. + * Most of the time, gettick() [which in turn maps to %stick on platforms + * that have different CPU %tick rates] is what you want. 
+ */ + + ENTRY(ultra_gettick) + retl + rdpr %tick, %o0 + SET_SIZE(ultra_gettick) + +#endif /* lint */ + +#if defined(lint) + /*ARGSUSED*/ int getprocessorid(void) diff --git a/usr/src/uts/sun4u/ml/mach_xc.s b/usr/src/uts/sun4u/ml/mach_xc.s index 9a7bbe2591..a04c580ab1 100644 --- a/usr/src/uts/sun4u/ml/mach_xc.s +++ b/usr/src/uts/sun4u/ml/mach_xc.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #if defined(lint) #include <sys/types.h> #include <sys/cpuvar.h> @@ -78,7 +75,7 @@ self_xcall(struct cpu *cpu, uint64_t arg1, uint64_t arg2, xcfunc_t *func) #ifdef TRAPTRACE TRACE_PTR(%g4, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g4 + TRAP_ENT_TICK]%asi rdpr %tl, %g6 stha %g6, [%g4 + TRAP_ENT_TL]%asi @@ -122,7 +119,7 @@ xc_trace(u_int traptype, cpuset_t *cpu_set, xcfunc_t *func, andn %g1, PSTATE_IE | PSTATE_AM, %g2 wrpr %g0, %g2, %pstate /* disable interrupts */ TRACE_PTR(%g3, %g4) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g4) stxa %g6, [%g3 + TRAP_ENT_TICK]%asi stha %g0, [%g3 + TRAP_ENT_TL]%asi set TT_XCALL, %g2 diff --git a/usr/src/uts/sun4u/ml/trap_table.s b/usr/src/uts/sun4u/ml/trap_table.s index e8219e3017..81f90a1e57 100644 --- a/usr/src/uts/sun4u/ml/trap_table.s +++ b/usr/src/uts/sun4u/ml/trap_table.s @@ -1293,7 +1293,7 @@ table_name/**/_itlbmiss: ;\ sethi %hi(FLUSH_ADDR), %g6 ;\ flush %g6 ;\ TRACE_PTR(%g3, %g6) ;\ - GET_TRACE_TICK(%g6) ;\ + GET_TRACE_TICK(%g6, %g4) ;\ stxa %g6, [%g3 + TRAP_ENT_TICK]%asi ;\ stxa %g2, [%g3 + TRAP_ENT_SP]%asi /* tag access */ ;\ stxa %g5, [%g3 + TRAP_ENT_F1]%asi /* tsb data */ ;\ @@ -2634,7 +2634,7 @@ done2: mmu_trap_tl1: #ifdef TRAPTRACE TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g7) stxa %g6, [%g5 + TRAP_ENT_TICK]%asi rdpr %tl, %g6 stha %g6, [%g5 + TRAP_ENT_TL]%asi @@ -2818,7 +2818,7 @@ kctx_obp_bpt: trace_gen: TRACE_PTR(%g3, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g4) stxa %g6, [%g3 + TRAP_ENT_TICK]%asi rdpr %tl, %g6 stha %g6, [%g3 + TRAP_ENT_TL]%asi @@ -2873,10 +2873,10 @@ trace_tsbmiss: sethi %hi(FLUSH_ADDR), %g6 flush %g6 TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) - stxa %g6, [%g5 + TRAP_ENT_TICK]%asi stxa %g2, [%g5 + TRAP_ENT_SP]%asi ! tag access stxa %g4, [%g5 + TRAP_ENT_F1]%asi ! 
tsb tag + GET_TRACE_TICK(%g6, %g4) + stxa %g6, [%g5 + TRAP_ENT_TICK]%asi rdpr %tnpc, %g6 stxa %g6, [%g5 + TRAP_ENT_F2]%asi stna %g1, [%g5 + TRAP_ENT_F3]%asi ! tsb8k pointer @@ -2908,7 +2908,7 @@ trace_dataprot: sethi %hi(FLUSH_ADDR), %g6 flush %g6 TRACE_PTR(%g1, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g5) stxa %g6, [%g1 + TRAP_ENT_TICK]%asi rdpr %tpc, %g6 stna %g6, [%g1 + TRAP_ENT_TPC]%asi diff --git a/usr/src/uts/sun4u/sys/machclock.h b/usr/src/uts/sun4u/sys/machclock.h index e0bc501de7..0fe85cfefd 100644 --- a/usr/src/uts/sun4u/sys/machclock.h +++ b/usr/src/uts/sun4u/sys/machclock.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_MACHCLOCK_H #define _SYS_MACHCLOCK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -42,6 +40,14 @@ extern "C" { jmp %g1 + %lo(cpu_clearticknpt); \ rd %pc, %g4 +#define RD_TICK_NO_SUSPEND_CHECK(out, scr1) \ + rdpr %tick, out; \ + sllx out, 1, out; \ + srlx out, 1, out; + +#define RD_TICK(out, scr1, scr2, label) \ + RD_TICK_NO_SUSPEND_CHECK(out, scr1); + #endif /* _ASM */ #if defined(CPU_MODULE) @@ -57,7 +63,6 @@ extern "C" { * At least 3.9MHz, for slower %stick-based systems. */ #define NSEC_SHIFT 8 -#define VTRACE_SHIFT 8 #elif defined(SPITFIRE) @@ -71,6 +76,70 @@ extern "C" { #error "Compiling for CPU_MODULE but no CPU specified" #endif +/* + * NOTE: the macros below assume that the various time-related variables + * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc) + * are all stored together on a 64-byte boundary. The primary motivation + * is cache performance, but we also take advantage of a convenient side + * effect: these variables all have the same high 22 address bits, so only + * one sethi is needed to access them all. 
+ */ + +/* + * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the + * number of nanoseconds since the last clock tick ('nslt'). It also + * sets 'nano' to the value NANOSEC (one billion). + * + * This macro assumes that all registers are globals or outs so they can + * safely contain 64-bit data, and that it's safe to use the label "5:". + * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn + * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not + * be used across invocations of this macro. + */ +#define GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \ + gnt1, gnt2) \ +5: sethi %hi(hres_lock), scr; \ + lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ + lduw [scr + %lo(nsec_scale)], nano; /* tick-to-ns factor */ \ + andn hrlock, 1, hrlock; /* see comments above! */ \ + ldx [scr + %lo(hres_last_tick)], nslt; \ + ldn [scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\ + add scr, %lo(hrestime), hrestnsec; \ + ldn [hrestnsec + CLONGSIZE], hrestnsec; \ + GET_NATIVE_TIME(adj, gnt1, gnt2); /* get current %tick */ \ + subcc adj, nslt, nslt; /* nslt = ticks since last clockint */ \ + movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ + ldx [scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */ \ + /* membar #LoadLoad; (see comment (2) above) */ \ + lduw [scr + %lo(hres_lock)], scr; /* load clock lock */ \ + NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT); \ + sethi %hi(NANOSEC), nano; \ + xor hrlock, scr, scr; \ +/* CSTYLED */ \ + brnz,pn scr, 5b; \ + or nano, %lo(NANOSEC), nano; + +/* + * Similar to above, but returns current gethrtime() value in 'base'. + */ +#define GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2) \ +5: sethi %hi(hres_lock), scr; \ + lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ + lduw [scr + %lo(nsec_scale)], scale; /* tick-to-ns factor */ \ + andn hrlock, 1, hrlock; /* see comments above! 
*/ \ + ldx [scr + %lo(hres_last_tick)], nslt; \ + ldx [scr + %lo(hrtime_base)], base; /* load hrtime_base */ \ + GET_NATIVE_TIME(now, gnt1, gnt2); /* get current %tick */ \ + subcc now, nslt, nslt; /* nslt = ticks since last clockint */ \ + movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ + /* membar #LoadLoad; (see comment (2) above) */ \ + ld [scr + %lo(hres_lock)], scr; /* load clock lock */ \ + NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT); \ + xor hrlock, scr, scr; \ +/* CSTYLED */ \ + brnz,pn scr, 5b; \ + add base, nslt, base; + #endif /* CPU_MODULE */ #ifndef _ASM diff --git a/usr/src/uts/sun4u/sys/traptrace.h b/usr/src/uts/sun4u/sys/traptrace.h index b2bfeb1efd..de07ecaaa9 100644 --- a/usr/src/uts/sun4u/sys/traptrace.h +++ b/usr/src/uts/sun4u/sys/traptrace.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_TRAPTRACE_H #define _SYS_TRAPTRACE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -156,10 +154,10 @@ extern void mach_htraptrace_cleanup(int); * and when trap trace macros are used. 
*/ #ifdef TRAPTRACE_FORCE_TICK -#define GET_TRACE_TICK(reg) \ +#define GET_TRACE_TICK(reg, scr) \ rdpr %tick, reg; #else -#define GET_TRACE_TICK(reg) \ +#define GET_TRACE_TICK(reg, scr) \ sethi %hi(traptrace_use_stick), reg; \ lduw [reg + %lo(traptrace_use_stick)], reg; \ /* CSTYLED */ \ @@ -262,7 +260,7 @@ extern void mach_htraptrace_cleanup(int); andn scr4, PSTATE_IE | PSTATE_AM, scr3; \ wrpr %g0, scr3, %pstate; \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ rdpr %tl, scr2; \ stha scr2, [scr1 + TRAP_ENT_TL]%asi; \ @@ -292,7 +290,7 @@ extern void mach_htraptrace_cleanup(int); */ #define TRACE_WIN_INFO(code, scr1, scr2, scr3) \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ rdpr %tl, scr2; \ stha scr2, [scr1 + TRAP_ENT_TL]%asi; \ @@ -332,7 +330,7 @@ extern void mach_htraptrace_cleanup(int); #define FAULT_WINTRACE(scr1, scr2, scr3, type) \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ rdpr %tl, scr2; \ stha scr2, [scr1 + TRAP_ENT_TL]%asi; \ @@ -355,7 +353,7 @@ extern void mach_htraptrace_cleanup(int); #define SYSTRAP_TRACE(scr1, scr2, scr3) \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ rdpr %tl, scr2; \ stha scr2, [scr1 + TRAP_ENT_TL]%asi; \ diff --git a/usr/src/uts/sun4v/Makefile.files b/usr/src/uts/sun4v/Makefile.files index 8b6a4404bd..9746292e0b 100644 --- a/usr/src/uts/sun4v/Makefile.files +++ b/usr/src/uts/sun4v/Makefile.files @@ -43,6 +43,7 @@ CORE_OBJS += hat_sfmmu.o CORE_OBJS += hat_kdi.o CORE_OBJS += hsvc.o CORE_OBJS += iscsi_boot.o +CORE_OBJS += kldc.o CORE_OBJS += lpad.o CORE_OBJS += mach_cpu_states.o CORE_OBJS += mach_ddi_impl.o @@ -78,9 +79,9 @@ CORE_OBJS += promif_stree.o CORE_OBJS += promif_test.o CORE_OBJS += promif_version.o CORE_OBJS += 
sfmmu_kdi.o +CORE_OBJS += suspend.o CORE_OBJS += swtch.o CORE_OBJS += wdt.o -CORE_OBJS += kldc.o CORE_OBJS += xhat_sfmmu.o CORE_OBJS += mdesc_diff.o diff --git a/usr/src/uts/sun4v/cpu/common_asm.s b/usr/src/uts/sun4v/cpu/common_asm.s index cf30c07365..360dcdf217 100644 --- a/usr/src/uts/sun4v/cpu/common_asm.s +++ b/usr/src/uts/sun4v/cpu/common_asm.s @@ -42,25 +42,7 @@ #define FAST_TRAP_DONE \ ba,a fast_trap_done -/* - * Override GET_NATIVE_TIME for the cpu module code. This is not - * guaranteed to be exactly one instruction, be careful of using - * the macro in delay slots. - * - * Do not use any instruction that modifies condition codes as the - * caller may depend on these to remain unchanged across the macro. - */ - -#define GET_NATIVE_TIME(out, scr1, scr2) \ - rd STICK, out - -#define RD_TICKCMPR(out, scr) \ - rd STICK_COMPARE, out - -#define WR_TICKCMPR(in,scr1,scr2,label) \ - wr in, STICK_COMPARE - - +#include <sys/machclock.h> #include <sys/clock.h> #if defined(lint) @@ -111,9 +93,9 @@ tickcmpr_set(uint64_t clock_cycles) 1: WR_TICKCMPR(%o2,%o4,%o5,__LINE__) ! Write to TICK_CMPR - GET_NATIVE_TIME(%o0, %o4, %o5) ! Read %tick to confirm the - sllx %o0, 1, %o0 ! value we wrote was in the future. - srlx %o0, 1, %o0 + GET_NATIVE_TIME(%o0,%o4,%o5,__LINE__) ! Read %tick to confirm the + ! value we wrote was in the + ! future. cmp %o2, %o0 ! If the value we wrote was in the bg,pt %xcc, 2f ! future, then blow out of here. @@ -191,7 +173,7 @@ tickcmpr_disabled(void) #else /* lint */ ENTRY_NP(tickcmpr_disabled) - RD_TICKCMPR(%g1, %o0) + RD_TICKCMPR(%g1,%o0,%o1,__LINE__) retl srlx %g1, TICKINT_DIS_SHFT, %o0 SET_SIZE(tickcmpr_disabled) @@ -210,13 +192,29 @@ gettick(void) #else /* lint */ ENTRY(gettick) - GET_NATIVE_TIME(%o0, %o2, %o3) + GET_NATIVE_TIME(%o0,%o2,%o3,__LINE__) retl nop SET_SIZE(gettick) #endif /* lint */ +/* + * Get current tick. For trapstat use only. 
+ */ +#if defined (lint) + +hrtime_t +rdtick() +{ return (0); } + +#else + ENTRY(rdtick) + retl + RD_TICK_PHYSICAL(%o0) + SET_SIZE(rdtick) +#endif /* lint */ + /* * Return the counter portion of the tick register. @@ -228,14 +226,33 @@ uint64_t gettick_counter(void) { return(0); } +uint64_t +gettick_npt(void) +{ return(0); } + +uint64_t +getstick_npt(void) +{ return(0); } + #else /* lint */ ENTRY_NP(gettick_counter) - rdpr %tick, %o0 - sllx %o0, 1, %o0 + RD_TICK(%o0,%o1,%o2,__LINE__) retl - srlx %o0, 1, %o0 ! shake off npt bit + nop SET_SIZE(gettick_counter) + + ENTRY_NP(gettick_npt) + RD_TICK_PHYSICAL(%o0) + retl + srlx %o0, 63, %o0 + SET_SIZE(gettick_npt) + + ENTRY_NP(getstick_npt) + RD_STICK_PHYSICAL(%o0) + retl + srlx %o0, 63, %o0 + SET_SIZE(getstick_npt) #endif /* lint */ /* @@ -303,21 +320,21 @@ panic_hres_tick(void) #else /* lint */ ENTRY_NP(gethrtime) - GET_HRTIME(%g1, %o0, %o1, %o2, %o3, %o4, %o5, %g2) + GET_HRTIME(%g1,%o0,%o1,%o2,%o3,%o4,%o5,%g2,__LINE__) ! %g1 = hrtime retl mov %g1, %o0 SET_SIZE(gethrtime) ENTRY_NP(gethrtime_unscaled) - GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time + GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__) ! %g1 = native time retl mov %g1, %o0 SET_SIZE(gethrtime_unscaled) ENTRY_NP(gethrtime_waitfree) ALTENTRY(dtrace_gethrtime) - GET_NATIVE_TIME(%g1, %o2, %o3) ! %g1 = native time + GET_NATIVE_TIME(%g1,%o2,%o3,__LINE__) ! %g1 = native time NATIVE_TIME_TO_NSEC(%g1, %o2, %o3) retl mov %g1, %o0 @@ -352,7 +369,8 @@ panic_hres_tick(void) */ ENTRY_NP(get_timestamp) - GET_HRTIME(%g1, %g2, %g3, %g4, %g5, %o0, %o1, %o2) ! %g1 = hrtime + GET_HRTIME(%g1,%g2,%g3,%g4,%g5,%o0,%o1,%o2,__LINE__) + ! %g1 = hrtime srlx %g1, 32, %o0 ! %o0 = hi32(%g1) srl %g1, 0, %o1 ! 
%o1 = lo32(%g1) FAST_TRAP_DONE @@ -386,7 +404,7 @@ panic_hres_tick(void) 4: ENTRY_NP(gethrestime) - GET_HRESTIME(%o1, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4) + GET_HRESTIME(%o1,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__) CONV_HRESTIME(%o1, %o2, %o3, %o4, %o5) stn %o1, [%o0] retl @@ -398,7 +416,7 @@ panic_hres_tick(void) * seconds. */ ENTRY_NP(gethrestime_sec) - GET_HRESTIME(%o0, %o2, %o3, %o4, %o5, %g1, %g2, %g3, %g4) + GET_HRESTIME(%o0,%o2,%o3,%o4,%o5,%g1,%g2,%g3,%g4,__LINE__) CONV_HRESTIME(%o0, %o2, %o3, %o4, %o5) retl ! %o0 current hrestime seconds nop @@ -437,7 +455,7 @@ panic_hres_tick(void) */ ENTRY_NP(get_hrestime) - GET_HRESTIME(%o0, %o1, %g1, %g2, %g3, %g4, %g5, %o2, %o3) + GET_HRESTIME(%o0,%o1,%g1,%g2,%g3,%g4,%g5,%o2,%o3,__LINE__) CONV_HRESTIME(%o0, %o1, %g1, %g2, %g3) FAST_TRAP_DONE SET_SIZE(get_hrestime) @@ -457,7 +475,7 @@ panic_hres_tick(void) * %g5 = scratch */ ENTRY_NP(get_virtime) - GET_NATIVE_TIME(%g5, %g1, %g2) ! %g5 = native time in ticks + GET_NATIVE_TIME(%g5,%g1,%g2,__LINE__) ! %g5 = native time in ticks CPU_ADDR(%g2, %g3) ! CPU struct ptr to %g2 ldn [%g2 + CPU_THREAD], %g2 ! thread pointer to %g2 ldn [%g2 + T_LWP], %g3 ! lwp pointer to %g3 @@ -510,7 +528,7 @@ hrtime_base_panic: ! update hres_last_tick. %l5 has the scaling factor (nsec_scale). ! ldx [%l4 + %lo(hrtime_base)], %g1 ! load current hrtime_base - GET_NATIVE_TIME(%l0, %l3, %l6) ! current native time + GET_NATIVE_TIME(%l0,%l3,%l6,__LINE__) ! current native time stx %l0, [%l4 + %lo(hres_last_tick)]! prev = current ! 
convert native time to nsecs NATIVE_TIME_TO_NSEC_SCALE(%l0, %l5, %l2, NSEC_SHIFT) @@ -631,38 +649,38 @@ QRETURN; \ .align 16 ENTRY(kstat_waitq_enter) - GET_NATIVE_TIME(%g1, %g2, %g3) + GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__) KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W) SET_SIZE(kstat_waitq_enter) .align 16 ENTRY(kstat_waitq_exit) - GET_NATIVE_TIME(%g1, %g2, %g3) + GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__) KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_W) SET_SIZE(kstat_waitq_exit) .align 16 ENTRY(kstat_runq_enter) - GET_NATIVE_TIME(%g1, %g2, %g3) + GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__) KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R) SET_SIZE(kstat_runq_enter) .align 16 ENTRY(kstat_runq_exit) - GET_NATIVE_TIME(%g1, %g2, %g3) + GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__) KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, retl, KSTAT_IO_R) SET_SIZE(kstat_runq_exit) .align 16 ENTRY(kstat_waitq_to_runq) - GET_NATIVE_TIME(%g1, %g2, %g3) + GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__) KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_W) KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_R) SET_SIZE(kstat_waitq_to_runq) .align 16 ENTRY(kstat_runq_back_to_waitq) - GET_NATIVE_TIME(%g1, %g2, %g3) + GET_NATIVE_TIME(%g1,%g2,%g3,__LINE__) KSTAT_Q_UPDATE(sub, BRZPN, kstat_q_panic, 1:, KSTAT_IO_R) KSTAT_Q_UPDATE(add, BRZPT, 1f, 1:retl, KSTAT_IO_W) SET_SIZE(kstat_runq_back_to_waitq) @@ -693,7 +711,7 @@ int traptrace_use_stick; .seg ".data" .global timedelta, hres_last_tick, hrestime, hrestime_adj .global hres_lock, nsec_scale, hrtime_base, traptrace_use_stick - .global nsec_shift, adj_shift + .global nsec_shift, adj_shift, native_tick_offset, native_stick_offset /* XXX - above comment claims 128-bytes is necessary */ .align 64 @@ -717,6 +735,12 @@ nsec_shift: .word NSEC_SHIFT adj_shift: .word ADJ_SHIFT + .align 8 +native_tick_offset: + .word 0, 0 + .align 8 +native_stick_offset: + .word 0, 0 #endif @@ -756,11 +780,11 @@ usec_delay(int n) lduw [%o1 + %lo(sticks_per_usec)], %o1 mulx %o1, 
%o0, %o1 ! Scale usec to ticks inc %o1 ! We don't start on a tick edge - GET_NATIVE_TIME(%o2, %o3, %o4) + GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__) add %o1, %o2, %o1 1: cmp %o1, %o2 - GET_NATIVE_TIME(%o2, %o3, %o4) + GET_NATIVE_TIME(%o2,%o3,%o4,__LINE__) bgeu,pt %xcc, 1b nop retl @@ -810,7 +834,7 @@ pil14_interrupt(int level) ! Note that %o5 is live until after 1f. ! XXX - there is a subroutine call while %o5 is live! ! - RD_TICKCMPR(%o5, %g1) + RD_TICKCMPR(%o5,%g1,%g2,__LINE__) srlx %o5, TICKINT_DIS_SHFT, %g1 brnz,pt %g1, 2f nop @@ -839,9 +863,7 @@ pil14_interrupt(int level) ! that the value we programmed is still in the future. If it isn't, ! we need to reprogram TICK_COMPARE to fire as soon as possible. ! - GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick - sllx %o0, 1, %o0 ! Clear the DIS bit - srlx %o0, 1, %o0 + GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__) ! %o0 = tick cmp %o5, %o0 ! In the future? bg,a,pt %xcc, 2f ! Yes, drive on. wrpr %g0, %g5, %pstate ! delay: enable vec intr @@ -854,9 +876,7 @@ pil14_interrupt(int level) mov 8, %o4 ! 8 = arbitrary inital step 1: add %o0, %o4, %o5 ! Add the step WR_TICKCMPR(%o5,%g1,%g2,__LINE__) ! Write to TICK_CMPR - GET_NATIVE_TIME(%o0, %g1, %g2) ! %o0 = tick - sllx %o0, 1, %o0 ! Clear the DIS bit - srlx %o0, 1, %o0 + GET_NATIVE_TIME(%o0,%g1,%g2,__LINE__) ! %o0 = tick cmp %o5, %o0 ! In the future? bg,a,pt %xcc, 2f ! Yes, drive on. wrpr %g0, %g5, %pstate ! delay: enable vec intr diff --git a/usr/src/uts/sun4v/cpu/mach_cpu_module.c b/usr/src/uts/sun4v/cpu/mach_cpu_module.c index 335e574f8e..eb7ce530df 100644 --- a/usr/src/uts/sun4v/cpu/mach_cpu_module.c +++ b/usr/src/uts/sun4v/cpu/mach_cpu_module.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/cpu_module.h> #include <vm/page.h> #include <vm/seg_map.h> @@ -699,3 +697,11 @@ atomic_clear_long_excl(volatile ulong_t *target, uint_t value) void fp_zero(void) {} + +uint64_t +gettick_npt(void) +{ return (0); } + +uint64_t +getstick_npt(void) +{ return (0); } diff --git a/usr/src/uts/sun4v/io/platsvc.c b/usr/src/uts/sun4v/io/platsvc.c index 7eeaa6659a..f1faf13ed3 100644 --- a/usr/src/uts/sun4v/io/platsvc.c +++ b/usr/src/uts/sun4v/io/platsvc.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,6 +35,10 @@ #include <sys/uadmin.h> #include <sys/ds.h> #include <sys/platsvc.h> +#include <sys/ddi.h> +#include <sys/suspend.h> +#include <sys/proc.h> +#include <sys/disp.h> /* * Debugging routines @@ -52,6 +56,7 @@ uint_t ps_debug = 0x0; #define MS2NANO(x) ((x) * MICROSEC) #define MS2SEC(x) ((x) / MILLISEC) #define MS2MIN(x) (MS2SEC(x) / 60) +#define SEC2HZ(x) (drv_usectohz((x) * MICROSEC)) /* * Domains Services interaction @@ -59,6 +64,7 @@ uint_t ps_debug = 0x0; static ds_svc_hdl_t ds_md_handle; static ds_svc_hdl_t ds_shutdown_handle; static ds_svc_hdl_t ds_panic_handle; +static ds_svc_hdl_t ds_suspend_handle; static ds_ver_t ps_vers[] = {{ 1, 0 }}; #define PS_NVERS (sizeof (ps_vers) / sizeof (ps_vers[0])) @@ -81,12 +87,19 @@ static ds_capability_t ps_panic_cap = { PS_NVERS /* nvers */ }; +static ds_capability_t ps_suspend_cap = { + "domain-suspend", /* svc_id */ + ps_vers, /* vers */ + PS_NVERS /* nvers */ +}; + static void ps_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl); static void ps_unreg_handler(ds_cb_arg_t arg); -static void ps_md_data_handler(ds_cb_arg_t arg, void * buf, size_t buflen); +static void ps_md_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); static void ps_shutdown_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); -static 
void ps_panic_data_handler(ds_cb_arg_t arg, void * buf, size_t buflen); +static void ps_panic_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); +static void ps_suspend_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); static ds_clnt_ops_t ps_md_ops = { ps_reg_handler, /* ds_reg_cb */ @@ -109,6 +122,13 @@ static ds_clnt_ops_t ps_panic_ops = { &ds_panic_handle /* cb_arg */ }; +static ds_clnt_ops_t ps_suspend_ops = { + ps_reg_handler, /* ds_reg_cb */ + ps_unreg_handler, /* ds_unreg_cb */ + ps_suspend_data_handler, /* ds_data_cb */ + &ds_suspend_handle /* cb_arg */ +}; + static int ps_init(void); static void ps_fini(void); @@ -117,6 +137,43 @@ static void ps_fini(void); */ #define PLATSVC_POWERDOWN_DELAY 1200 +/* + * Set to true if OS suspend is supported. If OS suspend is not + * supported, the suspend service will not be started. + */ +static boolean_t ps_suspend_enabled = B_FALSE; + +/* + * Suspend service request handling + */ +typedef struct ps_suspend_data { + void *buf; + size_t buflen; +} ps_suspend_data_t; + +static kmutex_t ps_suspend_mutex; +static kcondvar_t ps_suspend_cv; + +static ps_suspend_data_t *ps_suspend_data = NULL; +static boolean_t ps_suspend_thread_exit = B_FALSE; +static kthread_t *ps_suspend_thread = NULL; + +static void ps_suspend_sequence(ps_suspend_data_t *data); +static void ps_suspend_thread_func(void); + +/* + * The DELAY timeout is the time (in seconds) to wait for the + * suspend service to be re-registered after a suspend/resume + * operation. The INTVAL time is the time (in seconds) to wait + * between retry attempts when sending the post-suspend message + * after a suspend/resume operation. 
+ */ +#define PLATSVC_SUSPEND_REREG_DELAY 60 +#define PLATSVC_SUSPEND_RETRY_INTVAL 1 +static int ps_suspend_rereg_delay = PLATSVC_SUSPEND_REREG_DELAY; +static int ps_suspend_retry_intval = PLATSVC_SUSPEND_RETRY_INTVAL; + + static struct modlmisc modlmisc = { &mod_miscops, "sun4v Platform Services" @@ -169,6 +226,7 @@ ps_init(void) { int rv; extern int mdeg_init(void); + extern void mdeg_fini(void); /* register with domain services framework */ rv = ds_cap_init(&ps_md_cap, &ps_md_ops); @@ -177,9 +235,16 @@ ps_init(void) return (rv); } + rv = mdeg_init(); + if (rv != 0) { + (void) ds_cap_fini(&ps_md_cap); + return (rv); + } + rv = ds_cap_init(&ps_shutdown_cap, &ps_shutdown_ops); if (rv != 0) { cmn_err(CE_WARN, "ds_cap_init domain-shutdown failed: %d", rv); + mdeg_fini(); (void) ds_cap_fini(&ps_md_cap); return (rv); } @@ -188,13 +253,36 @@ ps_init(void) if (rv != 0) { cmn_err(CE_WARN, "ds_cap_init domain-panic failed: %d", rv); (void) ds_cap_fini(&ps_md_cap); + mdeg_fini(); (void) ds_cap_fini(&ps_shutdown_cap); return (rv); } - rv = mdeg_init(); + ps_suspend_enabled = suspend_supported(); + + if (ps_suspend_enabled) { + mutex_init(&ps_suspend_mutex, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ps_suspend_cv, NULL, CV_DEFAULT, NULL); + ps_suspend_thread_exit = B_FALSE; + + rv = ds_cap_init(&ps_suspend_cap, &ps_suspend_ops); + if (rv != 0) { + cmn_err(CE_WARN, "ds_cap_init domain-suspend failed: " + "%d", rv); + (void) ds_cap_fini(&ps_md_cap); + mdeg_fini(); + (void) ds_cap_fini(&ps_shutdown_cap); + (void) ds_cap_fini(&ps_panic_cap); + mutex_destroy(&ps_suspend_mutex); + cv_destroy(&ps_suspend_cv); + return (rv); + } - return (rv); + ps_suspend_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, + ps_suspend_thread_func, NULL, 0, &p0, TS_RUN, minclsyspri); + } + + return (0); } static void @@ -209,6 +297,22 @@ ps_fini(void) (void) ds_cap_fini(&ps_shutdown_cap); (void) ds_cap_fini(&ps_panic_cap); + if (ps_suspend_enabled) { + (void) ds_cap_fini(&ps_suspend_cap); + if 
(ps_suspend_thread != NULL) { + mutex_enter(&ps_suspend_mutex); + ps_suspend_thread_exit = B_TRUE; + cv_signal(&ps_suspend_cv); + mutex_exit(&ps_suspend_mutex); + + thread_join(ps_suspend_thread->t_did); + ps_suspend_thread = NULL; + + mutex_destroy(&ps_suspend_mutex); + cv_destroy(&ps_suspend_cv); + } + } + mdeg_fini(); } @@ -353,6 +457,233 @@ ps_panic_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen) _NOTE(NOTREACHED) } +/* + * Send a suspend response message. If a timeout is specified, wait + * intval seconds between attempts to send the message. The timeout + * and intval arguments are in seconds. + */ +static void +ps_suspend_send_response(ds_svc_hdl_t *ds_handle, uint64_t req_num, + uint32_t result, uint32_t rec_result, char *reason, int timeout, + int intval) +{ + platsvc_suspend_resp_t *resp; + size_t reason_length; + int tries = 0; + int rv = -1; + time_t deadline; + + if (reason == NULL) { + reason_length = 0; + } else { + /* Get number of non-NULL bytes */ + reason_length = strnlen(reason, SUSPEND_MAX_REASON_SIZE - 1); + ASSERT(reason[reason_length] == '\0'); + /* Account for NULL terminator */ + reason_length++; + } + + resp = (platsvc_suspend_resp_t *) + kmem_zalloc(sizeof (platsvc_suspend_resp_t) + reason_length, + KM_SLEEP); + + resp->req_num = req_num; + resp->result = result; + resp->rec_result = rec_result; + if (reason_length > 0) { + bcopy(reason, &resp->reason, reason_length - 1); + /* Ensure NULL terminator is present */ + resp->reason[reason_length] = '\0'; + } + + if (timeout == 0) { + tries++; + rv = ds_cap_send(*ds_handle, resp, + sizeof (platsvc_suspend_resp_t) + reason_length); + } else { + deadline = gethrestime_sec() + timeout; + do { + ds_svc_hdl_t hdl; + /* + * Copy the handle so we can ensure we never pass + * an invalid handle to ds_cap_send. We don't want + * to trigger warning messages just because the + * service was temporarily unregistered. 
+ */ + if ((hdl = *ds_handle) == DS_INVALID_HDL) { + delay(SEC2HZ(intval)); + } else if ((rv = ds_cap_send(hdl, resp, + sizeof (platsvc_suspend_resp_t) + + reason_length)) != 0) { + tries++; + delay(SEC2HZ(intval)); + } + } while ((rv != 0) && (gethrestime_sec() < deadline)); + } + + if (rv != 0) { + cmn_err(CE_NOTE, "suspend ds_cap_send resp failed (%d) " + "sending message: %d, attempts: %d", rv, resp->result, + tries); + } + + kmem_free(resp, sizeof (platsvc_suspend_resp_t) + reason_length); +} + +/* + * Handle data coming in for the suspend service. The suspend is + * sequenced by the ps_suspend_thread, but perform some checks here + * to make sure that the request is a valid request message and that + * a suspend operation is not already in progress. + */ +/*ARGSUSED*/ +static void +ps_suspend_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen) +{ + platsvc_suspend_req_t *msg = buf; + + if (arg == NULL) + return; + + if (ds_suspend_handle == DS_INVALID_HDL) { + DBG("ps_suspend_data_handler: DS handle no longer valid\n"); + return; + } + + /* Handle invalid requests */ + if (msg == NULL || buflen != sizeof (platsvc_suspend_req_t) || + msg->type != DOMAIN_SUSPEND_SUSPEND) { + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_INVALID_MSG, DOMAIN_SUSPEND_REC_SUCCESS, + NULL, 0, 0); + return; + } + + /* + * If ps_suspend_thread_exit is set, ds_cap_fini has been + * called and we shouldn't be receiving data. Handle this unexpected + * case by returning without sending a response. 
+ */ + if (ps_suspend_thread_exit) { + DBG("ps_suspend_data_handler: ps_suspend_thread is exiting\n"); + return; + } + + mutex_enter(&ps_suspend_mutex); + + /* If a suspend operation is in progress, abort now */ + if (ps_suspend_data != NULL) { + mutex_exit(&ps_suspend_mutex); + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_INPROGRESS, DOMAIN_SUSPEND_REC_SUCCESS, + NULL, 0, 0); + return; + } + + ps_suspend_data = kmem_alloc(sizeof (ps_suspend_data_t), KM_SLEEP); + ps_suspend_data->buf = kmem_alloc(buflen, KM_SLEEP); + ps_suspend_data->buflen = buflen; + bcopy(buf, ps_suspend_data->buf, buflen); + + cv_signal(&ps_suspend_cv); + mutex_exit(&ps_suspend_mutex); +} + +/* + * Schedule the suspend operation by calling the pre-suspend, suspend, + * and post-suspend functions. When sending back response messages, we + * only use a timeout for the post-suspend response because after + * a resume, domain services will be re-registered and we may not + * be able to send the response immediately. + */ +static void +ps_suspend_sequence(ps_suspend_data_t *data) +{ + platsvc_suspend_req_t *msg; + uint32_t rec_result; + char *error_reason; + boolean_t recovered = B_TRUE; + uint_t rv; + + ASSERT(data != NULL); + + msg = data->buf; + error_reason = (char *)kmem_zalloc(SUSPEND_MAX_REASON_SIZE, KM_SLEEP); + + /* Pre-suspend */ + rv = suspend_pre(error_reason, SUSPEND_MAX_REASON_SIZE, &recovered); + if (rv != 0) { + rec_result = (recovered ? DOMAIN_SUSPEND_REC_SUCCESS : + DOMAIN_SUSPEND_REC_FAILURE); + + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_PRE_FAILURE, rec_result, error_reason, 0, 0); + + kmem_free(error_reason, SUSPEND_MAX_REASON_SIZE); + return; + } + + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_PRE_SUCCESS, 0, NULL, 0, 0); + + /* Suspend */ + rv = suspend_start(error_reason, SUSPEND_MAX_REASON_SIZE); + if (rv != 0) { + rec_result = (suspend_post(NULL, 0) == 0 ? 
+ DOMAIN_SUSPEND_REC_SUCCESS : DOMAIN_SUSPEND_REC_FAILURE); + + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_SUSPEND_FAILURE, rec_result, error_reason, + 0, 0); + + kmem_free(error_reason, SUSPEND_MAX_REASON_SIZE); + return; + } + + /* Post-suspend */ + rv = suspend_post(error_reason, SUSPEND_MAX_REASON_SIZE); + if (rv != 0) { + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_POST_FAILURE, 0, error_reason, + ps_suspend_rereg_delay, ps_suspend_retry_intval); + } else { + ps_suspend_send_response(&ds_suspend_handle, msg->req_num, + DOMAIN_SUSPEND_POST_SUCCESS, 0, error_reason, + ps_suspend_rereg_delay, ps_suspend_retry_intval); + } + + kmem_free(error_reason, SUSPEND_MAX_REASON_SIZE); +} + +/* + * Wait for a suspend request or for ps_suspend_thread_exit to be set. + */ +static void +ps_suspend_thread_func(void) +{ + mutex_enter(&ps_suspend_mutex); + + while (ps_suspend_thread_exit == B_FALSE) { + + if (ps_suspend_data == NULL) { + cv_wait(&ps_suspend_cv, &ps_suspend_mutex); + continue; + } + + mutex_exit(&ps_suspend_mutex); + ps_suspend_sequence(ps_suspend_data); + mutex_enter(&ps_suspend_mutex); + + kmem_free(ps_suspend_data->buf, ps_suspend_data->buflen); + kmem_free(ps_suspend_data, sizeof (ps_suspend_data_t)); + ps_suspend_data = NULL; + } + + mutex_exit(&ps_suspend_mutex); + + thread_exit(); +} + static void ps_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl) { @@ -365,6 +696,8 @@ ps_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl) ds_shutdown_handle = hdl; if ((ds_svc_hdl_t *)arg == &ds_panic_handle) ds_panic_handle = hdl; + if ((ds_svc_hdl_t *)arg == &ds_suspend_handle) + ds_suspend_handle = hdl; } static void @@ -378,4 +711,6 @@ ps_unreg_handler(ds_cb_arg_t arg) ds_shutdown_handle = DS_INVALID_HDL; if ((ds_svc_hdl_t *)arg == &ds_panic_handle) ds_panic_handle = DS_INVALID_HDL; + if ((ds_svc_hdl_t *)arg == &ds_suspend_handle) + ds_suspend_handle = DS_INVALID_HDL; } diff --git 
a/usr/src/uts/sun4v/ml/hcall.s b/usr/src/uts/sun4v/ml/hcall.s index 70635baf63..7f99a07f59 100644 --- a/usr/src/uts/sun4v/ml/hcall.s +++ b/usr/src/uts/sun4v/ml/hcall.s @@ -349,6 +349,20 @@ uint64_t hv_soft_state_get(uint64_t string, uint64_t *state) { return (0); } +uint64_t +hv_guest_suspend(void) +{ return (0); } + +/*ARGSUSED*/ +uint64_t +hv_set_tick_npt(uint64_t npt) +{ return (0); } + +/*ARGSUSED*/ +uint64_t +hv_set_stick_npt(uint64_t npt) +{ return (0); } + #else /* lint || __lint */ /* @@ -1238,4 +1252,25 @@ hv_soft_state_get(uint64_t string, uint64_t *state) stx %o1, [%o2] SET_SIZE(hv_soft_state_get) + ENTRY(hv_guest_suspend) + mov GUEST_SUSPEND, %o5 + ta FAST_TRAP + retl + nop + SET_SIZE(hv_guest_suspend) + + ENTRY(hv_tick_set_npt) + mov TICK_SET_NPT, %o5 + ta FAST_TRAP + retl + nop + SET_SIZE(hv_tick_set_npt) + + ENTRY(hv_stick_set_npt) + mov STICK_SET_NPT, %o5 + ta FAST_TRAP + retl + nop + SET_SIZE(hv_stick_set_npt) + #endif /* lint || __lint */ diff --git a/usr/src/uts/sun4v/ml/mach_interrupt.s b/usr/src/uts/sun4v/ml/mach_interrupt.s index e5978dbb7f..e49ee12d91 100644 --- a/usr/src/uts/sun4v/ml/mach_interrupt.s +++ b/usr/src/uts/sun4v/ml/mach_interrupt.s @@ -108,7 +108,7 @@ cpu_mondo(void) #ifdef TRAPTRACE TRACE_PTR(%g4, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g4 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%g4, %g6) rdpr %tt, %g6 @@ -311,7 +311,7 @@ dev_mondo(void) #ifdef TRAPTRACE TRACE_PTR(%g4, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g4 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%g4, %g6) rdpr %tt, %g6 diff --git a/usr/src/uts/sun4v/ml/mach_locore.s b/usr/src/uts/sun4v/ml/mach_locore.s index de815641bd..c64f3f6e94 100644 --- a/usr/src/uts/sun4v/ml/mach_locore.s +++ b/usr/src/uts/sun4v/ml/mach_locore.s @@ -1401,12 +1401,8 @@ save_cpu_state: rd SOFTINT, %g1 sta %g1, [%g3 + PTL1_SOFTINT] %asi wr %g1, CLEAR_SOFTINT - sethi %hi(traptrace_use_stick), %g1 - ld [%g1 + %lo(traptrace_use_stick)], %g1 - 
brz,a,pn %g1, 2f - rdpr %tick, %g1 - rd STICK, %g1 -2: stxa %g1, [%g3 + PTL1_TICK] %asi + RD_TICKSTICK_FLAG(%g1, %g4, traptrace_use_stick) + stxa %g1, [%g3 + PTL1_TICK] %asi MMU_FAULT_STATUS_AREA(%g1) ldx [%g1 + MMFSA_D_TYPE], %g4 diff --git a/usr/src/uts/sun4v/ml/mach_subr_asm.s b/usr/src/uts/sun4v/ml/mach_subr_asm.s index 0e41f01d66..18d902b393 100644 --- a/usr/src/uts/sun4v/ml/mach_subr_asm.s +++ b/usr/src/uts/sun4v/ml/mach_subr_asm.s @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * General machine architecture & implementation specific * assembly language routines. @@ -39,6 +37,7 @@ #include <sys/asm_linkage.h> #include <sys/machsystm.h> #include <sys/machthread.h> +#include <sys/machclock.h> #include <sys/privregs.h> #include <sys/cmpregs.h> #include <sys/clock.h> @@ -46,6 +45,30 @@ #include <sys/soft_state.h> #if defined(lint) + +uint64_t +ultra_gettick(void) +{ return (0); } + +#else /* lint */ + +/* + * This isn't the routine you're looking for. + * + * The routine simply returns the value of %tick on the *current* processor. + * Most of the time, gettick() [which in turn maps to %stick on platforms + * that have different CPU %tick rates] is what you want. + */ + + ENTRY(ultra_gettick) + RD_TICK(%o0,%o1,%o2,__LINE__) + retl + nop + SET_SIZE(ultra_gettick) + +#endif /* lint */ + +#if defined(lint) /* ARGSUSED */ void set_mmfsa_scratchpad(caddr_t vaddr) diff --git a/usr/src/uts/sun4v/ml/mach_xc.s b/usr/src/uts/sun4v/ml/mach_xc.s index 3f59994c91..a3dfd1e571 100644 --- a/usr/src/uts/sun4v/ml/mach_xc.s +++ b/usr/src/uts/sun4v/ml/mach_xc.s @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #if defined(lint) #include <sys/types.h> #include <sys/cpuvar.h> @@ -80,7 +77,7 @@ self_xcall(struct cpu *cpu, uint64_t arg1, uint64_t arg2, xcfunc_t *func) #ifdef TRAPTRACE TRACE_PTR(%g4, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g3) stxa %g6, [%g4 + TRAP_ENT_TICK]%asi rdpr %tl, %g6 stha %g6, [%g4 + TRAP_ENT_TL]%asi @@ -124,7 +121,7 @@ xc_trace(u_int traptype, cpuset_t *cpu_set, xcfunc_t *func, andn %g1, PSTATE_IE | PSTATE_AM, %g2 wrpr %g0, %g2, %pstate /* disable interrupts */ TRACE_PTR(%g3, %g4) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g4) stxa %g6, [%g3 + TRAP_ENT_TICK]%asi stha %g0, [%g3 + TRAP_ENT_TL]%asi set TT_XCALL, %g2 diff --git a/usr/src/uts/sun4v/ml/trap_table.s b/usr/src/uts/sun4v/ml/trap_table.s index 9def20fbc8..ad781d5fb5 100644 --- a/usr/src/uts/sun4v/ml/trap_table.s +++ b/usr/src/uts/sun4v/ml/trap_table.s @@ -1052,7 +1052,7 @@ table_name/**/_itlbmiss: ;\ sethi %hi(FLUSH_ADDR), %g6 ;\ flush %g6 ;\ TRACE_PTR(%g3, %g6) ;\ - GET_TRACE_TICK(%g6) ;\ + GET_TRACE_TICK(%g6, %g4) ;\ stxa %g6, [%g3 + TRAP_ENT_TICK]%asi ;\ stna %g2, [%g3 + TRAP_ENT_SP]%asi /* tag access */ ;\ stna %g5, [%g3 + TRAP_ENT_F1]%asi /* tsb data */ ;\ @@ -2392,7 +2392,7 @@ done2: mmu_trap_tl1: #ifdef TRAPTRACE TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g7) stxa %g6, [%g5 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%g5, %g6) rdpr %tt, %g6 @@ -2524,7 +2524,7 @@ obp_bpt: trace_dmmu: TRACE_PTR(%g3, 
%g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g5) stxa %g6, [%g3 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%g3, %g6) rdpr %tt, %g6 @@ -2549,7 +2549,7 @@ trace_dmmu: trace_immu: TRACE_PTR(%g3, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g5) stxa %g6, [%g3 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%g3, %g6) rdpr %tt, %g6 @@ -2574,7 +2574,7 @@ trace_immu: trace_gen: TRACE_PTR(%g3, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g5) stxa %g6, [%g3 + TRAP_ENT_TICK]%asi TRACE_SAVE_TL_GL_REGS(%g3, %g6) rdpr %tt, %g6 @@ -2633,10 +2633,10 @@ trace_tsbmiss: sethi %hi(FLUSH_ADDR), %g6 flush %g6 TRACE_PTR(%g5, %g6) - GET_TRACE_TICK(%g6) - stxa %g6, [%g5 + TRAP_ENT_TICK]%asi stna %g2, [%g5 + TRAP_ENT_SP]%asi ! tag access stna %g4, [%g5 + TRAP_ENT_F1]%asi ! XXX? tsb tag + GET_TRACE_TICK(%g6, %g4) + stxa %g6, [%g5 + TRAP_ENT_TICK]%asi rdpr %tnpc, %g6 stna %g6, [%g5 + TRAP_ENT_F2]%asi stna %g1, [%g5 + TRAP_ENT_F3]%asi ! tsb8k pointer @@ -2674,7 +2674,7 @@ trace_dataprot: sethi %hi(FLUSH_ADDR), %g6 flush %g6 TRACE_PTR(%g1, %g6) - GET_TRACE_TICK(%g6) + GET_TRACE_TICK(%g6, %g4) stxa %g6, [%g1 + TRAP_ENT_TICK]%asi rdpr %tpc, %g6 stna %g6, [%g1 + TRAP_ENT_TPC]%asi diff --git a/usr/src/uts/sun4v/os/hsvc.c b/usr/src/uts/sun4v/os/hsvc.c index 166ab99f45..82c7c2f67a 100644 --- a/usr/src/uts/sun4v/os/hsvc.c +++ b/usr/src/uts/sun4v/os/hsvc.c @@ -20,12 +20,10 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/dditypes.h> #include <sys/machsystm.h> @@ -665,7 +663,7 @@ typedef struct hsvc_info_unix_s { static hsvc_info_unix_t hsvcinfo_unix[] = { {{HSVC_REV_1, NULL, HSVC_GROUP_SUN4V, 1, 0, NULL}, 1}, - {{HSVC_REV_1, NULL, HSVC_GROUP_CORE, 1, 1, NULL}, 1}, + {{HSVC_REV_1, NULL, HSVC_GROUP_CORE, 1, 2, NULL}, 1}, {{HSVC_REV_1, NULL, HSVC_GROUP_DIAG, 1, 0, NULL}, 1}, {{HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, NULL}, 0}, }; diff --git a/usr/src/uts/sun4v/os/mach_cpu_states.c b/usr/src/uts/sun4v/os/mach_cpu_states.c index 3b41ebcbf4..6b3ddd7aec 100644 --- a/usr/src/uts/sun4v/os/mach_cpu_states.c +++ b/usr/src/uts/sun4v/os/mach_cpu_states.c @@ -55,6 +55,7 @@ #include <sys/ldoms.h> #include <sys/kldc.h> #include <sys/clock_impl.h> +#include <sys/suspend.h> #include <sys/dumphdr.h> /* @@ -965,7 +966,9 @@ syncfpu(void) void sticksync_slave(void) -{} +{ + suspend_sync_tick_stick_npt(); +} void sticksync_master(void) diff --git a/usr/src/uts/sun4v/os/suspend.c b/usr/src/uts/sun4v/os/suspend.c new file mode 100644 index 0000000000..2de4331424 --- /dev/null +++ b/usr/src/uts/sun4v/os/suspend.c @@ -0,0 +1,589 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/mutex.h> +#include <sys/cpuvar.h> +#include <sys/cyclic.h> +#include <sys/disp.h> +#include <sys/ddi.h> +#include <sys/wdt.h> +#include <sys/callb.h> +#include <sys/cmn_err.h> +#include <sys/hypervisor_api.h> +#include <sys/membar.h> +#include <sys/x_call.h> +#include <sys/promif.h> +#include <sys/systm.h> +#include <sys/mach_descrip.h> +#include <sys/cpu_module.h> +#include <sys/pg.h> +#include <sys/lgrp.h> +#include <sys/sysmacros.h> +#include <sys/sunddi.h> +#include <sys/cpupart.h> +#include <sys/hsvc.h> + +/* + * Sun4v OS Suspend + * + * Provides a means to suspend a sun4v guest domain by pausing CPUs and then + * calling into the HV to initiate a suspension. Suspension is sequenced + * externally by calling suspend_pre, suspend_start, and suspend_post. + * suspend_pre and suspend_post are meant to perform any special operations + * that should be done before or after a suspend/resume operation. e.g., + * callbacks to cluster software to disable heartbeat monitoring before the + * system is suspended. suspend_start prepares kernel services to be suspended + * and then suspends the domain by calling hv_guest_suspend. + * + * Special Handling for %tick and %stick Registers + * + * After a suspend/resume operation, the %tick and %stick registers may have + * jumped forwards or backwards. The delta is assumed to be consistent across + * all CPUs, within the negligible level of %tick and %stick variation + * acceptable on a cold boot. 
In order to maintain increasing %tick and %stick + * counter values without exposing large positive or negative jumps to kernel + * or user code, a %tick and %stick offset is used. Kernel reads of these + * counters return the sum of the hardware register counter and offset + * variable. After a suspend/resume operation, user reads of %tick or %stick + * are emulated. Suspend code enables emulation by setting the + * %{tick,stick}.NPT fields which trigger a privileged instruction access + * trap whenever the registers are read from user mode. If emulation has been + * enabled, the trap handler emulates the instruction. Emulation is only + * enabled during a successful suspend/resume operation. When emulation is + * enabled, CPUs that are DR'd into the system will have their + * %{tick,stick}.NPT bits set to 1 as well. + */ + +extern u_longlong_t gettick(void); /* returns %stick */ +extern uint64_t gettick_counter(void); /* returns %tick */ +extern uint64_t gettick_npt(void); +extern uint64_t getstick_npt(void); +extern int mach_descrip_update(void); +extern cpuset_t cpu_ready_set; +extern uint64_t native_tick_offset; +extern uint64_t native_stick_offset; + +/* + * Global Sun Cluster pre/post callbacks. + */ +const char *(*cl_suspend_error_decode)(int); +int (*cl_suspend_pre_callback)(void); +int (*cl_suspend_post_callback)(void); +#define SC_PRE_FAIL_STR_FMT "Sun Cluster pre-suspend failure: %d" +#define SC_POST_FAIL_STR_FMT "Sun Cluster post-suspend failure: %d" +#define SC_FAIL_STR_MAX 256 + +/* + * The minimum major and minor version of the HSVC_GROUP_CORE API group + * required in order to use OS suspend. + */ +#define SUSPEND_CORE_MAJOR 1 +#define SUSPEND_CORE_MINOR 2 + +/* + * By default, sun4v OS suspend is supported if the required HV version + * is present. suspend_disabled should be set on platforms that do not + * allow OS suspend regardless of whether or not the HV supports it. + * It can also be set in /etc/system. 
+ */ +static int suspend_disabled = 0; + +/* + * Controls whether or not user-land tick and stick register emulation + * will be enabled following a successful suspend operation. + */ +static int enable_user_tick_stick_emulation = 1; + +/* + * Indicates whether or not tick and stick emulation is currently active. + * After a successful suspend operation, if emulation is enabled, this + * variable is set to B_TRUE. Global scope to allow emulation code to + * check if emulation is active. + */ +boolean_t tick_stick_emulation_active = B_FALSE; + +/* + * Controls whether or not MD information is refreshed after a + * successful suspend and resume. When non-zero, after a successful + * suspend and resume, the MD will be downloaded, cpunodes updated, + * and processor grouping information recalculated. + */ +static int suspend_update_cpu_mappings = 1; + +/* + * DBG and DBG_PROM() macro. + */ +#ifdef DEBUG + +static int suspend_debug_flag = 0; + +#define DBG_PROM \ +if (suspend_debug_flag) \ + prom_printf + +#define DBG \ +if (suspend_debug_flag) \ + suspend_debug + +/* + * Format a debug message and log it with cmn_err(CE_NOTE). The message + * is formatted into a fixed-size stack buffer, so the bounded vsnprintf + * is used; an over-long message is truncated rather than overrunning buf. + */ +static void +suspend_debug(const char *fmt, ...) +{ + char buf[512]; + va_list ap; + + va_start(ap, fmt); + (void) vsnprintf(buf, sizeof (buf), fmt, ap); + va_end(ap); + + cmn_err(CE_NOTE, "%s", buf); +} + +#else /* DEBUG */ + +#define DBG_PROM +#define DBG + +#endif /* DEBUG */ + +/* + * Return true if the HV supports OS suspend and if suspend has not been + * disabled on this platform. + */ +boolean_t +suspend_supported(void) +{ + uint64_t major, minor; + + if (suspend_disabled) + return (B_FALSE); + + if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0) + return (B_FALSE); + + return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) || + (major > SUSPEND_CORE_MAJOR)); +} + +/* + * Given a source tick and stick value, set the tick and stick offsets such + * that the (current physical register value + offset == source value). 
+ */ +static void +set_tick_offsets(uint64_t source_tick, uint64_t source_stick) +{ + uint64_t target_tick; + uint64_t target_stick; + + native_tick_offset = 0; + native_stick_offset = 0; + + target_tick = gettick_counter(); /* returns %tick */ + target_stick = gettick(); /* returns %stick */ + + native_tick_offset = source_tick - target_tick; + native_stick_offset = source_stick - target_stick; +} + +/* + * Set the {tick,stick}.NPT field to 1 on this CPU. + */ +static void +enable_tick_stick_npt(void) +{ + hv_stick_set_npt(1); + hv_tick_set_npt(1); +} + +/* + * Synchronize a CPU's {tick,stick}.NPT fields with the current state + * of the system. This is used when a CPU is DR'd into the system. + */ +void +suspend_sync_tick_stick_npt(void) +{ + if (tick_stick_emulation_active) { + DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id); + hv_stick_set_npt(1); + hv_tick_set_npt(1); + } else { + ASSERT(gettick_npt() == 0); + ASSERT(getstick_npt() == 0); + } +} + +/* + * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW + * sharing data structures, and processor groups. + */ +static void +update_cpu_mappings(void) +{ + md_t *mdp; + processorid_t id; + cpu_t *cp; + int rv; + cpu_pg_t *pgps[NCPU]; + + /* Download the latest MD */ + if ((rv = mach_descrip_update()) != 0) { + DBG("suspend: mach_descrip_update error: %d", rv); + return; + } + + if ((mdp = md_get_handle()) == NULL) { + DBG("suspend: md_get_handle failed"); + return; + } + + DBG("suspend: updating CPU mappings"); + + mutex_enter(&cpu_lock); + + setup_chip_mappings(mdp); + setup_exec_unit_mappings(mdp); + for (id = 0; id < NCPU; id++) { + if ((cp = cpu_get(id)) == NULL) + continue; + cpu_map_exec_units(cp); + } + + /* + * Re-calculate processor groups. + * + * First tear down all PG information before adding any new PG + * information derived from the MD we just downloaded. 
We must + * call pg_cpu_inactive and pg_cpu_active with CPUs paused and + * we want to minimize the number of times pause_cpus is called. + * Inactivating all CPUs would leave PGs without any active CPUs, + * so while CPUs are paused, call pg_cpu_inactive and swap in the + * bootstrap PG structure saving the original PG structure to be + * fini'd afterwards. This prevents the dispatcher from encountering + * PGs in which all CPUs are inactive. + */ + pause_cpus(NULL); + for (id = 0; id < NCPU; id++) { + if ((cp = cpu_get(id)) == NULL) + continue; + pg_cpu_inactive(cp); + pgps[id] = cp->cpu_pg; + pg_cpu_bootstrap(cp); + } + start_cpus(); + + /* + * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are + * not paused. Use two separate loops here so that we do not + * initialize PG data for CPUs until all the old PG data structures + * are torn down. + */ + for (id = 0; id < NCPU; id++) { + if ((cp = cpu_get(id)) == NULL) + continue; + pg_cpu_fini(cp, pgps[id]); + } + + /* + * Initialize PG data for each CPU, but leave the bootstrapped + * PG structure in place to avoid running with any PGs containing + * nothing but inactive CPUs. + */ + for (id = 0; id < NCPU; id++) { + if ((cp = cpu_get(id)) == NULL) + continue; + pgps[id] = pg_cpu_init(cp, B_TRUE); + } + + /* + * Now that PG data has been initialized for all CPUs in the + * system, replace the bootstrapped PG structure with the + * initialized PG structure and call pg_cpu_active for each CPU. + */ + pause_cpus(NULL); + for (id = 0; id < NCPU; id++) { + if ((cp = cpu_get(id)) == NULL) + continue; + cp->cpu_pg = pgps[id]; + pg_cpu_active(cp); + } + start_cpus(); + + mutex_exit(&cpu_lock); + + (void) md_fini_handle(mdp); +} + +/* + * Wrapper for the Sun Cluster error decoding function. 
+ */ +static int +cluster_error_decode(int error, char *error_reason, size_t max_reason_len) +{ + const char *decoded; + size_t decoded_len; + + ASSERT(error_reason != NULL); + ASSERT(max_reason_len > 0); + + max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX); + + if (cl_suspend_error_decode == NULL) + return (-1); + + if ((decoded = (*cl_suspend_error_decode)(error)) == NULL) + return (-1); + + /* Get number of non-NULL bytes */ + if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0) + return (-1); + + bcopy(decoded, error_reason, decoded_len); + + /* + * The error string returned from cl_suspend_error_decode + * should be NULL-terminated, but set the terminator here + * because we only copied non-NULL bytes. If the decoded + * string was not NULL-terminated, this guarantees that + * error_reason will be. + */ + error_reason[decoded_len] = '\0'; + + return (0); +} + +/* + * Wrapper for the Sun Cluster pre-suspend callback. + */ +static int +cluster_pre_wrapper(char *error_reason, size_t max_reason_len) +{ + int rv = 0; + + if (cl_suspend_pre_callback != NULL) { + rv = (*cl_suspend_pre_callback)(); + DBG("suspend: cl_suspend_pre_callback returned %d", rv); + if (rv != 0 && error_reason != NULL && max_reason_len > 0) { + if (cluster_error_decode(rv, error_reason, + max_reason_len)) { + (void) snprintf(error_reason, max_reason_len, + SC_PRE_FAIL_STR_FMT, rv); + } + } + } + + return (rv); +} + +/* + * Wrapper for the Sun Cluster post-suspend callback. 
+ */ +static int +cluster_post_wrapper(char *error_reason, size_t max_reason_len) +{ + int rv = 0; + + if (cl_suspend_post_callback != NULL) { + rv = (*cl_suspend_post_callback)(); + DBG("suspend: cl_suspend_post_callback returned %d", rv); + if (rv != 0 && error_reason != NULL && max_reason_len > 0) { + if (cluster_error_decode(rv, error_reason, + max_reason_len)) { + (void) snprintf(error_reason, + max_reason_len, SC_POST_FAIL_STR_FMT, rv); + } + } + } + + return (rv); +} + +/* + * Execute pre-suspend callbacks preparing the system for a suspend operation. + * Returns zero on success, non-zero on failure. Sets the recovered argument + * to indicate whether or not callbacks could be undone in the event of a + * failure--if callbacks were successfully undone, *recovered is set to B_TRUE, + * otherwise *recovered is set to B_FALSE. Must be called successfully before + * suspend_start can be called. Callers should first call suspend_supported to + * determine if OS suspend is supported. + */ +int +suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered) +{ + int rv; + + ASSERT(recovered != NULL); + + /* + * Return an error if suspend_pre is erroneously called + * when OS suspend is not supported. + */ + ASSERT(suspend_supported()); + if (!suspend_supported()) { + DBG("suspend: suspend_pre called without suspend support"); + *recovered = B_TRUE; + return (ENOTSUP); + } + DBG("suspend: %s", __func__); + + rv = cluster_pre_wrapper(error_reason, max_reason_len); + + /* + * At present, only one pre-suspend operation exists. + * If it fails, no recovery needs to be done. + */ + if (rv != 0 && recovered != NULL) + *recovered = B_TRUE; + + return (rv); +} + +/* + * Execute post-suspend callbacks. Returns zero on success, non-zero on + * failure. Must be called after suspend_start is called, regardless of + * whether or not suspend_start is successful. 
+ */ +int +suspend_post(char *error_reason, size_t max_reason_len) +{ + ASSERT(suspend_supported()); + DBG("suspend: %s", __func__); + return (cluster_post_wrapper(error_reason, max_reason_len)); +} + +/* + * Suspends the OS by pausing CPUs and calling into the HV to initiate + * the suspend. When the HV routine hv_guest_suspend returns, the system + * will be resumed. Must be called after a successful call to suspend_pre. + * suspend_post must be called after suspend_start, whether or not + * suspend_start returns an error. + */ +/*ARGSUSED*/ +int +suspend_start(char *error_reason, size_t max_reason_len) +{ + uint64_t source_tick; + uint64_t source_stick; + uint64_t rv; + timestruc_t source_tod; + int spl; + + ASSERT(suspend_supported()); + DBG("suspend: %s", __func__); + + mutex_enter(&cpu_lock); + + /* Suspend the watchdog */ + watchdog_suspend(); + + /* Record the TOD */ + mutex_enter(&tod_lock); + source_tod = tod_get(); + mutex_exit(&tod_lock); + + /* Pause all other CPUs */ + pause_cpus(NULL); + DBG_PROM("suspend: CPUs paused\n"); + + /* Suspend cyclics and disable interrupts */ + cyclic_suspend(); + DBG_PROM("suspend: cyclics suspended\n"); + spl = spl8(); + + source_tick = gettick_counter(); + source_stick = gettick(); + DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick); + DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick); + + /* + * Call into the HV to initiate the suspend. + * hv_guest_suspend() returns after the guest has been + * resumed or if the suspend operation failed or was + * cancelled. After a successful suspend, the %tick and + * %stick registers may have changed by an amount that is + * not proportional to the amount of time that has passed. + * They may have jumped forwards or backwards. This jump + * must be uniform across all CPUs and we operate under + * the assumption that it is (maintaining two global offset + * variables--one for %tick and one for %stick.) + */ + DBG_PROM("suspend: suspending... 
\n"); + rv = hv_guest_suspend(); + if (rv != 0) { + splx(spl); + cyclic_resume(); + start_cpus(); + watchdog_resume(); + mutex_exit(&cpu_lock); + DBG("suspend: failed, rv: %ld\n", rv); + return (rv); + } + + /* Update the global tick and stick offsets */ + set_tick_offsets(source_tick, source_stick); + + /* Ensure new offsets are globally visible before resuming CPUs */ + membar_sync(); + + /* Enable interrupts */ + splx(spl); + + /* Set the {%tick,%stick}.NPT bits on all CPUs */ + if (enable_user_tick_stick_emulation) { + xc_all((xcfunc_t *)enable_tick_stick_npt, NULL, NULL); + xt_sync(cpu_ready_set); + ASSERT(gettick_npt() != 0); + ASSERT(getstick_npt() != 0); + } + + /* If emulation is enabled, but not currently active, enable it */ + if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) { + tick_stick_emulation_active = B_TRUE; + } + + /* Resume cyclics, unpause CPUs */ + cyclic_resume(); + start_cpus(); + + /* Set the TOD */ + mutex_enter(&tod_lock); + tod_set(source_tod); + mutex_exit(&tod_lock); + + /* Re-enable the watchdog */ + watchdog_resume(); + + mutex_exit(&cpu_lock); + + /* Get new MD, update CPU mappings/relationships */ + if (suspend_update_cpu_mappings) + update_cpu_mappings(); + + DBG("suspend: target tick: 0x%lx", gettick_counter()); + DBG("suspend: target stick: 0x%llx", gettick()); + DBG("suspend: user %%tick/%%stick emulation is %d", + tick_stick_emulation_active); + DBG("suspend: finished"); + + return (0); +} diff --git a/usr/src/uts/sun4v/sys/hypervisor_api.h b/usr/src/uts/sun4v/sys/hypervisor_api.h index e51333f0b4..8aa2abe70a 100644 --- a/usr/src/uts/sun4v/sys/hypervisor_api.h +++ b/usr/src/uts/sun4v/sys/hypervisor_api.h @@ -172,6 +172,10 @@ extern "C" { #define HV_TM_ENABLE 0x180 +#define GUEST_SUSPEND 0x181 +#define TICK_SET_NPT 0x182 +#define STICK_SET_NPT 0x183 + #define HV_RA2PA 0x200 #define HV_HPRIV 0x201 @@ -442,6 +446,10 @@ extern uint64_t hvldc_intr_gettarget(uint64_t dev_hdl, uint32_t devino, extern uint64_t 
hvldc_intr_settarget(uint64_t dev_hdl, uint32_t devino, uint32_t cpuid); +extern uint64_t hv_guest_suspend(void); +extern uint64_t hv_tick_set_npt(uint64_t npt); +extern uint64_t hv_stick_set_npt(uint64_t npt); + #endif /* ! _ASM */ diff --git a/usr/src/uts/sun4v/sys/machclock.h b/usr/src/uts/sun4v/sys/machclock.h index aee7eed171..3a8e8fd618 100644 --- a/usr/src/uts/sun4v/sys/machclock.h +++ b/usr/src/uts/sun4v/sys/machclock.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,20 +19,139 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_MACHCLOCK_H #define _SYS_MACHCLOCK_H -#pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/intreg.h> #ifdef __cplusplus extern "C" { #endif /* + * Tick/Stick Register Access + * + * The following assembly language macros are defined for reading + * the %tick and %stick registers as well as reading and writing + * the stick compare register. With the exception of trapstat, reads + * and writes of these registers all take into account an offset + * value which is added to the hardware counter. By default, this + * offset is zero. The offsets can only be modified when CPUs are + * paused and are only intended to be modified during an OS suspend + * operation. 
+ * + * Since the read of the %tick or %stick is not an atomic operation, + * it is possible for a suspend operation to occur between the read + * of the hardware register and its offset variable. The default + * macros here take this into account by comparing the value of the + * offset variable before and after reading the hardware register. + * Callers that need to read the %tick register and can guarantee + * they will not be preempted can use the RD_TICK_NO_SUSPEND_CHECK + * which does not check for native_tick_offset changing. + */ +#define RD_STICK(out, scr1, scr2, label) \ +.rd_stick.label: \ + sethi %hi(native_stick_offset), scr1; \ + ldx [scr1 + %lo(native_stick_offset)], scr2; \ + rd STICK, out; \ + ldx [scr1 + %lo(native_stick_offset)], scr1; \ + sub scr1, scr2, scr2; \ +/* CSTYLED */ \ + brnz,pn scr2, .rd_stick.label; \ + sllx out, 1, out; \ + srlx out, 1, out; \ + add out, scr1, out + +#define RD_TICK(out, scr1, scr2, label) \ +.rd_tick.label: \ + sethi %hi(native_tick_offset), scr1; \ + ldx [scr1 + %lo(native_tick_offset)], scr2; \ + rd %tick, out; \ + ldx [scr1 + %lo(native_tick_offset)], scr1; \ + sub scr1, scr2, scr2; \ +/* CSTYLED */ \ + brnz,pn scr2, .rd_tick.label; \ + sllx out, 1, out; \ + srlx out, 1, out; \ + add out, scr1, out + +#define RD_TICK_NO_SUSPEND_CHECK(out, scr1) \ + sethi %hi(native_tick_offset), scr1; \ + ldx [scr1 + %lo(native_tick_offset)], scr1; \ + rd %tick, out; \ + sllx out, 1, out; \ + srlx out, 1, out; \ + add out, scr1, out + +/* + * Read the %stick register without taking the native_stick_offset + * into account. + */ +#define RD_STICK_PHYSICAL(out) \ + rd %stick, out + +/* + * Read the %tick register without taking the native_tick_offset + * into account. Required to be a single instruction, usable in a + * delay slot. + */ +#define RD_TICK_PHYSICAL(out) \ + rd %tick, out + +/* + * For traptrace, which requires either the %tick or %stick + * counter depending on the value of a global variable. 
+ * If the kernel variable passed in as 'use_stick' is non-zero, + * read the %stick counter into the 'out' register, otherwise, + * read the %tick counter. Note the label-less branches. + * We do not check for the tick or stick offset variables changing + * during the course of the macro's execution and as a result + * if a suspend operation occurs between the time the offset + * variable is read and the hardware register is read, we will + * use an inaccurate traptrace timestamp. + */ +#define RD_TICKSTICK_FLAG(out, scr1, use_stick) \ + sethi %hi(use_stick), scr1; \ + lduw [scr1 + %lo(use_stick)], scr1; \ +/* CSTYLED */ \ + brz,a scr1, .+24; \ + rd %tick, out; \ + sethi %hi(native_stick_offset), scr1; \ + ldx [scr1 + %lo(native_stick_offset)], scr1; \ + ba .+16; \ + rd STICK, out; \ + sethi %hi(native_tick_offset), scr1; \ + ldx [scr1 + %lo(native_tick_offset)], scr1; \ + sllx out, 1, out; \ + srlx out, 1, out; \ + add out, scr1, out; + +#define RD_TICKCMPR(out, scr1, scr2, label) \ +.rd_stickcmpr.label: \ + sethi %hi(native_stick_offset), scr1; \ + ldx [scr1 + %lo(native_stick_offset)], scr2; \ + rd STICK_COMPARE, out; \ + ldx [scr1 + %lo(native_stick_offset)], scr1; \ + sub scr1, scr2, scr2; \ +/* CSTYLED */ \ + brnz,pn scr2, .rd_stickcmpr.label; \ + add out, scr1, out + +#define WR_TICKCMPR(in, scr1, scr2, label) \ + sethi %hi(native_stick_offset), scr1; \ + ldx [scr1 + %lo(native_stick_offset)], scr1; \ + sub in, scr1, scr1; \ + wr scr1, STICK_COMPARE + +#define GET_NATIVE_TIME(out, scr1, scr2, label) \ +/* CSTYLED */ \ + RD_STICK(out,scr1,scr2,label) + +/* * Sun4v processors come up with NPT cleared and there is no need to * clear it again. Also, clearing of the NPT cannot be done atomically * on a CMT processor. @@ -51,7 +169,72 @@ extern "C" { * At least 62.5 MHz, for faster %tick-based systems. 
*/ #define NSEC_SHIFT 4 -#define VTRACE_SHIFT 4 + +/* + * NOTE: the macros below assume that the various time-related variables + * (hrestime, hrestime_adj, hres_last_tick, timedelta, nsec_scale, etc) + * are all stored together on a 64-byte boundary. The primary motivation + * is cache performance, but we also take advantage of a convenient side + * effect: these variables all have the same high 22 address bits, so only + * one sethi is needed to access them all. + */ + +/* + * GET_HRESTIME() returns the value of hrestime, hrestime_adj and the + * number of nanoseconds since the last clock tick ('nslt'). It also + * sets 'nano' to the value NANOSEC (one billion). + * + * This macro assumes that all registers are globals or outs so they can + * safely contain 64-bit data, and that it's safe to use the label "5:". + * Further, this macro calls the NATIVE_TIME_TO_NSEC_SCALE which in turn + * uses the labels "6:" and "7:"; labels "5:", "6:" and "7:" must not + * be used across invocations of this macro. + */ +#define GET_HRESTIME(hrestsec, hrestnsec, adj, nslt, nano, scr, hrlock, \ + gnt1, gnt2, label) \ +5: sethi %hi(hres_lock), scr; \ + lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ + lduw [scr + %lo(nsec_scale)], nano; /* tick-to-ns factor */ \ + andn hrlock, 1, hrlock; /* see comments above! 
*/ \ + ldx [scr + %lo(hres_last_tick)], nslt; \ + ldn [scr + %lo(hrestime)], hrestsec; /* load hrestime.sec */\ + add scr, %lo(hrestime), hrestnsec; \ + ldn [hrestnsec + CLONGSIZE], hrestnsec; \ +/* CSTYLED */ \ + GET_NATIVE_TIME(adj,gnt1,gnt2,label); /* get current %stick */ \ + subcc adj, nslt, nslt; /* nslt = ticks since last clockint */ \ + movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ + ldx [scr + %lo(hrestime_adj)], adj; /* load hrestime_adj */ \ + /* membar #LoadLoad; (see comment (2) above) */ \ + lduw [scr + %lo(hres_lock)], scr; /* load clock lock */ \ + NATIVE_TIME_TO_NSEC_SCALE(nslt, nano, gnt1, NSEC_SHIFT); \ + sethi %hi(NANOSEC), nano; \ + xor hrlock, scr, scr; \ +/* CSTYLED */ \ + brnz,pn scr, 5b; \ + or nano, %lo(NANOSEC), nano; + +/* + * Similar to above, but returns current gethrtime() value in 'base'. + */ +#define GET_HRTIME(base, now, nslt, scale, scr, hrlock, gnt1, gnt2, label) \ +5: sethi %hi(hres_lock), scr; \ + lduw [scr + %lo(hres_lock)], hrlock; /* load clock lock */ \ + lduw [scr + %lo(nsec_scale)], scale; /* tick-to-ns factor */ \ + andn hrlock, 1, hrlock; /* see comments above! 
*/ \ + ldx [scr + %lo(hres_last_tick)], nslt; \ + ldx [scr + %lo(hrtime_base)], base; /* load hrtime_base */ \ +/* CSTYLED */ \ + GET_NATIVE_TIME(now,gnt1,gnt2,label); /* get current %stick */ \ + subcc now, nslt, nslt; /* nslt = ticks since last clockint */ \ + movneg %xcc, %g0, nslt; /* ignore neg delta from tick skew */ \ + /* membar #LoadLoad; (see comment (2) above) */ \ + ld [scr + %lo(hres_lock)], scr; /* load clock lock */ \ + NATIVE_TIME_TO_NSEC_SCALE(nslt, scale, gnt1, NSEC_SHIFT); \ + xor hrlock, scr, scr; \ +/* CSTYLED */ \ + brnz,pn scr, 5b; \ + add base, nslt, base; #endif /* CPU_MODULE */ diff --git a/usr/src/uts/sun4v/sys/platsvc.h b/usr/src/uts/sun4v/sys/platsvc.h index 9b76f1548c..2f1e0f37b5 100644 --- a/usr/src/uts/sun4v/sys/platsvc.h +++ b/usr/src/uts/sun4v/sys/platsvc.h @@ -20,15 +20,13 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _PLATSVC_H #define _PLATSVC_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -37,6 +35,7 @@ extern "C" { #include <sys/ds.h> #define MAX_REASON_SIZE 1 +#define SUSPEND_MAX_REASON_SIZE 256 /* * PLATSVC STATUS @@ -57,6 +56,28 @@ extern "C" { #define DOMAIN_PANIC_FAILURE PLATSVC_FAILURE #define DOMAIN_PANIC_INVALID_MSG PLATSVC_INVALID_MESG +/* + * Suspend message types. + */ +#define DOMAIN_SUSPEND_SUSPEND 0x0 + +/* + * Suspend response result values. + */ +#define DOMAIN_SUSPEND_PRE_SUCCESS PLATSVC_SUCCESS +#define DOMAIN_SUSPEND_PRE_FAILURE PLATSVC_FAILURE +#define DOMAIN_SUSPEND_INVALID_MSG PLATSVC_INVALID_MESG +#define DOMAIN_SUSPEND_INPROGRESS 0x3 +#define DOMAIN_SUSPEND_SUSPEND_FAILURE 0x4 +#define DOMAIN_SUSPEND_POST_SUCCESS 0x5 +#define DOMAIN_SUSPEND_POST_FAILURE 0x6 + +/* + * Suspend recovery result values. 
+ */ +#define DOMAIN_SUSPEND_REC_SUCCESS 0x0 +#define DOMAIN_SUSPEND_REC_FAILURE 0x1 + typedef struct platsvc_md_update_req { uint64_t req_num; } platsvc_md_update_req_t; @@ -66,7 +87,6 @@ typedef struct platsvc_md_update_resp { uint32_t result; } platsvc_md_update_resp_t; - typedef struct platsvc_shutdown_req { uint64_t req_num; uint32_t delay; @@ -88,6 +108,18 @@ typedef struct platsvc_panic_resp { char reason[MAX_REASON_SIZE]; } platsvc_panic_resp_t; +typedef struct platsvc_suspend_req { + uint64_t req_num; + uint64_t type; +} platsvc_suspend_req_t; + +typedef struct platsvc_suspend_resp { + uint64_t req_num; + uint32_t result; + uint32_t rec_result; + char reason[MAX_REASON_SIZE]; +} platsvc_suspend_resp_t; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/sun4v/sys/suspend.h b/usr/src/uts/sun4v/sys/suspend.h new file mode 100644 index 0000000000..c218f320d8 --- /dev/null +++ b/usr/src/uts/sun4v/sys/suspend.h @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_SUSPEND_H +#define _SYS_SUSPEND_H + +#ifdef __cplusplus +extern "C" { +#endif + +int suspend_pre(char *error_reason, size_t max_reason_length, + boolean_t *recovered); +int suspend_start(char *error_reason, size_t max_reason_length); +int suspend_post(char *error_reason, size_t max_reason_length); +void suspend_sync_tick_stick_npt(void); +boolean_t suspend_supported(void); + +#ifdef __cplusplus +} +#endif + +#endif /* !_SYS_SUSPEND_H */ diff --git a/usr/src/uts/sun4v/sys/traptrace.h b/usr/src/uts/sun4v/sys/traptrace.h index 93b8c6a5e6..2cb9e5d803 100644 --- a/usr/src/uts/sun4v/sys/traptrace.h +++ b/usr/src/uts/sun4v/sys/traptrace.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_TRAPTRACE_H #define _SYS_TRAPTRACE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -180,6 +178,7 @@ extern void mach_htraptrace_cleanup(int); #else /* _ASM */ #include <sys/machthread.h> +#include <sys/machclock.h> /* * Offsets of words in trap_trace_ctl: @@ -206,22 +205,22 @@ extern void mach_htraptrace_cleanup(int); /* * Use new %stick register for UltraSparc III and beyond for * sane debugging of mixed speed CPU systems. Use TRAPTRACE_FORCE_TICK - * for finer granularity on same speed systems. - * - * Note the label-less branches used due to contraints of where - * and when trap trace macros are used. + * for finer granularity on same speed systems. Note that traptrace + * %tick or %stick reads use the NO_SUSPEND_CHECK version of the + * register read macros. This requires fewer registers and a few less + * instructions to execute. 
As a result, if a suspend operation occurs + * while traptrace is executing GET_TRACE_TICK between the time that + * the counter offset variable is read and the hardware register is read, + * this traptrace entry in the log will have an incorrect %tick value + * since it is derived from a pre-suspend offset variable and a post- + * suspend hardware counter. */ #ifdef TRAPTRACE_FORCE_TICK -#define GET_TRACE_TICK(reg) \ - rdpr %tick, reg; +#define GET_TRACE_TICK(reg, scr) \ + RD_TICK_NO_SUSPEND_CHECK(reg, scr); #else -#define GET_TRACE_TICK(reg) \ - sethi %hi(traptrace_use_stick), reg; \ - lduw [reg + %lo(traptrace_use_stick)], reg; \ - /* CSTYLED */ \ - brz,a reg, .+12; \ - rdpr %tick, reg; \ - rd %asr24, reg; +#define GET_TRACE_TICK(reg, scr) \ + RD_TICKSTICK_FLAG(reg, scr, traptrace_use_stick); #endif /* @@ -321,7 +320,7 @@ extern void mach_htraptrace_cleanup(int); andn scr4, PSTATE_IE | PSTATE_AM, scr3; \ wrpr %g0, scr3, %pstate; \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ TRACE_SAVE_TL_GL_REGS(scr1, scr2); \ set code, scr2; \ @@ -348,7 +347,7 @@ extern void mach_htraptrace_cleanup(int); */ #define TRACE_WIN_INFO(code, scr1, scr2, scr3) \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ TRACE_SAVE_TL_GL_REGS(scr1, scr2); \ rdpr %tt, scr2; \ @@ -387,7 +386,7 @@ extern void mach_htraptrace_cleanup(int); #define FAULT_WINTRACE(scr1, scr2, scr3, type) \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ TRACE_SAVE_TL_GL_REGS(scr1, scr2); \ set type, scr2; \ @@ -409,7 +408,7 @@ extern void mach_htraptrace_cleanup(int); #define SYSTRAP_TRACE(scr1, scr2, scr3) \ TRACE_PTR(scr1, scr2); \ - GET_TRACE_TICK(scr2); \ + GET_TRACE_TICK(scr2, scr3); \ stxa scr2, [scr1 + TRAP_ENT_TICK]%asi; \ TRACE_SAVE_TL_GL_REGS(scr1, scr2); \ set SYSTRAP_TT, scr3; \ |