summaryrefslogtreecommitdiff
path: root/usr
diff options
context:
space:
mode:
authorHaik Aftandilian <Haik.Aftandilian@Sun.COM>2010-03-26 14:31:08 -0700
committerHaik Aftandilian <Haik.Aftandilian@Sun.COM>2010-03-26 14:31:08 -0700
commit00a57bdfe7eeb62d10d0c0b3aab64d24a4d89287 (patch)
treeed27f6a694fef51b53b935b1af7a66861d1aa779 /usr
parent273a517f474549f42a8e8b9af219c177c300c49e (diff)
downloadillumos-gate-00a57bdfe7eeb62d10d0c0b3aab64d24a4d89287.tar.gz
6927091 cooperative guest migration should account for some %stick variation across CPUs
Diffstat (limited to 'usr')
-rw-r--r--usr/src/uts/sun4v/cpu/common_asm.s22
-rw-r--r--usr/src/uts/sun4v/os/suspend.c135
2 files changed, 129 insertions, 28 deletions
diff --git a/usr/src/uts/sun4v/cpu/common_asm.s b/usr/src/uts/sun4v/cpu/common_asm.s
index 360dcdf217..9427dd04c3 100644
--- a/usr/src/uts/sun4v/cpu/common_asm.s
+++ b/usr/src/uts/sun4v/cpu/common_asm.s
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -130,14 +130,15 @@ tickcmpr_disable(void)
#if defined(lint)
/*
- * tick_write_delta() increments %tick by the specified delta. This should
- * only be called after a CPR event to assure that gethrtime() continues to
- * increase monotonically. Obviously, writing %tick needs to de done very
- * carefully to avoid introducing unnecessary %tick skew across CPUs. For
- * this reason, we make sure we're i-cache hot before actually writing to
- * %tick.
- *
- * NOTE: No provision for this on sun4v right now.
+ * tick_write_delta() is intended to increment %stick by the specified delta,
+ * but %stick is only writeable in hyperprivileged mode and at present there
+ * is no provision for this. tick_write_delta is called by the cyclic subsystem
+ * if a negative %stick delta is observed after cyclic processing is resumed
+ * after an event such as an OS suspend/resume. On sun4v, the suspend/resume
+ * routines should adjust the %stick offset preventing the cyclic subsystem
+ * from detecting a negative delta. If a negative delta is detected, panic the
+ * system. The negative delta could be caused by improper %stick
+ * synchronization after a suspend/resume.
*/
/*ARGSUSED*/
@@ -149,11 +150,12 @@ tick_write_delta(uint64_t delta)
.seg ".text"
tick_write_delta_panic:
- .asciz "tick_write_delta: not supported"
+ .asciz "tick_write_delta: not supported, delta: 0x%lx"
ENTRY_NP(tick_write_delta)
sethi %hi(tick_write_delta_panic), %o1
save %sp, -SA(MINFRAME), %sp ! get a new window to preserve caller
+ mov %i0, %o1
call panic
or %i1, %lo(tick_write_delta_panic), %o0
/*NOTREACHED*/
diff --git a/usr/src/uts/sun4v/os/suspend.c b/usr/src/uts/sun4v/os/suspend.c
index d4cd260d7d..5b11e9225d 100644
--- a/usr/src/uts/sun4v/os/suspend.c
+++ b/usr/src/uts/sun4v/os/suspend.c
@@ -46,6 +46,8 @@
#include <sys/hsvc.h>
#include <sys/mpo.h>
#include <vm/hat_sfmmu.h>
+#include <sys/time.h>
+#include <sys/clock.h>
/*
* Sun4v OS Suspend
@@ -86,6 +88,7 @@ extern int mach_descrip_update(void);
extern cpuset_t cpu_ready_set;
extern uint64_t native_tick_offset;
extern uint64_t native_stick_offset;
+extern uint64_t sys_tick_freq;
/*
* Global Sun Cluster pre/post callbacks.
@@ -134,6 +137,26 @@ boolean_t tick_stick_emulation_active = B_FALSE;
static int suspend_update_cpu_mappings = 1;
/*
+ * The maximum number of microseconds by which the %tick or %stick register
+ * can vary between any two CPUs in the system. To calculate the
+ * native_stick_offset and native_tick_offset, we measure the change in these
+ * registers on one CPU over a suspend/resume. Other CPUs may experience
+ * slightly larger or smaller changes. %tick and %stick should be synchronized
+ * between CPUs, but there may be some variation. So we add an additional value
+ * derived from this variable to ensure that these registers always increase
+ * over a suspend/resume operation, assuming all %tick and %stick registers
+ * are synchronized (within a certain limit) across CPUs in the system. The
+ * delta between %sticks on different CPUs should be a small number of cycles,
+ * not perceptible to readers of %stick that migrate between CPUs. We set this
+ * value to 1 millisecond which means that over a suspend/resume operation,
+ * all CPUs' %tick and %stick will advance forwards as long as, across all
+ * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to
+ * CPUs before the suspend and CPUs after the resume. 1 ms is conservative,
+ * but small enough to not trigger TOD faults.
+ */
+static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */
+
+/*
* DBG and DBG_PROM() macro.
*/
#ifdef DEBUG
@@ -188,23 +211,96 @@ suspend_supported(void)
}
/*
- * Given a source tick and stick value, set the tick and stick offsets such
- * that the (current physical register value + offset == source value).
+ * Given a source tick, stick, and tod value, set the tick and stick offsets
+ * such that the (current physical register value) + offset == (source value)
+ * and in addition account for some variation between the %tick/%stick on
+ * different CPUs. We account for this variation by adding in double the value
+ * of suspend_tick_stick_max_delta. The following is an explanation of why
+ * suspend_tick_stick_max_delta must be multiplied by two and added to
+ * native_stick_offset.
+ *
+ * Consider a guest instance that is yet to be suspended with CPUs p0 and p1
+ * with physical "source" %stick values s0 and s1 respectively. When the guest
+ * is first resumed, the physical "target" %stick values are t0 and t1
+ * respectively. The virtual %stick values after the resume are v0 and v1
+ * respectively. Let x be the maximum difference between any two CPUs' %stick
+ * register at a given point in time and let the %stick values be assigned
+ * such that
+ *
+ * s1 = s0 + x and
+ * t1 = t0 - x
+ *
+ * Let us assume that p0 is driving the suspend and resume. Then, we will
+ * calculate the stick offset f and the virtual %stick on p0 after the
+ * resume as follows.
+ *
+ * f = s0 - t0 and
+ * v0 = t0 + f
+ *
+ * We calculate the virtual %stick v1 on p1 after the resume as
+ *
+ * v1 = t1 + f
+ *
+ * Substitution yields
+ *
+ * v1 = t1 + (s0 - t0)
+ * v1 = (t0 - x) + (s0 - t0)
+ * v1 = -x + s0
+ * v1 = s0 - x
+ * v1 = (s1 - x) - x
+ * v1 = s1 - 2x
+ *
+ * Therefore, in this scenario, without accounting for %stick variation in
+ * the calculation of the native_stick_offset f, the virtual %stick on p1
+ * is less than the value of the %stick on p1 before the suspend which is
+ * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1
+ * which means the %stick on p1 after the resume will always be greater
+ * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f
+ * at any point in time, we can accomplish this by adding 2x to f. This
+ * guarantees any processes bound to CPU p0 or p1 will not see a %stick
+ * decrease across a suspend/resume. Hence, in the code below, we multiply
+ * suspend_tick_stick_max_delta by two in the calculation for
+ * native_stick_offset, native_tick_offset, and target_hrtime.
*/
static void
-set_tick_offsets(uint64_t source_tick, uint64_t source_stick)
+set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp)
{
uint64_t target_tick;
uint64_t target_stick;
+ hrtime_t source_hrtime;
+ hrtime_t target_hrtime;
+ /*
+ * Temporarily set the offsets to zero so that the following reads
+ * of the registers will yield physical unadjusted counter values.
+ */
native_tick_offset = 0;
native_stick_offset = 0;
target_tick = gettick_counter(); /* returns %tick */
target_stick = gettick(); /* returns %stick */
- native_tick_offset = source_tick - target_tick;
- native_stick_offset = source_stick - target_stick;
+ /*
+ * Calculate the new offsets. In addition to the delta observed on
+ * this CPU, add an additional value. Multiply the %tick/%stick
+ * frequency by suspend_tick_stick_max_delta (us). Then, multiply by 2
+ * to account for a delta between CPUs before the suspend and a
+ * delta between CPUs after the resume.
+ */
+ native_tick_offset = (source_tick - target_tick) +
+ (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC);
+ native_stick_offset = (source_stick - target_stick) +
+ (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC);
+
+ /*
+ * We've effectively increased %stick and %tick by twice the value
+ * of suspend_tick_stick_max_delta to account for variation across
+ * CPUs. Now adjust the preserved TOD by the same amount.
+ */
+ source_hrtime = ts2hrt(tsp);
+ target_hrtime = source_hrtime +
+ (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC));
+ hrt2ts(target_hrtime, tsp);
}
/*
@@ -503,10 +599,13 @@ suspend_start(char *error_reason, size_t max_reason_len)
pause_cpus(NULL);
DBG_PROM("suspend: CPUs paused\n");
- /* Suspend cyclics and disable interrupts */
+ /* Suspend cyclics */
cyclic_suspend();
DBG_PROM("suspend: cyclics suspended\n");
+
+ /* Disable interrupts */
spl = spl8();
+ DBG_PROM("suspend: spl8()\n");
source_tick = gettick_counter();
source_stick = gettick();
@@ -514,16 +613,16 @@ suspend_start(char *error_reason, size_t max_reason_len)
DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);
/*
- * Call into the HV to initiate the suspend.
- * hv_guest_suspend() returns after the guest has been
- * resumed or if the suspend operation failed or was
- * cancelled. After a successful suspend, the %tick and
- * %stick registers may have changed by an amount that is
- * not proportional to the amount of time that has passed.
- * They may have jumped forwards or backwards. This jump
- * must be uniform across all CPUs and we operate under
- * the assumption that it is (maintaining two global offset
- * variables--one for %tick and one for %stick.)
+ * Call into the HV to initiate the suspend. hv_guest_suspend()
+ * returns after the guest has been resumed or if the suspend
+ * operation failed or was cancelled. After a successful suspend,
+ * the %tick and %stick registers may have changed by an amount
+ * that is not proportional to the amount of time that has passed.
+ * They may have jumped forwards or backwards. Some variation is
+ * allowed and accounted for using suspend_tick_stick_max_delta,
+ * but otherwise this jump must be uniform across all CPUs and we
+ * operate under the assumption that it is (maintaining two global
+ * offset variables--one for %tick and one for %stick.)
*/
DBG_PROM("suspend: suspending... \n");
rv = hv_guest_suspend();
@@ -538,8 +637,8 @@ suspend_start(char *error_reason, size_t max_reason_len)
return (rv);
}
- /* Update the global tick and stick offsets */
- set_tick_offsets(source_tick, source_stick);
+ /* Update the global tick and stick offsets and the preserved TOD */
+ set_tick_offsets(source_tick, source_stick, &source_tod);
/* Ensure new offsets are globally visible before resuming CPUs */
membar_sync();