diff options
author | Haik Aftandilian <Haik.Aftandilian@Sun.COM> | 2010-03-26 14:31:08 -0700 |
---|---|---|
committer | Haik Aftandilian <Haik.Aftandilian@Sun.COM> | 2010-03-26 14:31:08 -0700 |
commit | 00a57bdfe7eeb62d10d0c0b3aab64d24a4d89287 (patch) | |
tree | ed27f6a694fef51b53b935b1af7a66861d1aa779 /usr | |
parent | 273a517f474549f42a8e8b9af219c177c300c49e (diff) | |
download | illumos-gate-00a57bdfe7eeb62d10d0c0b3aab64d24a4d89287.tar.gz |
6927091 cooperative guest migration should account for some %stick variation across CPUs
Diffstat (limited to 'usr')
-rw-r--r-- | usr/src/uts/sun4v/cpu/common_asm.s | 22 | ||||
-rw-r--r-- | usr/src/uts/sun4v/os/suspend.c | 135 |
2 files changed, 129 insertions, 28 deletions
diff --git a/usr/src/uts/sun4v/cpu/common_asm.s b/usr/src/uts/sun4v/cpu/common_asm.s index 360dcdf217..9427dd04c3 100644 --- a/usr/src/uts/sun4v/cpu/common_asm.s +++ b/usr/src/uts/sun4v/cpu/common_asm.s @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -130,14 +130,15 @@ tickcmpr_disable(void) #if defined(lint) /* - * tick_write_delta() increments %tick by the specified delta. This should - * only be called after a CPR event to assure that gethrtime() continues to - * increase monotonically. Obviously, writing %tick needs to de done very - * carefully to avoid introducing unnecessary %tick skew across CPUs. For - * this reason, we make sure we're i-cache hot before actually writing to - * %tick. - * - * NOTE: No provision for this on sun4v right now. + * tick_write_delta() is intended to increment %stick by the specified delta, + * but %stick is only writeable in hyperprivileged mode and at present there + * is no provision for this. tick_write_delta is called by the cyclic subsystem + * if a negative %stick delta is observed after cyclic processing is resumed + * after an event such as an OS suspend/resume. On sun4v, the suspend/resume + * routines should adjust the %stick offset preventing the cyclic subsystem + * from detecting a negative delta. If a negative delta is detected, panic the + * system. The negative delta could be caused by improper %stick + * synchronization after a suspend/resume. */ /*ARGSUSED*/ @@ -149,11 +150,12 @@ tick_write_delta(uint64_t delta) .seg ".text" tick_write_delta_panic: - .asciz "tick_write_delta: not supported" + .asciz "tick_write_delta: not supported, delta: 0x%lx" ENTRY_NP(tick_write_delta) sethi %hi(tick_write_delta_panic), %o1 save %sp, -SA(MINFRAME), %sp ! 
get a new window to preserve caller + mov %i0, %o1 call panic or %i1, %lo(tick_write_delta_panic), %o0 /*NOTREACHED*/ diff --git a/usr/src/uts/sun4v/os/suspend.c b/usr/src/uts/sun4v/os/suspend.c index d4cd260d7d..5b11e9225d 100644 --- a/usr/src/uts/sun4v/os/suspend.c +++ b/usr/src/uts/sun4v/os/suspend.c @@ -46,6 +46,8 @@ #include <sys/hsvc.h> #include <sys/mpo.h> #include <vm/hat_sfmmu.h> +#include <sys/time.h> +#include <sys/clock.h> /* * Sun4v OS Suspend @@ -86,6 +88,7 @@ extern int mach_descrip_update(void); extern cpuset_t cpu_ready_set; extern uint64_t native_tick_offset; extern uint64_t native_stick_offset; +extern uint64_t sys_tick_freq; /* * Global Sun Cluster pre/post callbacks. @@ -134,6 +137,26 @@ boolean_t tick_stick_emulation_active = B_FALSE; static int suspend_update_cpu_mappings = 1; /* + * The maximum number of microseconds by which the %tick or %stick register + * can vary between any two CPUs in the system. To calculate the + * native_stick_offset and native_tick_offset, we measure the change in these + * registers on one CPU over a suspend/resume. Other CPUs may experience + * slightly larger or smaller changes. %tick and %stick should be synchronized + * between CPUs, but there may be some variation. So we add an additional value + * derived from this variable to ensure that these registers always increase + * over a suspend/resume operation, assuming all %tick and %stick registers + * are synchronized (within a certain limit) across CPUs in the system. The + * delta between %sticks on different CPUs should be a small number of cycles, + * not perceptible to readers of %stick that migrate between CPUs. We set this + * value to 1 millisecond which means that over a suspend/resume operation, + * all CPU's %tick and %stick will advance forwards as long as, across all + * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to + * CPUs before the suspend and CPUs after the resume. 
1 ms is conservative, + * but small enough to not trigger TOD faults. + */ +static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */ + +/* * DBG and DBG_PROM() macro. */ #ifdef DEBUG @@ -188,23 +211,96 @@ suspend_supported(void) } /* - * Given a source tick and stick value, set the tick and stick offsets such - * that the (current physical register value + offset == source value). + * Given a source tick, stick, and tod value, set the tick and stick offsets + * such that the (current physical register value) + offset == (source value) + * and in addition account for some variation between the %tick/%stick on + * different CPUs. We account for this variation by adding in double the value + * of suspend_tick_stick_max_delta. The following is an explanation of why + * suspend_tick_stick_max_delta must be multiplied by two and added to + * native_stick_offset. + * + * Consider a guest instance that is yet to be suspended with CPUs p0 and p1 + * with physical "source" %stick values s0 and s1 respectively. When the guest + * is first resumed, the physical "target" %stick values are t0 and t1 + * respectively. The virtual %stick values after the resume are v0 and v1 + * respectively. Let x be the maximum difference between any two CPU's %stick + * register at a given point in time and let the %stick values be assigned + * such that + * + * s1 = s0 + x and + * t1 = t0 - x + * + * Let us assume that p0 is driving the suspend and resume. Then, we will + * calculate the stick offset f and the virtual %stick on p0 after the + * resume as follows. 
+ * + * f = s0 - t0 and + * v0 = t0 + f + * + * We calculate the virtual %stick v1 on p1 after the resume as + * + * v1 = t1 + f + * + * Substitution yields + * + * v1 = t1 + (s0 - t0) + * v1 = (t0 - x) + (s0 - t0) + * v1 = -x + s0 + * v1 = s0 - x + * v1 = (s1 - x) - x + * v1 = s1 - 2x + * + * Therefore, in this scenario, without accounting for %stick variation in + * the calculation of the native_stick_offset f, the virtual %stick on p1 + * is less than the value of the %stick on p1 before the suspend which is + * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1 + * which means the %stick on p1 after the resume will always be greater + * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f + * at any point in time, we can accomplish this by adding 2x to f. This + * guarantees any processes bound to CPU P0 or P1 will not see a %stick + * decrease across a suspend/resume. Hence, in the code below, we multiply + * suspend_tick_stick_max_delta by two in the calculation for + * native_stick_offset, native_tick_offset, and target_hrtime. */ static void -set_tick_offsets(uint64_t source_tick, uint64_t source_stick) +set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp) { uint64_t target_tick; uint64_t target_stick; + hrtime_t source_hrtime; + hrtime_t target_hrtime; + /* + * Temporarily set the offsets to zero so that the following reads + * of the registers will yield physical unadjusted counter values. + */ native_tick_offset = 0; native_stick_offset = 0; target_tick = gettick_counter(); /* returns %tick */ target_stick = gettick(); /* returns %stick */ - native_tick_offset = source_tick - target_tick; - native_stick_offset = source_stick - target_stick; + /* + * Calculate the new offsets. In addition to the delta observed on + * this CPU, add an additional value. Multiply the %tick/%stick + * frequency by suspend_tick_stick_max_delta (us). 
Then, multiply by 2 + * to account for a delta between CPUs before the suspend and a + * delta between CPUs after the resume. + */ + native_tick_offset = (source_tick - target_tick) + + (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC); + native_stick_offset = (source_stick - target_stick) + + (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC); + + /* + * We've effectively increased %stick and %tick by twice the value + * of suspend_tick_stick_max_delta to account for variation across + * CPUs. Now adjust the preserved TOD by the same amount. + */ + source_hrtime = ts2hrt(tsp); + target_hrtime = source_hrtime + + (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC)); + hrt2ts(target_hrtime, tsp); } /* @@ -503,10 +599,13 @@ suspend_start(char *error_reason, size_t max_reason_len) pause_cpus(NULL); DBG_PROM("suspend: CPUs paused\n"); - /* Suspend cyclics and disable interrupts */ + /* Suspend cyclics */ cyclic_suspend(); DBG_PROM("suspend: cyclics suspended\n"); + + /* Disable interrupts */ spl = spl8(); + DBG_PROM("suspend: spl8()\n"); source_tick = gettick_counter(); source_stick = gettick(); @@ -514,16 +613,16 @@ suspend_start(char *error_reason, size_t max_reason_len) DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick); /* - * Call into the HV to initiate the suspend. - * hv_guest_suspend() returns after the guest has been - * resumed or if the suspend operation failed or was - * cancelled. After a successful suspend, the %tick and - * %stick registers may have changed by an amount that is - * not proportional to the amount of time that has passed. - * They may have jumped forwards or backwards. This jump - * must be uniform across all CPUs and we operate under - * the assumption that it is (maintaining two global offset - * variables--one for %tick and one for %stick.) + * Call into the HV to initiate the suspend. 
hv_guest_suspend() + * returns after the guest has been resumed or if the suspend + * operation failed or was cancelled. After a successful suspend, + * the %tick and %stick registers may have changed by an amount + * that is not proportional to the amount of time that has passed. + * They may have jumped forwards or backwards. Some variation is + * allowed and accounted for using suspend_tick_stick_max_delta, + * but otherwise this jump must be uniform across all CPUs and we + * operate under the assumption that it is (maintaining two global + * offset variables--one for %tick and one for %stick.) */ DBG_PROM("suspend: suspending... \n"); rv = hv_guest_suspend(); @@ -538,8 +637,8 @@ suspend_start(char *error_reason, size_t max_reason_len) return (rv); } - /* Update the global tick and stick offsets */ - set_tick_offsets(source_tick, source_stick); + /* Update the global tick and stick offsets and the preserved TOD */ + set_tick_offsets(source_tick, source_stick, &source_tod); /* Ensure new offsets are globally visible before resuming CPUs */ membar_sync(); |