summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Bryan Cantrill <bryan@joyent.com>, 2012-01-03 09:52:15 +0000
committer: Bryan Cantrill <bryan@joyent.com>, 2012-01-03 09:52:15 +0000
commit: 4f48eea6a4f757ec8e9e0884360387c45fff7356 (patch)
tree: 426679618b10c7232cbeb0025f21a82e48aa7d6b
parent: 2eda6ac426328a10c4c76ee386951b2976e11328 (diff)
download: illumos-kvm-cmd-4f48eea6a4f757ec8e9e0884360387c45fff7356.tar.gz
HVM-711 need an interval timer-based alarm timer backend
-rw-r--r-- qemu-timer-common.c 2
-rw-r--r-- qemu-timer.c 381
-rw-r--r-- qemu-timer.h 4
-rw-r--r-- trace-events 9
4 files changed, 389 insertions, 7 deletions
diff --git a/qemu-timer-common.c b/qemu-timer-common.c
index 755e300..24a15bb 100644
--- a/qemu-timer-common.c
+++ b/qemu-timer-common.c
@@ -51,7 +51,7 @@ static void __attribute__((constructor)) init_get_clock(void)
use_rt_clock = 0;
#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \
|| defined(__DragonFly__) || defined(__FreeBSD_kernel__) \
- || defined(__OpenBSD__)
+ || defined(__OpenBSD__) || defined (__sun__)
{
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
diff --git a/qemu-timer.c b/qemu-timer.c
index 0230274..10a8ff5 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -27,6 +27,7 @@
#include "net.h"
#include "monitor.h"
#include "console.h"
+#include "trace.h"
#include "hw/hw.h"
@@ -162,8 +163,10 @@ struct QEMUClock {
struct QEMUTimer {
QEMUClock *clock;
int64_t expire_time;
+ int64_t interval; /* gap between the last two expire_time values seen by qemu_mod_timer(); 0 if none was inferred */
QEMUTimerCB *cb;
void *opaque;
+ void *source; /* multiticks source last assigned to this timer by multiticks_source(), if any */
struct QEMUTimer *next;
};
@@ -217,6 +220,14 @@ static void win32_rearm_timer(struct qemu_alarm_timer *t);
static int unix_start_timer(struct qemu_alarm_timer *t);
static void unix_stop_timer(struct qemu_alarm_timer *t);
+#if defined(__sun__)
+
+static int multiticks_start_timer(struct qemu_alarm_timer *t);
+static void multiticks_stop_timer(struct qemu_alarm_timer *t);
+static void multiticks_rearm_timer(struct qemu_alarm_timer *t);
+
+#endif
+
#if defined(__linux__) || defined(__sun__)
static int dynticks_start_timer(struct qemu_alarm_timer *t);
@@ -294,6 +305,10 @@ int64_t qemu_icount_round(int64_t count)
static struct qemu_alarm_timer alarm_timers[] = {
#ifndef _WIN32
+#if defined(__sun__)
+ {"multiticks", multiticks_start_timer,
+ multiticks_stop_timer, multiticks_rearm_timer, NULL},
+#endif
#if defined(__linux__) || defined(__sun__)
{"dynticks", dynticks_start_timer,
dynticks_stop_timer, dynticks_rearm_timer, NULL},
@@ -456,10 +471,19 @@ void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time)
break;
pt = &t->next;
}
+
+ if (ts->expire_time && expire_time > ts->expire_time) {
+ ts->interval = expire_time - ts->expire_time;
+ } else {
+ ts->interval = 0;
+ }
+
ts->expire_time = expire_time;
ts->next = *pt;
*pt = ts;
+ trace_qemu_mod_timer(ts, expire_time, ts->interval);
+
/* Rearm if necessary */
if (pt == &active_timers[ts->clock->type]) {
if (!alarm_timer->pending) {
@@ -502,6 +526,9 @@ static void qemu_run_timers(QEMUClock *clock)
ts = *ptimer_head;
if (!ts || ts->expire_time > current_time)
break;
+
+ trace_qemu_run_timer(ts, ts->expire_time, current_time);
+
/* remove timer from the list before calling the callback */
*ptimer_head = ts->next;
ts->next = NULL;
@@ -642,7 +669,7 @@ void qemu_run_all_timers(void)
qemu_run_timers(host_clock);
}
-static int64_t qemu_next_alarm_deadline(void);
+static int64_t qemu_next_alarm_deadline(struct QEMUTimer **);
#ifdef _WIN32
static void CALLBACK host_alarm_handler(UINT uTimerID, UINT uMsg,
@@ -686,7 +713,7 @@ static void host_alarm_handler(int host_signum)
}
#endif
if (alarm_has_dynticks(t) ||
- qemu_next_alarm_deadline () <= 0) {
+ qemu_next_alarm_deadline (NULL) <= 0) {
t->expired = alarm_has_dynticks(t);
t->pending = 1;
qemu_notify_event();
@@ -715,28 +742,39 @@ int64_t qemu_next_deadline(void)
return delta;
}
-static int64_t qemu_next_alarm_deadline(void)
+static int64_t qemu_next_alarm_deadline(struct QEMUTimer **tp)
{
int64_t delta;
int64_t rtdelta;
+ struct QEMUTimer *t;
+
+ if (tp == NULL)
+ tp = &t;
if (!use_icount && active_timers[QEMU_CLOCK_VIRTUAL]) {
delta = active_timers[QEMU_CLOCK_VIRTUAL]->expire_time -
qemu_get_clock(vm_clock);
+ *tp = active_timers[QEMU_CLOCK_VIRTUAL];
} else {
delta = INT32_MAX;
+ *tp = NULL;
}
if (active_timers[QEMU_CLOCK_HOST]) {
int64_t hdelta = active_timers[QEMU_CLOCK_HOST]->expire_time -
qemu_get_clock_ns(host_clock);
- if (hdelta < delta)
+ if (hdelta < delta) {
delta = hdelta;
+ *tp = active_timers[QEMU_CLOCK_HOST];
+ }
}
+
if (active_timers[QEMU_CLOCK_REALTIME]) {
rtdelta = (active_timers[QEMU_CLOCK_REALTIME]->expire_time * 1000000 -
qemu_get_clock_ns(rt_clock));
- if (rtdelta < delta)
+ if (rtdelta < delta) {
delta = rtdelta;
+ *tp = active_timers[QEMU_CLOCK_REALTIME];
+ }
}
return delta;
@@ -847,6 +885,337 @@ static void rtc_stop_timer(struct qemu_alarm_timer *t)
#endif /* defined(__linux__) */
+#if defined(__sun__)
+
+#define QEMU_MULTITICKS_NSOURCES 8 /* number of POSIX timers created by multiticks_start_timer() */
+
+int multiticks_enabled = 1; /* when 0: start fails, and a running backend always uses sources[0] */
+int multiticks_tolerance_jitter = 20000; /* how far past the expected band (in get_clock() units) a firing may land and still be accepted */
+int64_t multiticks_tolerance_interval = 200000; /* inferred intervals shorter than this are armed as one-shots */
+int64_t multiticks_reap_threshold = NANOSEC; /* minimum time between reaping passes */
+int multiticks_reap_multiplier = 4; /* a source is reaped once it has not been re-armed for this many of its intervals */
+
+struct multitick_source {
+ timer_t source; /* POSIX timer id from timer_create(); -1 until created */
+ QEMUTimer *timer; /* QEMU timer this source was last assigned to */
+ int64_t armed; /* get_clock() time of the last interval arm or in-band check; 0 for one-shots */
+ int64_t interval; /* interval currently programmed; 0 if one-shot or reaped */
+ int64_t initial; /* absolute time programmed for the first firing */
+};
+
+struct qemu_alarm_multiticks {
+ int64_t reaped; /* get_clock() time of the last completed reaping pass */
+ struct multitick_source sources[QEMU_MULTITICKS_NSOURCES]; /* fixed pool of timer sources */
+};
+
+/*
+ * Many QEMU timer consumers seek to create interval timers, but QEMU only has
+ * a one-shot timer facility.  This forces the consumer to effect their own
+ * intervals, an annoying (but not necessarily difficult) task.  However, the
+ * problem with using one-shots to implement interval timers is the overhead
+ * of programming the underlying timer (e.g., timer_settime()):  even at
+ * moderate frequencies (e.g., 1 KHz) this overhead can become significant at
+ * modest levels of tenancy.  Given that the underlying POSIX timer facility
+ * is in fact capable of providing interval timers (and given that using the
+ * interval timers is more accurate than effecting the same with a one-shot),
+ * and given that one can have multiple timers in a process, there is an
+ * opportunity to significantly reduce timer programming overhead while
+ * increasing timer accuracy by making better use of POSIX timers.  The
+ * multiticks alarm timer does exactly this via a cache of interval timers,
+ * associating a timer in a one-to-one manner with an underlying source.
+ *
+ * multiticks_start_timer() installs host_alarm_handler for SIGALRM, allocates
+ * the backend state into t->priv, creates QEMU_MULTITICKS_NSOURCES
+ * CLOCK_MONOTONIC timers, and verifies that the implementation reports back
+ * exactly the interval it was given.
+ *
+ * Returns 0 on success.  Returns -1 if multiticks is disabled or any step
+ * fails; in the failure case everything created so far is released via
+ * multiticks_stop_timer().
+ */
+static int multiticks_start_timer(struct qemu_alarm_timer *t)
+{
+    struct sigevent ev;
+    struct sigaction act;
+    struct qemu_alarm_multiticks *multiticks;
+    struct multitick_source *sources;
+    struct itimerspec timeout;
+    struct timespec res;
+    int64_t resolution, found;
+    int i;
+
+    if (!multiticks_enabled) {
+        fprintf(stderr, "multiticks: programmatically disabled\n");
+        return -1;
+    }
+
+    sigfillset(&act.sa_mask);
+    act.sa_flags = 0;
+    act.sa_handler = host_alarm_handler;
+
+    /* Nothing has been allocated yet, so a plain return is sufficient. */
+    if (sigaction(SIGALRM, &act, NULL) != 0) {
+        perror("multiticks: sigaction");
+        fprintf(stderr, "multiticks: could not install handler; disabling\n");
+        return -1;
+    }
+
+    multiticks = qemu_mallocz(sizeof (struct qemu_alarm_multiticks));
+    sources = multiticks->sources;
+    t->priv = multiticks;
+
+    memset(&ev, 0, sizeof(ev));
+    ev.sigev_value.sival_int = 0;
+    ev.sigev_notify = SIGEV_SIGNAL;
+    ev.sigev_signo = SIGALRM;
+
+    /*
+     * Mark every slot as not-yet-created first, so that the cleanup path can
+     * tell created timers from untouched slots if creation fails part-way.
+     */
+    for (i = 0; i < QEMU_MULTITICKS_NSOURCES; i++) {
+        sources[i].source = -1;
+    }
+
+    for (i = 0; i < QEMU_MULTITICKS_NSOURCES; i++) {
+        if (timer_create(CLOCK_MONOTONIC, &ev, &sources[i].source) != 0) {
+            perror("multiticks: timer_create");
+            fprintf(stderr, "multiticks: could not create timer; disabling\n");
+            goto fail;
+        }
+    }
+
+    /*
+     * Check that the implementation properly honors an arbitrary interval --
+     * and in particular, an interval that is explicitly not evenly divided
+     * by the resolution.  (Multiticks very much relies on interval timers
+     * being properly implemented; even small errors in the interval can
+     * add up quickly when frequencies are high.)
+     */
+    if (clock_getres(CLOCK_MONOTONIC, &res) != 0) {
+        perror("multiticks: clock_getres");
+        fprintf(stderr, "multiticks: could not get resolution; disabling\n");
+        goto fail;
+    }
+
+    /*
+     * Despite its name, this is the test interval:  a large multiple of the
+     * clock resolution plus one, so that it cannot be evenly divided by the
+     * resolution (unless the resolution is 1).
+     */
+    resolution = (res.tv_sec * NANOSEC + res.tv_nsec) * 60 * NANOSEC + 1;
+
+    timeout.it_value.tv_sec = resolution / NANOSEC;
+    timeout.it_value.tv_nsec = resolution % NANOSEC;
+    timeout.it_interval.tv_sec = resolution / NANOSEC;
+    timeout.it_interval.tv_nsec = resolution % NANOSEC;
+
+    if (timer_settime(sources[0].source, TIMER_RELTIME, &timeout, NULL) != 0) {
+        perror("multiticks: timer_settime");
+        fprintf(stderr, "multiticks: could not set test timer; disabling\n");
+        goto fail;
+    }
+
+    if (timer_gettime(sources[0].source, &timeout) != 0) {
+        perror("multiticks: timer_gettime");
+        fprintf(stderr, "multiticks: could not get test timer; disabling\n");
+        goto fail;
+    }
+
+    found = timeout.it_interval.tv_sec * NANOSEC + timeout.it_interval.tv_nsec;
+
+    if (resolution != found) {
+        fprintf(stderr, "multiticks: interval not properly honored "
+            "(set to %lld; found %lld); disabling\n",
+            (long long)resolution, (long long)found);
+        goto fail;
+    }
+
+    /* Disarm the test timer again; an all-zero it_value means "stop". */
+    memset(&timeout, 0, sizeof (timeout));
+    (void) timer_settime(sources[0].source, TIMER_RELTIME, &timeout, NULL);
+
+    return 0;
+
+fail:
+    multiticks_stop_timer(t);
+    return -1;
+}
+
+/*
+ * Tear down the multiticks backend:  delete every POSIX timer that was
+ * actually created (slots still holding -1 are skipped), release the backend
+ * state and clear t->priv.  Does nothing if t->priv is already NULL.
+ */
+static void multiticks_stop_timer(struct qemu_alarm_timer *t)
+{
+    struct qemu_alarm_multiticks *multiticks = t->priv;
+    int i;
+
+    if (multiticks == NULL) {
+        return;
+    }
+
+    for (i = 0; i < QEMU_MULTITICKS_NSOURCES; i++) {
+        if (multiticks->sources[i].source != -1) {
+            timer_delete(multiticks->sources[i].source);
+        }
+    }
+
+    /*
+     * The state was obtained from qemu_mallocz(), so it is released with the
+     * matching qemu_free() rather than qemu_vfree().
+     */
+    qemu_free(multiticks);
+    t->priv = NULL;
+}
+
+/*
+ * Select the timer source to use for the given QEMU timer.
+ *
+ * If the timer still owns the source it was last given, that source is
+ * returned unchanged.  Otherwise the source with the smallest 'armed' stamp
+ * is taken over (one-shots carry 0 and are therefore preferred) and linked to
+ * the timer in both directions.  When multiticks_enabled is 0, sources[0] is
+ * always used and its interval is cleared.
+ *
+ * Never returns NULL.
+ */
+static struct multitick_source *multiticks_source(struct qemu_alarm_timer *t,
+    QEMUTimer *timer)
+{
+    struct qemu_alarm_multiticks *multiticks = t->priv;
+    struct multitick_source *sources = multiticks->sources;
+    struct multitick_source *source = NULL;
+    int64_t oldest = INT64_MAX;
+    int i;
+
+    /*
+     * We have a dynamic check here against multiticks_enabled to allow it
+     * to be dynamically disabled after the multiticks alarm timer has been
+     * configured.  When disabled, multiticks should degenerate to an
+     * implementation approximating that of dynticks, allowing for behavior
+     * comparisons to be made without restarting guests.
+     */
+    if (!multiticks_enabled) {
+        source = &sources[0];
+        source->interval = 0;
+    } else {
+        struct multitick_source *cached = timer->source;
+
+        if (cached != NULL && cached->timer == timer) {
+            /*
+             * This timer still owns its source -- it wasn't stolen since last
+             * being armed.
+             */
+            return (cached);
+        }
+
+        /*
+         * The source has either been stolen from the timer, or it was never
+         * assigned; find a source and assign it.  'source' deliberately
+         * starts out NULL here so that the assertion below really checks
+         * that the scan selected something.
+         */
+        for (i = 0; i < QEMU_MULTITICKS_NSOURCES; i++) {
+            if (sources[i].armed < oldest) {
+                oldest = sources[i].armed;
+                source = &sources[i];
+            }
+        }
+    }
+
+    /* Validate before the first dereference, not after it. */
+    assert(source != NULL);
+
+    /* Traced before reassignment, so this reports the previous owner. */
+    trace_multiticks_assign(source->timer, source->source);
+
+    source->timer = timer;
+    timer->source = source;
+
+    return (source);
+}
+
+/*
+ * Disarm interval sources that are no longer being used.
+ *
+ * A sweep is performed at most once per multiticks_reap_threshold.  During a
+ * sweep, every source that has an interval programmed but has not been
+ * re-armed for multiticks_reap_multiplier of those intervals is stopped and
+ * its interval cleared.  A timer_settime() failure is treated as fatal and
+ * terminates the process.
+ */
+static void multiticks_reap(struct qemu_alarm_timer *t, int64_t now)
+{
+    struct qemu_alarm_multiticks *state = t->priv;
+    struct multitick_source *cur = state->sources;
+    struct multitick_source *end = cur + QEMU_MULTITICKS_NSOURCES;
+    int factor = multiticks_reap_multiplier;
+    struct itimerspec disarm;
+
+    /* Too soon since the previous sweep; nothing to do. */
+    if (now - state->reaped < multiticks_reap_threshold) {
+        return;
+    }
+
+    /* An all-zero itimerspec stops a timer. */
+    memset(&disarm, 0, sizeof (disarm));
+
+    for (; cur != end; cur++) {
+        int64_t period = cur->interval;
+
+        /* Skip one-shots and sources that were armed recently enough. */
+        if (period == 0 || cur->armed + (factor * period) > now) {
+            continue;
+        }
+
+        trace_multiticks_reap(cur->source, cur->armed, period);
+
+        cur->interval = 0;
+
+        if (timer_settime(cur->source, TIMER_RELTIME, &disarm, NULL) != 0) {
+            perror("timer_settime");
+            fprintf(stderr, "multiticks: internal reaping error; aborting\n");
+            exit(1);
+        }
+    }
+
+    state->reaped = now;
+}
+
+static void multiticks_rearm_timer(struct qemu_alarm_timer *t)
+{
+ struct multitick_source *source;
+ struct itimerspec timeout;
+ QEMUTimer *timer;
+ int64_t delta, when, interval;
+ int64_t low, high, now;
+
+ assert(alarm_has_dynticks(t));
+
+ /*
+ * Rearm the multiticks backend for the next pending QEMU timer. If the
+ * source chosen for that timer already has an interval programmed whose
+ * next firing is expected inside the band computed below (or no more
+ * than multiticks_tolerance_jitter past it), nothing is reprogrammed;
+ * otherwise the source is programmed afresh with timer_settime().
+ *
+ * First we need to find the next timer to fire. The clock is sampled
+ * both before and after the deadline is computed, so that once delta is
+ * added, low and high bracket the absolute time of that deadline.
+ */
+ low = get_clock();
+ delta = qemu_next_alarm_deadline(&timer);
+ now = high = get_clock();
+
+ multiticks_reap(t, now); /* disarm interval sources that have gone idle (rate-limited internally) */
+
+ if (timer == NULL)
+ return; /* qemu_next_alarm_deadline() selected no timer */
+
+ low += delta; /* from here on, low/high are absolute times */
+ high += delta;
+
+ if (timer->clock->type == QEMU_CLOCK_REALTIME) {
+ interval = timer->interval * 1000000; /* same scaling qemu_next_alarm_deadline() applies to realtime expire_time */
+ } else {
+ interval = timer->interval;
+ }
+
+ if (interval < multiticks_tolerance_interval)
+ interval = 0; /* no (or too short an) interval: program as a one-shot */
+
+ source = multiticks_source(t, timer);
+
+ if (interval && source->interval) {
+ int64_t offset, fire;
+
+ if (low < source->initial && source->initial < high) {
+ /*
+ * Our timer has not yet had its initial firing, which is already
+ * scheduled to be within band; we have nothing else to do.
+ */
+ trace_multiticks_inband(source->timer, low, high, source->initial);
+ source->armed = now;
+ return;
+ }
+
+ offset = (low - source->initial) % source->interval; /* how far low lies into the source's current period */
+ fire = low + (source->interval - offset); /* projected next firing after low, assuming firings every source->interval from source->initial */
+
+ if (fire < high) {
+ /*
+ * Our timer is going to fire within our band of expectation; we
+ * have nothing else to do.
+ */
+ trace_multiticks_inband(source->timer, low, high, fire);
+ source->armed = now;
+ return;
+ }
+
+ if (fire - high < multiticks_tolerance_jitter) {
+ /*
+ * Our timer is going to fire out of our band of expectation, but
+ * within our jitter tolerance; we'll let it ride.
+ */
+ trace_multiticks_inband(source->timer, low, high, fire);
+ source->armed = now;
+ return;
+ }
+
+ trace_multiticks_outofband(source->timer, low, high, fire);
+ }
+
+ /*
+ * We don't actually know the precise (absolute) time to fire, so we'll
+ * take the middle of the band.
+ */
+ when = low + (high - low) / 2;
+
+ trace_multiticks_program(source->timer, when, interval);
+
+ source->interval = interval;
+ source->armed = interval ? now : 0; /* one-shots are stamped 0, the smallest value multiticks_source() compares when picking a source to take over */
+ source->initial = when;
+ timeout.it_value.tv_sec = when / NANOSEC;
+ timeout.it_value.tv_nsec = when % NANOSEC;
+ timeout.it_interval.tv_sec = interval / NANOSEC;
+ timeout.it_interval.tv_nsec = interval % NANOSEC;
+
+ if (timer_settime(source->source, TIMER_ABSTIME, &timeout, NULL) != 0) {
+ perror("timer_settime");
+ fprintf(stderr, "multiticks: internal timer error; aborting\n");
+ exit(1);
+ }
+}
+#endif
+
#if defined(__linux__) || defined(__sun__)
static int dynticks_start_timer(struct qemu_alarm_timer *t)
@@ -908,7 +1277,7 @@ static void dynticks_rearm_timer(struct qemu_alarm_timer *t)
!active_timers[QEMU_CLOCK_HOST])
return;
- nearest_delta_ns = qemu_next_alarm_deadline();
+ nearest_delta_ns = qemu_next_alarm_deadline(NULL);
if (nearest_delta_ns < MIN_TIMER_REARM_NS)
nearest_delta_ns = MIN_TIMER_REARM_NS;
diff --git a/qemu-timer.h b/qemu-timer.h
index 8cd8f83..e2da6d5 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -87,6 +87,9 @@ extern int use_rt_clock;
static inline int64_t get_clock(void)
{
+#if defined(__sun__)
+ return gethrtime();
+#else
#if defined(__linux__) || (defined(__FreeBSD__) && __FreeBSD_version >= 500000) \
|| defined(__DragonFly__) || defined(__FreeBSD_kernel__)
if (use_rt_clock) {
@@ -100,6 +103,7 @@ static inline int64_t get_clock(void)
changes, so it should be avoided. */
return get_clock_realtime();
}
+#endif
}
#endif
diff --git a/trace-events b/trace-events
index e6138ea..b424944 100644
--- a/trace-events
+++ b/trace-events
@@ -38,6 +38,15 @@ disable qemu_memalign(size_t alignment, size_t size, void *ptr) "alignment %zu s
disable qemu_vmalloc(size_t size, void *ptr) "size %zu ptr %p"
disable qemu_vfree(void *ptr) "ptr %p"
+# qemu-timer.c
+# int64_t arguments are printed with PRId64; "%d" does not match their type.
+disable qemu_mod_timer(void *ptr, int64_t expire, int64_t interval) "ptr %p expire %"PRId64" interval %"PRId64""
+disable qemu_run_timer(void *ptr, int64_t expire, int64_t current_time) "ptr %p expire %"PRId64" current_time %"PRId64""
+disable multiticks_assign(void *ptr, int source) "ptr %p source %d"
+disable multiticks_reap(int source, int64_t prog, int64_t interval) "source %d prog %"PRId64" interval %"PRId64""
+disable multiticks_program(void *ptr, int64_t expire, int64_t interval) "ptr %p expire %"PRId64" interval %"PRId64""
+disable multiticks_inband(void *ptr, int64_t low, int64_t high, int64_t fire) "ptr %p low %"PRId64" high %"PRId64" fire %"PRId64""
+disable multiticks_outofband(void *ptr, int64_t low, int64_t high, int64_t fire) "ptr %p low %"PRId64" high %"PRId64" fire %"PRId64""
+
# hw/virtio.c
disable virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u"
disable virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"