summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBryan Cantrill <bryan@joyent.com>2011-10-07 01:20:14 +0000
committerBryan Cantrill <bryan@joyent.com>2011-10-07 01:20:14 +0000
commit7bdfeb90593194a6d242831c9e085e8798c84702 (patch)
tree08c43183aadc0721bed33b95a356b0ecf5e6d38f
parent96f81f9c842fffd0feae765e8ad755f00dfdfe4a (diff)
downloadillumos-kvm-cmd-7bdfeb90593194a6d242831c9e085e8798c84702.tar.gz
HVM-671 qemu cannot start due to a failing mlock()
-rw-r--r--exec.c68
1 files changed, 67 insertions, 1 deletions
diff --git a/exec.c b/exec.c
index 598f9d1..24e5aed 100644
--- a/exec.c
+++ b/exec.c
@@ -2828,6 +2828,72 @@ static ram_addr_t last_ram_offset(void)
return last;
}
+#ifdef CONFIG_SOLARIS
+static int
+qemu_mlock(caddr_t base, ram_addr_t size)
+{
+ ram_addr_t ps = qemu_real_host_page_size, nbytes, locked = 0;
+ ram_addr_t remaining = size / ps;
+ ram_addr_t step = remaining;
+ timespec_t tv;
+ hrtime_t waiting = 0, threshold;
+
+ tv.tv_sec = 0;
+ tv.tv_nsec = NANOSEC / MILLISEC;
+ threshold = 10 * (hrtime_t)NANOSEC;
+
+ /*
+ * We cannot lock memory with a single call to mlock() because it
+ * won't result in sustained memory pressure: if there is a
+ * substantial amount of kernel memory in use electively (e.g., for
+ * the ARC) a single call to mlock() may fail where sustained memory
+ * pressure would succeed. We therefore start by trying to lock the
+ * entire region, adjusting our size down as we fail with EAGAIN; once
+ * we successfully lock a portion of the region, we advance to the
+ * unlocked portion of the region (if any remains) and increase the
+ * size. Note that this will continue to hoard memory until it locks
+ * what it needs -- it won't give up. To help debug situations in
+ * which one has mistakenly overprovisioned, we emit a message every
+ * ten seconds with no forward progress.
+ */
+ while (remaining) {
+ if (step > remaining)
+ step = remaining;
+
+ while (mlock(base, (nbytes = step * ps)) == -1) {
+ if (errno != EAGAIN)
+ return (-1);
+
+ if (waiting == 0)
+ waiting = gethrtime();
+
+ if (step > 1) {
+ step >>= 1;
+ continue;
+ }
+
+ (void) nanosleep(&tv, NULL);
+
+ if (gethrtime() - waiting > threshold) {
+ (void) fprintf(stderr, "qemu_mlock: have only "
+ "locked %ld of %ld bytes; still "
+ "trying...\n", locked, size);
+ waiting = 0;
+ }
+ }
+
+ waiting = 0;
+ base += nbytes;
+ locked += nbytes;
+ remaining -= step;
+
+ step <<= 1;
+ }
+
+ return (0);
+}
+#endif
+
ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
ram_addr_t size, void *host)
{
@@ -2883,7 +2949,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
* revisited if we implement mmu notifiers in the kernel.
* Note also that pages are touched in kvm_set_user_memory_region.
*/
- if (mlock(new_block->host, size) != 0) {
+ if (qemu_mlock(new_block->host, size) != 0) {
fprintf(stderr, "qemu_ram_alloc: Could not lock %ld memory, errno = %d\n",
size, errno);
exit(1);