Diffstat (limited to 'usr/src/uts/common/os')
-rw-r--r--   usr/src/uts/common/os/bio.c             10
-rw-r--r--   usr/src/uts/common/os/clock.c            6
-rw-r--r--   usr/src/uts/common/os/clock_highres.c   36
-rw-r--r--   usr/src/uts/common/os/contract.c        50
-rw-r--r--   usr/src/uts/common/os/core.c             8
-rw-r--r--   usr/src/uts/common/os/cred.c             8
-rw-r--r--   usr/src/uts/common/os/cyclic.c           6
-rw-r--r--   usr/src/uts/common/os/exit.c            15
-rw-r--r--   usr/src/uts/common/os/kstat_fr.c        18
-rw-r--r--   usr/src/uts/common/os/logsubr.c          3
-rw-r--r--   usr/src/uts/common/os/msacct.c          36
-rw-r--r--   usr/src/uts/common/os/policy.c          10
-rw-r--r--   usr/src/uts/common/os/priv_defs          4
-rw-r--r--   usr/src/uts/common/os/vmem.c             2
-rw-r--r--   usr/src/uts/common/os/zone.c           789
15 files changed, 923 insertions, 78 deletions
diff --git a/usr/src/uts/common/os/bio.c b/usr/src/uts/common/os/bio.c index 0db01f80d7..c3d04e5508 100644 --- a/usr/src/uts/common/os/bio.c +++ b/usr/src/uts/common/os/bio.c @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -1320,6 +1321,9 @@ pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags) cpup = CPU; /* get pointer AFTER preemption is disabled */ CPU_STATS_ADDQ(cpup, vm, pgin, 1); CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len)); + + atomic_add_64(&curzone->zone_pgpgin, btopr(len)); + if ((flags & B_ASYNC) == 0) { klwp_t *lwp = ttolwp(curthread); if (lwp != NULL) @@ -1336,13 +1340,19 @@ pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags) if (pp != NULL && pp->p_vnode != NULL) { if (IS_SWAPFSVP(pp->p_vnode)) { CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len)); + atomic_add_64(&curzone->zone_anonpgin, + btopr(len)); } else { if (pp->p_vnode->v_flag & VVMEXEC) { CPU_STATS_ADDQ(cpup, vm, execpgin, btopr(len)); + atomic_add_64(&curzone->zone_execpgin, + btopr(len)); } else { CPU_STATS_ADDQ(cpup, vm, fspgin, btopr(len)); + atomic_add_64(&curzone->zone_fspgin, + btopr(len)); } } } diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c index 451c9db48c..3f4dd63c82 100644 --- a/usr/src/uts/common/os/clock.c +++ b/usr/src/uts/common/os/clock.c @@ -23,6 +23,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ #include <sys/param.h> @@ -66,6 +67,7 @@ #include <sys/ddi_timer.h> #include <sys/random.h> #include <sys/modctl.h> +#include <sys/zone.h> /* * for NTP support @@ -1158,6 +1160,10 @@ loadavg_update() } while ((cpupart = cpupart->cp_next) != cp_list_head); + /* + * Third pass totals up per-zone statistics. + */ + zone_loadavg_update(); } /* diff --git a/usr/src/uts/common/os/clock_highres.c b/usr/src/uts/common/os/clock_highres.c index e097f355ec..7870617a26 100644 --- a/usr/src/uts/common/os/clock_highres.c +++ b/usr/src/uts/common/os/clock_highres.c @@ -24,7 +24,9 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2012, Joyent Inc. All rights reserved. + */ #include <sys/timer.h> #include <sys/systm.h> @@ -112,6 +114,25 @@ clock_highres_timer_settime(itimer_t *it, int flags, cyctime.cyt_when = ts2hrt(&when->it_value); cyctime.cyt_interval = ts2hrt(&when->it_interval); + if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 && + it->it_itime.it_interval.tv_sec == 0 && + it->it_itime.it_interval.tv_nsec == 0 && + (cyc = *cycp) != CYCLIC_NONE) { + /* + * If our existing timer is a one-shot and our new timer is a + * one-shot, we'll save ourselves a world of grief and just + * reprogram the cyclic. 
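This fast path matters most to programs that re-arm a one-shot CLOCK_HIGHRES timer over and over: after this change, each timer_settime() reprograms the existing cyclic instead of tearing it down and rebuilding it. A minimal user-level sketch of such a caller, assuming the process holds the proc_clock_highres privilege needed to create CLOCK_HIGHRES timers:

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/*
 * Re-arm a one-shot CLOCK_HIGHRES timer in a loop.  With the change
 * above, every timer_settime() after the first reprograms the
 * existing cyclic rather than removing and re-creating it.
 */
int
main(void)
{
	timer_t tid;
	struct sigevent ev;
	struct itimerspec its;
	int i;

	(void) signal(SIGALRM, SIG_IGN);	/* discard expirations */

	(void) memset(&ev, 0, sizeof (ev));
	ev.sigev_notify = SIGEV_SIGNAL;
	ev.sigev_signo = SIGALRM;

	if (timer_create(CLOCK_HIGHRES, &ev, &tid) != 0) {
		perror("timer_create");
		return (1);
	}

	(void) memset(&its, 0, sizeof (its));	/* zero interval: one-shot */
	its.it_value.tv_nsec = 500000;		/* fire 500 usec from now */

	for (i = 0; i < 1000; i++) {
		if (timer_settime(tid, 0, &its, NULL) != 0) {
			perror("timer_settime");
			return (1);
		}
	}

	(void) timer_delete(tid);
	return (0);
}

On older releases this links with -lrt; on current illumos the timer calls live in libc.
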
+ */ + it->it_itime = *when; + + if (!(flags & TIMER_ABSTIME)) + cyctime.cyt_when += gethrtime(); + + hrt2ts(cyctime.cyt_when, &it->it_itime.it_value); + (void) cyclic_reprogram(cyc, cyctime.cyt_when); + return (0); + } + mutex_enter(&cpu_lock); if ((cyc = *cycp) != CYCLIC_NONE) { cyclic_remove(cyc); @@ -162,17 +183,14 @@ clock_highres_timer_settime(itimer_t *it, int flags, if (cyctime.cyt_interval == 0) { /* - * If this is a one-shot, then we set the interval to assure - * that the cyclic will next fire INT64_MAX nanoseconds after - * boot (which corresponds to over 292 years -- yes, Buck Rogers - * may have his 292-year-uptime-Solaris box malfunction). If - * this timer is never touched, this cyclic will simply - * consume space in the cyclic subsystem. As soon as + * If this is a one-shot, then we set the interval to be + * inifinite. If this timer is never touched, this cyclic will + * simply consume space in the cyclic subsystem. As soon as * timer_settime() or timer_delete() is called, the cyclic is * removed (so it's not possible to run the machine out * of resources by creating one-shots). */ - cyctime.cyt_interval = INT64_MAX - cyctime.cyt_when; + cyctime.cyt_interval = CY_INFINITY; } it->it_itime = *when; @@ -185,8 +203,6 @@ clock_highres_timer_settime(itimer_t *it, int flags, if (cyctime.cyt_when != 0) *cycp = cyc = cyclic_add(&hdlr, &cyctime); - else - *cycp = cyc = CYCLIC_NONE; /* * Now that we have the cyclic created, we need to bind it to our diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c index a292f4e14f..ebaa6bfe41 100644 --- a/usr/src/uts/common/os/contract.c +++ b/usr/src/uts/common/os/contract.c @@ -497,7 +497,7 @@ contract_abandon(contract_t *ct, proc_t *p, int explicit) contract_t *parent = &p->p_ct_process->conp_contract; int inherit = 0; - ASSERT(p == curproc); + VERIFY(p == curproc); mutex_enter(&ct->ct_lock); @@ -547,7 +547,7 @@ contract_abandon(contract_t *ct, proc_t *p, int explicit) if (inherit) { ct->ct_state = CTS_INHERITED; - ASSERT(ct->ct_regent == parent); + VERIFY(ct->ct_regent == parent); contract_process_take(parent, ct); /* @@ -2063,8 +2063,8 @@ cte_copy(ct_equeue_t *q, ct_equeue_t *newq) { ct_kevent_t *e, *first = NULL; - ASSERT(q->ctq_listno == CTEL_CONTRACT); - ASSERT(newq->ctq_listno == CTEL_PBUNDLE); + VERIFY(q->ctq_listno == CTEL_CONTRACT); + VERIFY(newq->ctq_listno == CTEL_PBUNDLE); mutex_enter(&q->ctq_lock); mutex_enter(&newq->ctq_lock); @@ -2077,8 +2077,16 @@ cte_copy(ct_equeue_t *q, ct_equeue_t *newq) if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { if (first == NULL) first = e; - list_insert_tail(&newq->ctq_events, e); - cte_hold(e); + /* + * It is possible for adoption to race with an owner's + * cte_publish_all(); we must only enqueue events that + * have not already been enqueued. + */ + if (!list_link_active((list_node_t *) + ((uintptr_t)e + newq->ctq_events.list_offset))) { + list_insert_tail(&newq->ctq_events, e); + cte_hold(e); + } } } @@ -2117,7 +2125,7 @@ cte_trim(ct_equeue_t *q, contract_t *ct) int flags, stopper; int start = 1; - ASSERT(MUTEX_HELD(&q->ctq_lock)); + VERIFY(MUTEX_HELD(&q->ctq_lock)); for (e = list_head(&q->ctq_events); e != NULL; e = next) { next = list_next(&q->ctq_events, e); @@ -2227,13 +2235,24 @@ cte_queue_drain(ct_equeue_t *q, int ack) * cte_publish_all. 
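Both cte_copy() and the reworked cte_publish() below guard against double insertion by checking whether the event's embedded list node is already linked, computing the node address from the event pointer plus the queue's list_offset. A self-contained sketch of that idiom, using simplified stand-ins for the kernel's list(9F) structures (the field layout here is illustrative, not the real list(9F) definition):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Objects embed a link node at a known offset; a node with NULL
 * links is not on any list.
 */
typedef struct link {
	struct link *next;
	struct link *prev;
} link_t;

typedef struct list {
	size_t list_offset;	/* offset of link_t within each object */
	link_t head;		/* circular sentinel */
} list_t;

typedef struct event {
	int id;
	link_t node;		/* embedded linkage */
} event_t;

static int
link_active(link_t *ln)
{
	return (ln->next != NULL);
}

static void
insert_tail(list_t *l, void *obj)
{
	link_t *ln = (link_t *)((uintptr_t)obj + l->list_offset);

	ln->prev = l->head.prev;
	ln->next = &l->head;
	l->head.prev->next = ln;
	l->head.prev = ln;
}

int
main(void)
{
	list_t q = { offsetof(event_t, node), { &q.head, &q.head } };
	event_t e = { 42, { NULL, NULL } };
	link_t *ln;
	int i;

	/* Two racing publishers; only the first insert proceeds. */
	for (i = 0; i < 2; i++) {
		ln = (link_t *)((uintptr_t)&e + q.list_offset);
		if (!link_active(ln)) {
			insert_tail(&q, &e);
			(void) printf("pass %d: enqueued event %d\n", i, e.id);
		} else {
			(void) printf("pass %d: already queued, skipped\n", i);
		}
	}
	return (0);
}
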
*/ static void -cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp) +cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp, boolean_t mayexist) { ASSERT(MUTEX_HELD(&q->ctq_lock)); q->ctq_atime = *tsp; /* + * If this event may already exist on this queue, check to see if it + * is already there and return if so. + */ + if (mayexist && list_link_active((list_node_t *)((uintptr_t)e + + q->ctq_events.list_offset))) { + mutex_exit(&q->ctq_lock); + cte_rele(e); + return; + } + + /* * Don't publish if the event is informative and there aren't * any listeners, or if the queue has been shut down. */ @@ -2247,6 +2266,8 @@ cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp) /* * Enqueue event */ + VERIFY(!list_link_active((list_node_t *) + ((uintptr_t)e + q->ctq_events.list_offset))); list_insert_tail(&q->ctq_events, e); /* @@ -2318,14 +2339,14 @@ cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata) ct->ct_evcnt++; } mutex_exit(&ct->ct_lock); - cte_publish(&ct->ct_events, e, &ts); + cte_publish(&ct->ct_events, e, &ts, B_FALSE); /* * CTEL_BUNDLE - Next deliver to the contract type's bundle * queue. */ mutex_enter(&ct->ct_type->ct_type_events.ctq_lock); - cte_publish(&ct->ct_type->ct_type_events, e, &ts); + cte_publish(&ct->ct_type->ct_type_events, e, &ts, B_FALSE); /* * CTEL_PBUNDLE - Finally, if the contract has an owner, @@ -2342,7 +2363,14 @@ cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata) q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]; mutex_enter(&q->ctq_lock); mutex_exit(&ct->ct_lock); - cte_publish(q, e, &ts); + + /* + * It is possible for this code to race with adoption; we + * publish the event indicating that the event may already + * be enqueued because adoption beat us to it (in which case + * cte_pubish() does nothing). + */ + cte_publish(q, e, &ts, B_TRUE); } else { mutex_exit(&ct->ct_lock); cte_rele(e); diff --git a/usr/src/uts/common/os/core.c b/usr/src/uts/common/os/core.c index 9e04f631a9..3b3935a772 100644 --- a/usr/src/uts/common/os/core.c +++ b/usr/src/uts/common/os/core.c @@ -21,6 +21,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -534,6 +535,10 @@ expand_string(const char *pat, char *fp, int size, cred_t *cr) case 'z': s = p->p_zone->zone_name; break; + case 'Z': + /* This is zonepath + "/root/", except for GZ */ + s = p->p_zone->zone_rootpath; + break; case '%': (void) strcpy((s = buf), "%"); break; @@ -548,6 +553,9 @@ expand_string(const char *pat, char *fp, int size, cred_t *cr) if ((size -= len) <= 0) return (ENAMETOOLONG); (void) strcpy(fp, s); + /* strip trailing "/root/" from non-GZ zonepath string */ + if (c == 'Z' && len > 6) + len -= 6; fp += len; } diff --git a/usr/src/uts/common/os/cred.c b/usr/src/uts/common/os/cred.c index 1ec63249ab..20e57efaad 100644 --- a/usr/src/uts/common/os/cred.c +++ b/usr/src/uts/common/os/cred.c @@ -724,6 +724,14 @@ crgetzoneid(const cred_t *cr) cr->cr_zone->zone_id); } +zoneid_t +crgetzonedid(const cred_t *cr) +{ + return (cr->cr_zone == NULL ? + (cr->cr_uid == -1 ? 
(zoneid_t)-1 : GLOBAL_ZONEID) : + cr->cr_zone->zone_did); +} + projid_t crgetprojid(const cred_t *cr) { diff --git a/usr/src/uts/common/os/cyclic.c b/usr/src/uts/common/os/cyclic.c index 1bb6baf445..93a318d260 100644 --- a/usr/src/uts/common/os/cyclic.c +++ b/usr/src/uts/common/os/cyclic.c @@ -24,6 +24,10 @@ */ /* + * Copyright (c) 2012, Joyent Inc. All rights reserved. + */ + +/* * The Cyclic Subsystem * -------------------- * @@ -1139,7 +1143,7 @@ top: CYC_TRACE(cpu, level, "softint-top", cyclics, pc); while (consndx != pc->cypc_prodndx) { - int pend, npend, opend; + uint32_t pend, npend, opend; int consmasked = consndx & sizemask; cyclic_t *cyclic = &cyclics[buf[consmasked]]; cyc_func_t handler = cyclic->cy_handler; diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c index b97a09454b..7c5b8323e3 100644 --- a/usr/src/uts/common/os/exit.c +++ b/usr/src/uts/common/os/exit.c @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -388,10 +389,16 @@ proc_exit(int why, int what) if (p->p_pid == z->zone_proc_initpid) { if (z->zone_boot_err == 0 && zone_status_get(z) < ZONE_IS_SHUTTING_DOWN && - zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN && - z->zone_restart_init == B_TRUE && - restart_init(what, why) == 0) - return (0); + zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) { + if (z->zone_restart_init == B_TRUE) { + if (restart_init(what, why) == 0) + return (0); + } else { + (void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, + CRED()); + } + } + /* * Since we didn't or couldn't restart init, we clear * the zone's init state and proceed with exit diff --git a/usr/src/uts/common/os/kstat_fr.c b/usr/src/uts/common/os/kstat_fr.c index 83b817e866..a5f5a6f3c2 100644 --- a/usr/src/uts/common/os/kstat_fr.c +++ b/usr/src/uts/common/os/kstat_fr.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ /* @@ -160,6 +161,7 @@ struct { kstat_named_t avenrun_5min; kstat_named_t avenrun_15min; kstat_named_t boot_time; + kstat_named_t nsec_per_tick; } system_misc_kstat = { { "ncpus", KSTAT_DATA_UINT32 }, { "lbolt", KSTAT_DATA_UINT32 }, @@ -171,6 +173,7 @@ struct { { "avenrun_5min", KSTAT_DATA_UINT32 }, { "avenrun_15min", KSTAT_DATA_UINT32 }, { "boot_time", KSTAT_DATA_UINT32 }, + { "nsec_per_tick", KSTAT_DATA_UINT32 }, }; struct { @@ -803,7 +806,6 @@ system_misc_kstat_update(kstat_t *ksp, int rw) { int myncpus = ncpus; int *loadavgp = &avenrun[0]; - int loadavg[LOADAVG_NSTATS]; time_t zone_boot_time; clock_t zone_lbolt; hrtime_t zone_hrtime; @@ -820,17 +822,11 @@ system_misc_kstat_update(kstat_t *ksp, int rw) */ mutex_enter(&cpu_lock); if (pool_pset_enabled()) { - psetid_t mypsid = zone_pset_get(curproc->p_zone); - int error; - myncpus = zone_ncpus_get(curproc->p_zone); ASSERT(myncpus > 0); - error = cpupart_get_loadavg(mypsid, &loadavg[0], - LOADAVG_NSTATS); - ASSERT(error == 0); - loadavgp = &loadavg[0]; } mutex_exit(&cpu_lock); + loadavgp = &curproc->p_zone->zone_avenrun[0]; } if (INGLOBALZONE(curproc)) { @@ -838,9 +834,7 @@ system_misc_kstat_update(kstat_t *ksp, int rw) zone_lbolt = ddi_get_lbolt(); zone_nproc = nproc; } else { - struct timeval tvp; - hrt2tv(curproc->p_zone->zone_zsched->p_mstart, &tvp); - zone_boot_time = tvp.tv_sec; + zone_boot_time = curproc->p_zone->zone_boot_time; zone_hrtime = gethrtime(); zone_lbolt = (clock_t)(NSEC_TO_TICK(zone_hrtime) - @@ -861,6 +855,8 @@ system_misc_kstat_update(kstat_t *ksp, int rw) system_misc_kstat.avenrun_15min.value.ui32 = (uint32_t)loadavgp[2]; system_misc_kstat.boot_time.value.ui32 = (uint32_t) zone_boot_time; + system_misc_kstat.nsec_per_tick.value.ui32 = (uint32_t) + nsec_per_tick; return (0); } diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c index f5cebbf82e..63a89a2ce8 100644 --- a/usr/src/uts/common/os/logsubr.c +++ b/usr/src/uts/common/os/logsubr.c @@ -248,8 +248,7 @@ log_init(void) */ printf("\rSunOS Release %s Version %s %u-bit\n", utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *)); - printf("Copyright (c) 1983, 2010, Oracle and/or its affiliates. " - "All rights reserved.\n"); + printf("Copyright (c) 2010-2012, Joyent Inc. 
All rights reserved.\n"); #ifdef DEBUG printf("DEBUG enabled\n"); #endif diff --git a/usr/src/uts/common/os/msacct.c b/usr/src/uts/common/os/msacct.c index df975eb7ee..c10dce81ca 100644 --- a/usr/src/uts/common/os/msacct.c +++ b/usr/src/uts/common/os/msacct.c @@ -33,6 +33,7 @@ #include <sys/debug.h> #include <sys/msacct.h> #include <sys/time.h> +#include <sys/zone.h> /* * Mega-theory block comment: @@ -390,6 +391,7 @@ void syscall_mstate(int fromms, int toms) { kthread_t *t = curthread; + zone_t *z = ttozone(t); struct mstate *ms; hrtime_t *mstimep; hrtime_t curtime; @@ -413,6 +415,10 @@ syscall_mstate(int fromms, int toms) newtime = curtime - ms->ms_state_start; } *mstimep += newtime; + if (fromms == LMS_USER) + atomic_add_64(&z->zone_utime, newtime); + else if (fromms == LMS_SYSTEM) + atomic_add_64(&z->zone_stime, newtime); t->t_mstate = toms; ms->ms_state_start = curtime; ms->ms_prev = fromms; @@ -602,7 +608,10 @@ new_mstate(kthread_t *t, int new_state) hrtime_t curtime; hrtime_t newtime; hrtime_t oldtime; + hrtime_t ztime; + hrtime_t origstart; klwp_t *lwp; + zone_t *z; ASSERT(new_state != LMS_WAIT_CPU); ASSERT((unsigned)new_state < NMSTATES); @@ -625,6 +634,7 @@ new_mstate(kthread_t *t, int new_state) ms = &lwp->lwp_mstate; state = t->t_mstate; + origstart = ms->ms_state_start; do { switch (state) { case LMS_TFAULT: @@ -637,7 +647,7 @@ new_mstate(kthread_t *t, int new_state) mstimep = &ms->ms_acct[state]; break; } - newtime = curtime - ms->ms_state_start; + ztime = newtime = curtime - ms->ms_state_start; if (newtime < 0) { curtime = gethrtime_unscaled(); oldtime = *mstimep - 1; /* force CAS to fail */ @@ -648,6 +658,20 @@ new_mstate(kthread_t *t, int new_state) t->t_mstate = new_state; ms->ms_state_start = curtime; } while (cas64((uint64_t *)mstimep, oldtime, newtime) != oldtime); + + /* + * When the system boots the initial startup thread will have a + * ms_state_start of 0 which would add a huge system time to the global + * zone. We want to skip aggregating that initial bit of work. + */ + if (origstart != 0) { + z = ttozone(t); + if (state == LMS_USER) + atomic_add_64(&z->zone_utime, ztime); + else if (state == LMS_SYSTEM) + atomic_add_64(&z->zone_stime, ztime); + } + /* * Remember the previous running microstate. */ @@ -686,6 +710,8 @@ restore_mstate(kthread_t *t) hrtime_t waitrq; hrtime_t newtime; hrtime_t oldtime; + hrtime_t waittime; + zone_t *z; /* * Don't call restore mstate of threads without lwps. (Kernel threads) @@ -756,11 +782,15 @@ restore_mstate(kthread_t *t) oldtime = *mstimep; newtime += oldtime; } while (cas64((uint64_t *)mstimep, oldtime, newtime) != oldtime); + /* * Update the WAIT_CPU timer and per-cpu waitrq total. */ - ms->ms_acct[LMS_WAIT_CPU] += (curtime - waitrq); - CPU->cpu_waitrq += (curtime - waitrq); + z = ttozone(t); + waittime = curtime - waitrq; + ms->ms_acct[LMS_WAIT_CPU] += waittime; + atomic_add_64(&z->zone_wtime, waittime); + CPU->cpu_waitrq += waittime; ms->ms_state_start = curtime; } diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c index 573ebbc367..d8f7882723 100644 --- a/usr/src/uts/common/os/policy.c +++ b/usr/src/uts/common/os/policy.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, Joyent, Inc. All rights reserved. 
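The microstate accounting changes in msacct.c above accumulate elapsed time into the per-thread counters with a compare-and-swap retry loop (the kernel's cas64()) so the hot path needs no lock. The same idiom is available to user code through atomic_cas_64(3C); a minimal sketch:

#include <sys/types.h>
#include <atomic.h>
#include <stdio.h>

/*
 * Add a time delta to a shared 64-bit counter without a lock, the
 * same retry idiom new_mstate() uses with the kernel's cas64().
 */
static void
add_time(volatile uint64_t *totalp, uint64_t delta)
{
	uint64_t old, new;

	do {
		old = *totalp;
		new = old + delta;
	} while (atomic_cas_64(totalp, old, new) != old);
}

int
main(void)
{
	static volatile uint64_t zone_stime = 0;

	add_time(&zone_stime, 1000);
	add_time(&zone_stime, 234);
	(void) printf("accumulated %llu nsec\n",
	    (unsigned long long)zone_stime);
	return (0);
}
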
*/ #include <sys/types.h> @@ -2563,3 +2564,12 @@ secpolicy_ppp_config(const cred_t *cr) return (secpolicy_net_config(cr, B_FALSE)); return (PRIV_POLICY(cr, PRIV_SYS_PPP_CONFIG, B_FALSE, EPERM, NULL)); } + +int +secpolicy_hyprlofs_control(const cred_t *cr) +{ + if (PRIV_POLICY(cr, PRIV_HYPRLOFS_CONTROL, B_FALSE, EPERM, NULL)) + return (EPERM); + return (0); +} + diff --git a/usr/src/uts/common/os/priv_defs b/usr/src/uts/common/os/priv_defs index a5a918b326..53617bd0fe 100644 --- a/usr/src/uts/common/os/priv_defs +++ b/usr/src/uts/common/os/priv_defs @@ -176,6 +176,10 @@ privilege PRIV_GRAPHICS_MAP Allows a process to perform privileged mappings through a graphics device. +privilege PRIV_HYPRLOFS_CONTROL + + Allows a process to manage hyprlofs entries. + privilege PRIV_IPC_DAC_READ Allows a process to read a System V IPC diff --git a/usr/src/uts/common/os/vmem.c b/usr/src/uts/common/os/vmem.c index f0027d18e9..d444def5cc 100644 --- a/usr/src/uts/common/os/vmem.c +++ b/usr/src/uts/common/os/vmem.c @@ -1599,7 +1599,7 @@ vmem_destroy(vmem_t *vmp) leaked = vmem_size(vmp, VMEM_ALLOC); if (leaked != 0) - cmn_err(CE_WARN, "vmem_destroy('%s'): leaked %lu %s", + cmn_err(CE_WARN, "!vmem_destroy('%s'): leaked %lu %s", vmp->vm_name, leaked, (vmp->vm_cflags & VMC_IDENTIFIER) ? "identifiers" : "bytes"); diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 79ccd94ae4..4b9dc9fc93 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2012, Joyent Inc. All rights reserved. */ /* @@ -369,8 +370,12 @@ static char *zone_ref_subsys_names[] = { rctl_hndl_t rc_zone_cpu_shares; rctl_hndl_t rc_zone_locked_mem; rctl_hndl_t rc_zone_max_swap; +rctl_hndl_t rc_zone_phys_mem; rctl_hndl_t rc_zone_max_lofi; rctl_hndl_t rc_zone_cpu_cap; +rctl_hndl_t rc_zone_cpu_baseline; +rctl_hndl_t rc_zone_cpu_burst_time; +rctl_hndl_t rc_zone_zfs_io_pri; rctl_hndl_t rc_zone_nlwps; rctl_hndl_t rc_zone_nprocs; rctl_hndl_t rc_zone_shmmax; @@ -423,8 +428,9 @@ static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *, * Version 5 alters the zone_boot system call, and converts its old * bootargs parameter to be set by the zone_setattr API instead. * Version 6 adds the flag argument to zone_create. + * Version 7 adds the requested zoneid to zone_create. */ -static const int ZONE_SYSCALL_API_VERSION = 6; +static const int ZONE_SYSCALL_API_VERSION = 7; /* * Certain filesystems (such as NFS and autofs) need to know which zone @@ -1380,6 +1386,114 @@ static rctl_ops_t zone_cpu_cap_ops = { /*ARGSUSED*/ static rctl_qty_t +zone_cpu_base_get(rctl_t *rctl, struct proc *p) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + return (cpucaps_zone_get_base(p->p_zone)); +} + +/* + * The zone cpu base is used to set the baseline CPU for the zone + * so we can track when the zone is bursting. 
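Like the existing zone.cpu-cap, the zone.cpu-baseline resource control added by this change should be readable from inside a zone with getrctl(2). A sketch, assuming the new rctl enumerates the way other zone rctls do:

#include <rctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	rctlblk_t *rblk;

	if ((rblk = malloc(rctlblk_size())) == NULL) {
		perror("malloc");
		return (1);
	}

	if (getrctl("zone.cpu-baseline", NULL, rblk, RCTL_FIRST) == -1) {
		perror("getrctl");
		return (1);
	}

	(void) printf("zone.cpu-baseline = %llu\n",
	    (unsigned long long)rctlblk_get_value(rblk));

	free(rblk);
	return (0);
}
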
+ */ +/*ARGSUSED*/ +static int +zone_cpu_base_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) +{ + zone_t *zone = e->rcep_p.zone; + + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + + if (zone == NULL) + return (0); + + return (cpucaps_zone_set_base(zone, nv)); +} + +static rctl_ops_t zone_cpu_base_ops = { + rcop_no_action, + zone_cpu_base_get, + zone_cpu_base_set, + rcop_no_test +}; + +/*ARGSUSED*/ +static rctl_qty_t +zone_cpu_burst_time_get(rctl_t *rctl, struct proc *p) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + return (cpucaps_zone_get_burst_time(p->p_zone)); +} + +/* + * The zone cpu burst time is used to set the amount of time CPU(s) can be + * bursting for the zone. + */ +/*ARGSUSED*/ +static int +zone_cpu_burst_time_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) +{ + zone_t *zone = e->rcep_p.zone; + + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + + if (zone == NULL) + return (0); + + return (cpucaps_zone_set_burst_time(zone, nv)); +} + +static rctl_ops_t zone_cpu_burst_time_ops = { + rcop_no_action, + zone_cpu_burst_time_get, + zone_cpu_burst_time_set, + rcop_no_test +}; + +/* + * zone.zfs-io-pri resource control support (IO priority). + */ +/*ARGSUSED*/ +static rctl_qty_t +zone_zfs_io_pri_get(rctl_t *rctl, struct proc *p) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + return (p->p_zone->zone_zfs_io_pri); +} + +/*ARGSUSED*/ +static int +zone_zfs_io_pri_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) +{ + zone_t *zone = e->rcep_p.zone; + + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + + if (zone == NULL) + return (0); + + /* + * set priority to the new value. + */ + zone->zone_zfs_io_pri = nv; + return (0); +} + +static rctl_ops_t zone_zfs_io_pri_ops = { + rcop_no_action, + zone_zfs_io_pri_get, + zone_zfs_io_pri_set, + rcop_no_test +}; + +/*ARGSUSED*/ +static rctl_qty_t zone_lwps_usage(rctl_t *r, proc_t *p) { rctl_qty_t nlwps; @@ -1674,6 +1788,39 @@ static rctl_ops_t zone_max_swap_ops = { /*ARGSUSED*/ static rctl_qty_t +zone_phys_mem_usage(rctl_t *rctl, struct proc *p) +{ + rctl_qty_t q; + zone_t *z = p->p_zone; + + ASSERT(MUTEX_HELD(&p->p_lock)); + /* No additional lock because not enforced in the kernel */ + q = z->zone_phys_mem; + return (q); +} + +/*ARGSUSED*/ +static int +zone_phys_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e, + rctl_qty_t nv) +{ + ASSERT(MUTEX_HELD(&p->p_lock)); + ASSERT(e->rcep_t == RCENTITY_ZONE); + if (e->rcep_p.zone == NULL) + return (0); + e->rcep_p.zone->zone_phys_mem_ctl = nv; + return (0); +} + +static rctl_ops_t zone_phys_mem_ops = { + rcop_no_action, + zone_phys_mem_usage, + zone_phys_mem_set, + rcop_no_test +}; + +/*ARGSUSED*/ +static rctl_qty_t zone_max_lofi_usage(rctl_t *rctl, struct proc *p) { rctl_qty_t q; @@ -1767,6 +1914,20 @@ zone_lockedmem_kstat_update(kstat_t *ksp, int rw) } static int +zone_physmem_kstat_update(kstat_t *ksp, int rw) +{ + zone_t *zone = ksp->ks_private; + zone_kstat_t *zk = ksp->ks_data; + + if (rw == KSTAT_WRITE) + return (EACCES); + + zk->zk_usage.value.ui64 = zone->zone_phys_mem; + zk->zk_value.value.ui64 = zone->zone_phys_mem_ctl; + return (0); +} + +static int zone_nprocs_kstat_update(kstat_t *ksp, int rw) { zone_t *zone = ksp->ks_private; @@ -1795,7 +1956,7 @@ zone_swapresv_kstat_update(kstat_t *ksp, int rw) } static kstat_t * -zone_kstat_create_common(zone_t *zone, char *name, +zone_rctl_kstat_create_common(zone_t *zone, char *name, int (*updatefunc) (kstat_t *, int)) { 
kstat_t *ksp; @@ -1820,26 +1981,350 @@ zone_kstat_create_common(zone_t *zone, char *name, return (ksp); } +static int +zone_vfs_kstat_update(kstat_t *ksp, int rw) +{ + zone_t *zone = ksp->ks_private; + zone_vfs_kstat_t *zvp = ksp->ks_data; + kstat_io_t *kiop = &zone->zone_vfs_rwstats; + + if (rw == KSTAT_WRITE) + return (EACCES); + + /* + * Extract the VFS statistics from the kstat_io_t structure used by + * kstat_runq_enter() and related functions. Since the slow ops + * counters are updated directly by the VFS layer, there's no need to + * copy those statistics here. + * + * Note that kstat_runq_enter() and the related functions use + * gethrtime_unscaled(), so scale the time here. + */ + zvp->zv_nread.value.ui64 = kiop->nread; + zvp->zv_reads.value.ui64 = kiop->reads; + zvp->zv_rtime.value.ui64 = kiop->rtime; + zvp->zv_rlentime.value.ui64 = kiop->rlentime; + zvp->zv_nwritten.value.ui64 = kiop->nwritten; + zvp->zv_writes.value.ui64 = kiop->writes; + zvp->zv_wtime.value.ui64 = kiop->wtime; + zvp->zv_wlentime.value.ui64 = kiop->wlentime; + + scalehrtime((hrtime_t *)&zvp->zv_rtime.value.ui64); + scalehrtime((hrtime_t *)&zvp->zv_rlentime.value.ui64); + scalehrtime((hrtime_t *)&zvp->zv_wtime.value.ui64); + scalehrtime((hrtime_t *)&zvp->zv_wlentime.value.ui64); + + return (0); +} + +static kstat_t * +zone_vfs_kstat_create(zone_t *zone) +{ + kstat_t *ksp; + zone_vfs_kstat_t *zvp; + + if ((ksp = kstat_create_zone("zone_vfs", zone->zone_id, + zone->zone_name, "zone_vfs", KSTAT_TYPE_NAMED, + sizeof (zone_vfs_kstat_t) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL) + return (NULL); + + if (zone->zone_id != GLOBAL_ZONEID) + kstat_zone_add(ksp, GLOBAL_ZONEID); + + zvp = ksp->ks_data = kmem_zalloc(sizeof (zone_vfs_kstat_t), KM_SLEEP); + ksp->ks_data_size += strlen(zone->zone_name) + 1; + ksp->ks_lock = &zone->zone_vfs_lock; + zone->zone_vfs_stats = zvp; + + /* The kstat "name" field is not large enough for a full zonename */ + kstat_named_init(&zvp->zv_zonename, "zonename", KSTAT_DATA_STRING); + kstat_named_setstr(&zvp->zv_zonename, zone->zone_name); + kstat_named_init(&zvp->zv_nread, "nread", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_reads, "reads", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_rtime, "rtime", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_rlentime, "rlentime", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_nwritten, "nwritten", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_writes, "writes", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_wtime, "wtime", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_wlentime, "wlentime", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_10ms_ops, "10ms_ops", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_100ms_ops, "100ms_ops", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_1s_ops, "1s_ops", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_10s_ops, "10s_ops", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_delay_cnt, "delay_cnt", KSTAT_DATA_UINT64); + kstat_named_init(&zvp->zv_delay_time, "delay_time", KSTAT_DATA_UINT64); + + ksp->ks_update = zone_vfs_kstat_update; + ksp->ks_private = zone; + + kstat_install(ksp); + return (ksp); +} + +static int +zone_zfs_kstat_update(kstat_t *ksp, int rw) +{ + zone_t *zone = ksp->ks_private; + zone_zfs_kstat_t *zzp = ksp->ks_data; + kstat_io_t *kiop = &zone->zone_zfs_rwstats; + + if (rw == KSTAT_WRITE) + return (EACCES); + + /* + * Extract the ZFS statistics from the kstat_io_t structure used by + * kstat_runq_enter() and related functions. 
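The zone_vfs kstat created above is published under module "zone_vfs", instance zone_id, with the zone name as the kstat name. A libkstat consumer sketch (instance 0 and name "global" assume we are inspecting the global zone; compile with -lkstat):

#include <kstat.h>
#include <stdio.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *kn;

	if ((kc = kstat_open()) == NULL) {
		perror("kstat_open");
		return (1);
	}

	if ((ksp = kstat_lookup(kc, "zone_vfs", 0, "global")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1) {
		(void) fprintf(stderr, "cannot read zone_vfs:0:global\n");
		return (1);
	}

	if ((kn = kstat_data_lookup(ksp, "nread")) != NULL)
		(void) printf("nread  = %llu\n",
		    (unsigned long long)kn->value.ui64);
	if ((kn = kstat_data_lookup(ksp, "reads")) != NULL)
		(void) printf("reads  = %llu\n",
		    (unsigned long long)kn->value.ui64);
	if ((kn = kstat_data_lookup(ksp, "1s_ops")) != NULL)
		(void) printf("1s_ops = %llu\n",
		    (unsigned long long)kn->value.ui64);

	(void) kstat_close(kc);
	return (0);
}
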
Since the I/O throttle + * counters are updated directly by the ZFS layer, there's no need to + * copy those statistics here. + * + * Note that kstat_runq_enter() and the related functions use + * gethrtime_unscaled(), so scale the time here. + */ + zzp->zz_nread.value.ui64 = kiop->nread; + zzp->zz_reads.value.ui64 = kiop->reads; + zzp->zz_rtime.value.ui64 = kiop->rtime; + zzp->zz_rlentime.value.ui64 = kiop->rlentime; + zzp->zz_nwritten.value.ui64 = kiop->nwritten; + zzp->zz_writes.value.ui64 = kiop->writes; + + scalehrtime((hrtime_t *)&zzp->zz_rtime.value.ui64); + scalehrtime((hrtime_t *)&zzp->zz_rlentime.value.ui64); + + return (0); +} + +static kstat_t * +zone_zfs_kstat_create(zone_t *zone) +{ + kstat_t *ksp; + zone_zfs_kstat_t *zzp; + + if ((ksp = kstat_create_zone("zone_zfs", zone->zone_id, + zone->zone_name, "zone_zfs", KSTAT_TYPE_NAMED, + sizeof (zone_zfs_kstat_t) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL) + return (NULL); + + if (zone->zone_id != GLOBAL_ZONEID) + kstat_zone_add(ksp, GLOBAL_ZONEID); + + zzp = ksp->ks_data = kmem_zalloc(sizeof (zone_zfs_kstat_t), KM_SLEEP); + ksp->ks_data_size += strlen(zone->zone_name) + 1; + ksp->ks_lock = &zone->zone_zfs_lock; + zone->zone_zfs_stats = zzp; + + /* The kstat "name" field is not large enough for a full zonename */ + kstat_named_init(&zzp->zz_zonename, "zonename", KSTAT_DATA_STRING); + kstat_named_setstr(&zzp->zz_zonename, zone->zone_name); + kstat_named_init(&zzp->zz_nread, "nread", KSTAT_DATA_UINT64); + kstat_named_init(&zzp->zz_reads, "reads", KSTAT_DATA_UINT64); + kstat_named_init(&zzp->zz_rtime, "rtime", KSTAT_DATA_UINT64); + kstat_named_init(&zzp->zz_rlentime, "rlentime", KSTAT_DATA_UINT64); + kstat_named_init(&zzp->zz_nwritten, "nwritten", KSTAT_DATA_UINT64); + kstat_named_init(&zzp->zz_writes, "writes", KSTAT_DATA_UINT64); + kstat_named_init(&zzp->zz_waittime, "waittime", KSTAT_DATA_UINT64); + + ksp->ks_update = zone_zfs_kstat_update; + ksp->ks_private = zone; + + kstat_install(ksp); + return (ksp); +} + +static int +zone_mcap_kstat_update(kstat_t *ksp, int rw) +{ + zone_t *zone = ksp->ks_private; + zone_mcap_kstat_t *zmp = ksp->ks_data; + + if (rw == KSTAT_WRITE) + return (EACCES); + + zmp->zm_rss.value.ui64 = zone->zone_phys_mem; + zmp->zm_phys_cap.value.ui64 = zone->zone_phys_mem_ctl; + zmp->zm_swap.value.ui64 = zone->zone_max_swap; + zmp->zm_swap_cap.value.ui64 = zone->zone_max_swap_ctl; + zmp->zm_nover.value.ui64 = zone->zone_mcap_nover; + zmp->zm_pagedout.value.ui64 = zone->zone_mcap_pagedout; + zmp->zm_pgpgin.value.ui64 = zone->zone_pgpgin; + zmp->zm_anonpgin.value.ui64 = zone->zone_anonpgin; + zmp->zm_execpgin.value.ui64 = zone->zone_execpgin; + zmp->zm_fspgin.value.ui64 = zone->zone_fspgin; + zmp->zm_anon_alloc_fail.value.ui64 = zone->zone_anon_alloc_fail; + zmp->zm_pf_throttle.value.ui64 = zone->zone_pf_throttle; + zmp->zm_pf_throttle_usec.value.ui64 = zone->zone_pf_throttle_usec; + + return (0); +} + +static kstat_t * +zone_mcap_kstat_create(zone_t *zone) +{ + kstat_t *ksp; + zone_mcap_kstat_t *zmp; + + if ((ksp = kstat_create_zone("memory_cap", zone->zone_id, + zone->zone_name, "zone_memory_cap", KSTAT_TYPE_NAMED, + sizeof (zone_mcap_kstat_t) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL) + return (NULL); + + if (zone->zone_id != GLOBAL_ZONEID) + kstat_zone_add(ksp, GLOBAL_ZONEID); + + zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_mcap_kstat_t), KM_SLEEP); + ksp->ks_data_size += strlen(zone->zone_name) + 1; + ksp->ks_lock = &zone->zone_mcap_lock; + 
zone->zone_mcap_stats = zmp; + + /* The kstat "name" field is not large enough for a full zonename */ + kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING); + kstat_named_setstr(&zmp->zm_zonename, zone->zone_name); + kstat_named_init(&zmp->zm_rss, "rss", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_phys_cap, "physcap", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_swap, "swap", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_swap_cap, "swapcap", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_nover, "nover", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_pagedout, "pagedout", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_pgpgin, "pgpgin", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_anonpgin, "anonpgin", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_execpgin, "execpgin", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_fspgin, "fspgin", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_anon_alloc_fail, "anon_alloc_fail", + KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_pf_throttle, "n_pf_throttle", + KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_pf_throttle_usec, "n_pf_throttle_usec", + KSTAT_DATA_UINT64); + + ksp->ks_update = zone_mcap_kstat_update; + ksp->ks_private = zone; + + kstat_install(ksp); + return (ksp); +} + +static int +zone_misc_kstat_update(kstat_t *ksp, int rw) +{ + zone_t *zone = ksp->ks_private; + zone_misc_kstat_t *zmp = ksp->ks_data; + hrtime_t tmp; + + if (rw == KSTAT_WRITE) + return (EACCES); + + tmp = zone->zone_utime; + scalehrtime(&tmp); + zmp->zm_utime.value.ui64 = tmp; + tmp = zone->zone_stime; + scalehrtime(&tmp); + zmp->zm_stime.value.ui64 = tmp; + tmp = zone->zone_wtime; + scalehrtime(&tmp); + zmp->zm_wtime.value.ui64 = tmp; + + zmp->zm_avenrun1.value.ui32 = zone->zone_avenrun[0]; + zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1]; + zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2]; + + zmp->zm_run_ticks.value.ui64 = zone->zone_run_ticks; + zmp->zm_run_wait.value.ui64 = zone->zone_runq_cntr; + zmp->zm_fss_shr_pct.value.ui64 = zone->zone_fss_shr_pct; + zmp->zm_fss_pri_hi.value.ui64 = zone->zone_fss_pri_hi; + zmp->zm_fss_pri_avg.value.ui64 = zone->zone_fss_pri_avg; + + return (0); +} + +static kstat_t * +zone_misc_kstat_create(zone_t *zone) +{ + kstat_t *ksp; + zone_misc_kstat_t *zmp; + + if ((ksp = kstat_create_zone("zones", zone->zone_id, + zone->zone_name, "zone_misc", KSTAT_TYPE_NAMED, + sizeof (zone_misc_kstat_t) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL) + return (NULL); + + if (zone->zone_id != GLOBAL_ZONEID) + kstat_zone_add(ksp, GLOBAL_ZONEID); + + zmp = ksp->ks_data = kmem_zalloc(sizeof (zone_misc_kstat_t), KM_SLEEP); + ksp->ks_data_size += strlen(zone->zone_name) + 1; + ksp->ks_lock = &zone->zone_misc_lock; + zone->zone_misc_stats = zmp; + + /* The kstat "name" field is not large enough for a full zonename */ + kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING); + kstat_named_setstr(&zmp->zm_zonename, zone->zone_name); + kstat_named_init(&zmp->zm_utime, "nsec_user", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_stime, "nsec_sys", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_wtime, "nsec_waitrq", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_avenrun1, "avenrun_1min", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min", + KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_run_ticks, "run_ticks", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_run_wait, "run_queue", 
KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_fss_shr_pct, "fss_share_percent", + KSTAT_DATA_UINT32); + kstat_named_init(&zmp->zm_fss_pri_hi, "fss_pri_hi", KSTAT_DATA_UINT64); + kstat_named_init(&zmp->zm_fss_pri_avg, "fss_pri_avg", + KSTAT_DATA_UINT64); + + ksp->ks_update = zone_misc_kstat_update; + ksp->ks_private = zone; + + kstat_install(ksp); + return (ksp); +} + static void zone_kstat_create(zone_t *zone) { - zone->zone_lockedmem_kstat = zone_kstat_create_common(zone, + zone->zone_lockedmem_kstat = zone_rctl_kstat_create_common(zone, "lockedmem", zone_lockedmem_kstat_update); - zone->zone_swapresv_kstat = zone_kstat_create_common(zone, + zone->zone_swapresv_kstat = zone_rctl_kstat_create_common(zone, "swapresv", zone_swapresv_kstat_update); - zone->zone_nprocs_kstat = zone_kstat_create_common(zone, + zone->zone_physmem_kstat = zone_rctl_kstat_create_common(zone, + "physicalmem", zone_physmem_kstat_update); + zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone, "nprocs", zone_nprocs_kstat_update); + + if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) { + zone->zone_vfs_stats = kmem_zalloc( + sizeof (zone_vfs_kstat_t), KM_SLEEP); + } + + if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) { + zone->zone_zfs_stats = kmem_zalloc( + sizeof (zone_zfs_kstat_t), KM_SLEEP); + } + + if ((zone->zone_mcap_ksp = zone_mcap_kstat_create(zone)) == NULL) { + zone->zone_mcap_stats = kmem_zalloc( + sizeof (zone_mcap_kstat_t), KM_SLEEP); + } + + if ((zone->zone_misc_ksp = zone_misc_kstat_create(zone)) == NULL) { + zone->zone_misc_stats = kmem_zalloc( + sizeof (zone_misc_kstat_t), KM_SLEEP); + } } static void -zone_kstat_delete_common(kstat_t **pkstat) +zone_kstat_delete_common(kstat_t **pkstat, size_t datasz) { void *data; if (*pkstat != NULL) { data = (*pkstat)->ks_data; kstat_delete(*pkstat); - kmem_free(data, sizeof (zone_kstat_t)); + kmem_free(data, datasz); *pkstat = NULL; } } @@ -1847,9 +2332,23 @@ zone_kstat_delete_common(kstat_t **pkstat) static void zone_kstat_delete(zone_t *zone) { - zone_kstat_delete_common(&zone->zone_lockedmem_kstat); - zone_kstat_delete_common(&zone->zone_swapresv_kstat); - zone_kstat_delete_common(&zone->zone_nprocs_kstat); + zone_kstat_delete_common(&zone->zone_lockedmem_kstat, + sizeof (zone_kstat_t)); + zone_kstat_delete_common(&zone->zone_swapresv_kstat, + sizeof (zone_kstat_t)); + zone_kstat_delete_common(&zone->zone_physmem_kstat, + sizeof (zone_kstat_t)); + zone_kstat_delete_common(&zone->zone_nprocs_kstat, + sizeof (zone_kstat_t)); + + zone_kstat_delete_common(&zone->zone_vfs_ksp, + sizeof (zone_vfs_kstat_t)); + zone_kstat_delete_common(&zone->zone_zfs_ksp, + sizeof (zone_zfs_kstat_t)); + zone_kstat_delete_common(&zone->zone_mcap_ksp, + sizeof (zone_mcap_kstat_t)); + zone_kstat_delete_common(&zone->zone_misc_ksp, + sizeof (zone_misc_kstat_t)); } /* @@ -1883,6 +2382,8 @@ zone_zsd_init(void) zone0.zone_locked_mem_ctl = UINT64_MAX; ASSERT(zone0.zone_max_swap == 0); zone0.zone_max_swap_ctl = UINT64_MAX; + zone0.zone_phys_mem = 0; + zone0.zone_phys_mem_ctl = UINT64_MAX; zone0.zone_max_lofi = 0; zone0.zone_max_lofi_ctl = UINT64_MAX; zone0.zone_shmmax = 0; @@ -1906,7 +2407,13 @@ zone_zsd_init(void) zone0.zone_initname = initname; zone0.zone_lockedmem_kstat = NULL; zone0.zone_swapresv_kstat = NULL; + zone0.zone_physmem_kstat = NULL; zone0.zone_nprocs_kstat = NULL; + zone0.zone_zfs_io_pri = 1; + zone0.zone_stime = 0; + zone0.zone_utime = 0; + zone0.zone_wtime = 0; + list_create(&zone0.zone_ref_list, sizeof (zone_ref_t), 
offsetof(zone_ref_t, zref_linkage)); list_create(&zone0.zone_zsd, sizeof (struct zsd_entry), @@ -2013,6 +2520,21 @@ zone_init(void) RCTL_GLOBAL_INFINITE, MAXCAP, MAXCAP, &zone_cpu_cap_ops); + rc_zone_cpu_baseline = rctl_register("zone.cpu-baseline", + RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER | + RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER, + MAXCAP, MAXCAP, &zone_cpu_base_ops); + + rc_zone_cpu_burst_time = rctl_register("zone.cpu-burst-time", + RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER | + RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER, + INT_MAX, INT_MAX, &zone_cpu_burst_time_ops); + + rc_zone_zfs_io_pri = rctl_register("zone.zfs-io-priority", + RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER | + RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER, + 1024, 1024, &zone_zfs_io_pri_ops); + rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE, RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT, INT_MAX, INT_MAX, &zone_lwps_ops); @@ -2054,6 +2576,20 @@ zone_init(void) rde = rctl_dict_lookup("zone.cpu-shares"); (void) rctl_val_list_insert(&rde->rcd_default_value, dval); + /* + * Create a rctl_val with PRIVILEGED, NOACTION, value = 1. Then attach + * this at the head of the rctl_dict_entry for ``zone.zfs-io-priority'. + */ + dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP); + bzero(dval, sizeof (rctl_val_t)); + dval->rcv_value = 1; + dval->rcv_privilege = RCPRIV_PRIVILEGED; + dval->rcv_flagaction = RCTL_LOCAL_NOACTION; + dval->rcv_action_recip_pid = -1; + + rde = rctl_dict_lookup("zone.zfs-io-priority"); + (void) rctl_val_list_insert(&rde->rcd_default_value, dval); + rc_zone_locked_mem = rctl_register("zone.max-locked-memory", RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES | RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, @@ -2064,6 +2600,11 @@ zone_init(void) RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, &zone_max_swap_ops); + rc_zone_phys_mem = rctl_register("zone.max-physical-memory", + RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES | + RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, + &zone_phys_mem_ops); + rc_zone_max_lofi = rctl_register("zone.max-lofi", RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX, @@ -2375,14 +2916,65 @@ zone_set_initname(zone_t *zone, const char *zone_initname) return (0); } +/* + * The zone_set_mcap_nover and zone_set_mcap_pageout functions are used + * to provide the physical memory capping kstats. Since physical memory + * capping is currently implemented in userland, that code uses the setattr + * entry point to increment the kstats. We always simply increment nover + * every time that setattr is called and we always add in the input value + * to zone_mcap_pagedout every time that is called. 
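Because the physical-memory cap is enforced by a user-land capper, observers see its effect through the memory_cap kstat created earlier. A sketch of a reader that reports RSS against the cap (instance 0 / name "global" again assume the global zone; physcap is UINT64_MAX while the zone is uncapped):

#include <kstat.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *rss, *cap, *nover;

	if ((kc = kstat_open()) == NULL)
		return (1);

	if ((ksp = kstat_lookup(kc, "memory_cap", 0, "global")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1)
		return (1);

	rss = kstat_data_lookup(ksp, "rss");
	cap = kstat_data_lookup(ksp, "physcap");
	nover = kstat_data_lookup(ksp, "nover");

	if (rss == NULL || cap == NULL || nover == NULL)
		return (1);

	(void) printf("rss %llu bytes, over cap %llu times\n",
	    (unsigned long long)rss->value.ui64,
	    (unsigned long long)nover->value.ui64);

	if (cap->value.ui64 != 0 && cap->value.ui64 != UINT64_MAX)
		(void) printf("%.1f%% of physical cap\n",
		    100.0 * (double)rss->value.ui64 /
		    (double)cap->value.ui64);

	(void) kstat_close(kc);
	return (0);
}
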
+ */ +/*ARGSUSED*/ static int -zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap) +zone_set_mcap_nover(zone_t *zone, const uint64_t *zone_nover) { - uint64_t mcap; - int err = 0; + zone->zone_mcap_nover++; + + return (0); +} + +static int +zone_set_mcap_pageout(zone_t *zone, const uint64_t *zone_pageout) +{ + uint64_t pageout; + int err; + + if ((err = copyin(zone_pageout, &pageout, sizeof (uint64_t))) == 0) + zone->zone_mcap_pagedout += pageout; - if ((err = copyin(zone_mcap, &mcap, sizeof (uint64_t))) == 0) - zone->zone_phys_mcap = mcap; + return (err); +} + +/* + * The zone_set_page_fault_delay function is used to set the number of usecs + * to throttle page faults. This is normally 0 but can be set to a non-0 value + * by the user-land memory capping code when the zone is over its physcial + * memory cap. + */ +static int +zone_set_page_fault_delay(zone_t *zone, const uint32_t *pfdelay) +{ + uint32_t dusec; + int err; + + if ((err = copyin(pfdelay, &dusec, sizeof (uint32_t))) == 0) + zone->zone_pg_flt_delay = dusec; + + return (err); +} + +/* + * The zone_set_rss function is used to set the zone's RSS when we do the + * fast, approximate calculation in user-land. + */ +static int +zone_set_rss(zone_t *zone, const uint64_t *prss) +{ + uint64_t rss; + int err; + + if ((err = copyin(prss, &rss, sizeof (uint64_t))) == 0) + zone->zone_phys_mem = rss; return (err); } @@ -2794,6 +3386,12 @@ getzoneid(void) return (curproc->p_zone->zone_id); } +zoneid_t +getzonedid(void) +{ + return (curproc->p_zone->zone_did); +} + /* * Internal versions of zone_find_by_*(). These don't zone_hold() or * check the validity of a zone's state. @@ -2977,6 +3575,92 @@ zone_find_by_path(const char *path) } /* + * Public interface for updating per-zone load averages. Called once per + * second. + * + * Based on loadavg_update(), genloadavg() and calcloadavg() from clock.c. + */ +void +zone_loadavg_update() +{ + zone_t *zp; + zone_status_t status; + struct loadavg_s *lavg; + hrtime_t zone_total; + int i; + hrtime_t hr_avg; + int nrun; + static int64_t f[3] = { 135, 27, 9 }; + int64_t q, r; + + mutex_enter(&zonehash_lock); + for (zp = list_head(&zone_active); zp != NULL; + zp = list_next(&zone_active, zp)) { + mutex_enter(&zp->zone_lock); + + /* Skip zones that are on the way down or not yet up */ + status = zone_status_get(zp); + if (status < ZONE_IS_READY || status >= ZONE_IS_DOWN) { + /* For all practical purposes the zone doesn't exist. */ + mutex_exit(&zp->zone_lock); + continue; + } + + /* + * Update the 10 second moving average data in zone_loadavg. + */ + lavg = &zp->zone_loadavg; + + zone_total = zp->zone_utime + zp->zone_stime + zp->zone_wtime; + scalehrtime(&zone_total); + + /* The zone_total should always be increasing. */ + lavg->lg_loads[lavg->lg_cur] = (zone_total > lavg->lg_total) ? + zone_total - lavg->lg_total : 0; + lavg->lg_cur = (lavg->lg_cur + 1) % S_LOADAVG_SZ; + /* lg_total holds the prev. 1 sec. total */ + lavg->lg_total = zone_total; + + /* + * To simplify the calculation, we don't calculate the load avg. + * until the zone has been up for at least 10 seconds and our + * moving average is thus full. + */ + if ((lavg->lg_len + 1) < S_LOADAVG_SZ) { + lavg->lg_len++; + mutex_exit(&zp->zone_lock); + continue; + } + + /* Now calculate the 1min, 5min, 15 min load avg. */ + hr_avg = 0; + for (i = 0; i < S_LOADAVG_SZ; i++) + hr_avg += lavg->lg_loads[i]; + hr_avg = hr_avg / S_LOADAVG_SZ; + nrun = hr_avg / (NANOSEC / LGRP_LOADAVG_IN_THREAD_MAX); + + /* Compute load avg. 
See comment in calcloadavg() */ + for (i = 0; i < 3; i++) { + q = (zp->zone_hp_avenrun[i] >> 16) << 7; + r = (zp->zone_hp_avenrun[i] & 0xffff) << 7; + zp->zone_hp_avenrun[i] += + ((nrun - q) * f[i] - ((r * f[i]) >> 16)) >> 4; + + /* avenrun[] can only hold 31 bits of load avg. */ + if (zp->zone_hp_avenrun[i] < + ((uint64_t)1<<(31+16-FSHIFT))) + zp->zone_avenrun[i] = (int32_t) + (zp->zone_hp_avenrun[i] >> (16 - FSHIFT)); + else + zp->zone_avenrun[i] = 0x7fffffff; + } + + mutex_exit(&zp->zone_lock); + } + mutex_exit(&zonehash_lock); +} + +/* * Get the number of cpus visible to this zone. The system-wide global * 'ncpus' is returned if pools are disabled, the caller is in the * global zone, or a NULL zone argument is passed in. @@ -3789,7 +4473,10 @@ zsched(void *arg) mutex_enter(&zone_status_lock); zone_status_set(zone, ZONE_IS_SHUTTING_DOWN); mutex_exit(&zone_status_lock); + } else { + zone->zone_boot_time = gethrestime_sec(); } + pool_unlock(); } @@ -4081,7 +4768,7 @@ zone_create(const char *zone_name, const char *zone_root, caddr_t rctlbuf, size_t rctlbufsz, caddr_t zfsbuf, size_t zfsbufsz, int *extended_error, int match, uint32_t doi, const bslabel_t *label, - int flags) + int flags, zoneid_t zone_did) { struct zsched_arg zarg; nvlist_t *rctls = NULL; @@ -4104,6 +4791,7 @@ zone_create(const char *zone_name, const char *zone_root, zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP); zoneid = zone->zone_id = id_alloc(zoneid_space); + zone->zone_did = zone_did; zone->zone_status = ZONE_IS_UNINITIALIZED; zone->zone_pool = pool_default; zone->zone_pool_mod = gethrtime(); @@ -4172,10 +4860,14 @@ zone_create(const char *zone_name, const char *zone_root, zone->zone_locked_mem_ctl = UINT64_MAX; zone->zone_max_swap = 0; zone->zone_max_swap_ctl = UINT64_MAX; + zone->zone_phys_mem = 0; + zone->zone_phys_mem_ctl = UINT64_MAX; zone->zone_max_lofi = 0; zone->zone_max_lofi_ctl = UINT64_MAX; - zone0.zone_lockedmem_kstat = NULL; - zone0.zone_swapresv_kstat = NULL; + zone->zone_lockedmem_kstat = NULL; + zone->zone_swapresv_kstat = NULL; + zone->zone_physmem_kstat = NULL; + zone->zone_zfs_io_pri = 1; /* * Zsched initializes the rctls. 
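The fixed-point avenrun update in zone_loadavg_update() earlier is an exponentially decaying average. Working through the shifts (an editorial derivation, not something stated in the source), each one-second update is approximately avenrun += (target - avenrun) * f[i] / 8192, so f[i] / 8192 should approximate 1 - exp(-1/tau) for decay constants of 60, 300, and 900 seconds. The program below compares the two (compile with -lm):

#include <math.h>
#include <stdio.h>

int
main(void)
{
	static const int f[3] = { 135, 27, 9 };
	static const double tau[3] = { 60.0, 300.0, 900.0 };
	int i;

	for (i = 0; i < 3; i++) {
		double fixed = (double)f[i] / 8192.0;
		double exact = 1.0 - exp(-1.0 / tau[i]);

		(void) printf("tau=%4.0fs  f/8192=%.6f  1-e^(-1/tau)=%.6f\n",
		    tau[i], fixed, exact);
	}
	return (0);
}
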
@@ -4474,6 +5166,7 @@ zone_boot(zoneid_t zoneid) static int zone_empty(zone_t *zone) { + int cnt = 0; int waitstatus; /* @@ -4484,7 +5177,16 @@ zone_empty(zone_t *zone) ASSERT(MUTEX_NOT_HELD(&zonehash_lock)); while ((waitstatus = zone_status_timedwait_sig(zone, ddi_get_lbolt() + hz, ZONE_IS_EMPTY)) == -1) { - killall(zone->zone_id); + boolean_t force = B_FALSE; + + /* Every 30 seconds, try harder */ + if (cnt++ >= 30) { + cmn_err(CE_WARN, "attempt to force kill zone %d\n", + zone->zone_id); + force = B_TRUE; + cnt = 0; + } + killall(zone->zone_id, force); } /* * return EINTR if we were signaled @@ -5222,14 +5924,6 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) error = EFAULT; } break; - case ZONE_ATTR_PHYS_MCAP: - size = sizeof (zone->zone_phys_mcap); - if (bufsize > size) - bufsize = size; - if (buf != NULL && - copyout(&zone->zone_phys_mcap, buf, bufsize) != 0) - error = EFAULT; - break; case ZONE_ATTR_SCHED_CLASS: mutex_enter(&class_lock); @@ -5284,6 +5978,14 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) } kmem_free(zbuf, bufsize); break; + case ZONE_ATTR_DID: + size = sizeof (zoneid_t); + if (bufsize > size) + bufsize = size; + + if (buf != NULL && copyout(&zone->zone_did, buf, bufsize) != 0) + error = EFAULT; + break; default: if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) { size = bufsize; @@ -5315,10 +6017,11 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) return (set_errno(EPERM)); /* - * Only the ZONE_ATTR_PHYS_MCAP attribute can be set on the - * global zone. + * Only the ZONE_ATTR_PMCAP_NOVER and ZONE_ATTR_PMCAP_PAGEOUT + * attributes can be set on the global zone. */ - if (zoneid == GLOBAL_ZONEID && attr != ZONE_ATTR_PHYS_MCAP) { + if (zoneid == GLOBAL_ZONEID && + attr != ZONE_ATTR_PMCAP_NOVER && attr != ZONE_ATTR_PMCAP_PAGEOUT) { return (set_errno(EINVAL)); } @@ -5335,7 +6038,9 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) * non-global zones. 
*/ zone_status = zone_status_get(zone); - if (attr != ZONE_ATTR_PHYS_MCAP && zone_status > ZONE_IS_READY) { + if (attr != ZONE_ATTR_PMCAP_NOVER && attr != ZONE_ATTR_PMCAP_PAGEOUT && + attr != ZONE_ATTR_PG_FLT_DELAY && attr != ZONE_ATTR_RSS && + zone_status > ZONE_IS_READY) { err = EINVAL; goto done; } @@ -5344,6 +6049,10 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) case ZONE_ATTR_INITNAME: err = zone_set_initname(zone, (const char *)buf); break; + case ZONE_ATTR_INITNORESTART: + zone->zone_restart_init = B_FALSE; + err = 0; + break; case ZONE_ATTR_BOOTARGS: err = zone_set_bootargs(zone, (const char *)buf); break; @@ -5353,8 +6062,17 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize) case ZONE_ATTR_FS_ALLOWED: err = zone_set_fs_allowed(zone, (const char *)buf); break; - case ZONE_ATTR_PHYS_MCAP: - err = zone_set_phys_mcap(zone, (const uint64_t *)buf); + case ZONE_ATTR_PMCAP_NOVER: + err = zone_set_mcap_nover(zone, (const uint64_t *)buf); + break; + case ZONE_ATTR_PMCAP_PAGEOUT: + err = zone_set_mcap_pageout(zone, (const uint64_t *)buf); + break; + case ZONE_ATTR_PG_FLT_DELAY: + err = zone_set_page_fault_delay(zone, (const uint32_t *)buf); + break; + case ZONE_ATTR_RSS: + err = zone_set_rss(zone, (const uint64_t *)buf); break; case ZONE_ATTR_SCHED_CLASS: err = zone_set_sched_class(zone, (const char *)buf); @@ -6075,6 +6793,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) zs.doi = zs32.doi; zs.label = (const bslabel_t *)(uintptr_t)zs32.label; zs.flags = zs32.flags; + zs.zoneid = zs32.zoneid; #else panic("get_udatamodel() returned bogus result\n"); #endif @@ -6085,7 +6804,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4) (caddr_t)zs.rctlbuf, zs.rctlbufsz, (caddr_t)zs.zfsbuf, zs.zfsbufsz, zs.extended_error, zs.match, zs.doi, - zs.label, zs.flags)); + zs.label, zs.flags, zs.zoneid)); case ZONE_BOOT: return (zone_boot((zoneid_t)(uintptr_t)arg1)); case ZONE_DESTROY: @@ -6363,7 +7082,7 @@ zone_kadmin(int cmd, int fcn, const char *mdep, cred_t *credp) * zone_ki_call_zoneadmd() will do a more thorough job of this * later. */ - killall(zone->zone_id); + killall(zone->zone_id, B_FALSE); /* * Now, create the thread to contact zoneadmd and do the rest of the * work. This thread can't be created in our zone otherwise |
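With the kstat_fr.c change in this commit, unix:0:system_misc reports the zone's own load averages when read from a non-global zone and exposes the new nsec_per_tick statistic. A consumer sketch; dividing the avenrun values by FSCALE (256 on illumos) mirrors what getloadavg(3C) consumers do:

#include <kstat.h>
#include <stdio.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *a1, *npt;

	if ((kc = kstat_open()) == NULL)
		return (1);

	if ((ksp = kstat_lookup(kc, "unix", 0, "system_misc")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1)
		return (1);

	if ((a1 = kstat_data_lookup(ksp, "avenrun_1min")) != NULL)
		(void) printf("1-minute load: %.2f\n",
		    a1->value.ui32 / 256.0);
	if ((npt = kstat_data_lookup(ksp, "nsec_per_tick")) != NULL)
		(void) printf("nsec_per_tick: %u\n", npt->value.ui32);

	(void) kstat_close(kc);
	return (0);
}
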