85 files changed, 8180 insertions, 872 deletions
diff --git a/usr/src/cmd/prstat/prstat.c b/usr/src/cmd/prstat/prstat.c
index 743990ad2a..5a4b9185ea 100644
--- a/usr/src/cmd/prstat/prstat.c
+++ b/usr/src/cmd/prstat/prstat.c
@@ -31,6 +31,7 @@
 #include <sys/loadavg.h>
 #include <sys/time.h>
 #include <sys/pset.h>
+#include <sys/vm_usage.h>
 #include <zone.h>
 #include <libzonecfg.h>
 
@@ -86,21 +87,21 @@
 #define	USAGE_HEADER_LWP \
 "   PID USERNAME USR SYS TRP TFL DFL LCK SLP LAT VCX ICX SCL SIG PROCESS/LWPID "
 #define	USER_HEADER_PROC \
-" NPROC USERNAME  SIZE   RSS MEMORY      TIME  CPU                             "
+" NPROC USERNAME  SWAP   RSS MEMORY      TIME  CPU                             "
 #define	USER_HEADER_LWP \
-"  NLWP USERNAME  SIZE   RSS MEMORY      TIME  CPU                             "
+"  NLWP USERNAME  SWAP   RSS MEMORY      TIME  CPU                             "
 #define	TASK_HEADER_PROC \
-"TASKID    NPROC  SIZE   RSS MEMORY      TIME  CPU PROJECT                     "
+"TASKID    NPROC  SWAP   RSS MEMORY      TIME  CPU PROJECT                     "
 #define	TASK_HEADER_LWP \
-"TASKID     NLWP  SIZE   RSS MEMORY      TIME  CPU PROJECT                     "
+"TASKID     NLWP  SWAP   RSS MEMORY      TIME  CPU PROJECT                     "
 #define	PROJECT_HEADER_PROC \
-"PROJID    NPROC  SIZE   RSS MEMORY      TIME  CPU PROJECT                     "
+"PROJID    NPROC  SWAP   RSS MEMORY      TIME  CPU PROJECT                     "
 #define	PROJECT_HEADER_LWP \
-"PROJID     NLWP  SIZE   RSS MEMORY      TIME  CPU PROJECT                     "
+"PROJID     NLWP  SWAP   RSS MEMORY      TIME  CPU PROJECT                     "
 #define	ZONE_HEADER_PROC \
-"ZONEID    NPROC  SIZE   RSS MEMORY      TIME  CPU ZONE                        "
+"ZONEID    NPROC  SWAP   RSS MEMORY      TIME  CPU ZONE                        "
 #define	ZONE_HEADER_LWP \
-"ZONEID     NLWP  SIZE   RSS MEMORY      TIME  CPU ZONE                        "
+"ZONEID     NLWP  SWAP   RSS MEMORY      TIME  CPU ZONE                        "
 #define	PSINFO_LINE \
 "%6d %-8s %5s %5s %-6s %3s  %3s %9s %3.3s%% %-.16s/%d"
 #define	PSINFO_LINE_LGRP \
@@ -160,6 +161,8 @@ static volatile uint_t sigwinch = 0;
 static volatile uint_t sigtstp = 0;
 static volatile uint_t sigterm = 0;
 
+static long pagesize;
+
 /* default settings */
 
 static optdesc_t opts = {
@@ -185,6 +188,129 @@ psetloadavg(long psetid, void *ptr)
 }
 
 /*
+ * Overrides the swap, rss and memory-percentage figures of every member of
+ * list with the aggregate values reported by getvmusage(2).  Members that
+ * getvmusage() does not report, and all members if the call fails, keep the
+ * values computed by /proc aggregation and have id_sizematch left untouched.
+ */
+static void
+list_getsize(list_t *list)
+{
+	id_info_t *id;
+	vmusage_t *results, *next;
+	vmusage_t *match;
+	size_t nres = 0;
+	size_t i;
+	uint_t flags = 0;
+	size_t physmem = sysconf(_SC_PHYS_PAGES) * pagesize;
+
+	/*
+	 * Determine what swap/rss results to calculate.  getvmusage() will
+	 * prune results returned to non-global zones automatically, so
+	 * there is no need to pass different flags when calling from a
+	 * non-global zone.
+	 *
+	 * list_getsize() is only called with one output mode set, because
+	 * -Z, -J, -T, and -a are mutually exclusive.  Exactly one flag is
+	 * therefore chosen here, and the switch below relies on that.
+	 */
+	if (opts.o_outpmode & OPT_USERS) {
+		/*
+		 * Gather rss for all users in all zones.  Treat the same
+		 * uid in different zones as the same user.
+		 */
+		flags |= VMUSAGE_COL_RUSERS;
+	} else if (opts.o_outpmode & OPT_TASKS) {
+		/* Gather rss for all tasks in all zones */
+		flags |= VMUSAGE_ALL_TASKS;
+	} else if (opts.o_outpmode & OPT_PROJECTS) {
+		/*
+		 * Gather rss for all projects in all zones.  Treat the same
+		 * projid in different zones as the same project.
+		 */
+		flags |= VMUSAGE_COL_PROJECTS;
+	} else if (opts.o_outpmode & OPT_ZONES) {
+		/* Gather rss for all zones */
+		flags |= VMUSAGE_ALL_ZONES;
+	} else {
+		Die(gettext(
+		    "Cannot determine rss flags for output options %x\n"),
+		    opts.o_outpmode);
+	}
+
+	/*
+	 * getvmusage() returns an array of result structures.  One for
+	 * each zone, project, task, or user on the system, depending on
+	 * flags.
+	 *
+	 * If getvmusage() fails, prstat will use the size already gathered
+	 * from psinfo
+	 */
+	if (getvmusage(flags, opts.o_interval, NULL, &nres) != 0)
+		return;
+
+	results = (vmusage_t *)Malloc(sizeof (vmusage_t) * nres);
+	for (;;) {
+		if (getvmusage(flags, opts.o_interval, results, &nres) == 0)
+			break;
+		if (errno == EOVERFLOW) {
+			results = (vmusage_t *)Realloc(results,
+			    sizeof (vmusage_t) * nres);
+			continue;
+		}
+		/*
+		 * Failure for some other reason.  Prstat will use the size
+		 * already gathered from psinfo.
+		 */
+		free(results);
+		return;
+	}
+	for (id = list->l_head; id != NULL; id = id->id_next) {
+		match = NULL;
+		next = results;
+		/* No early exit: the last result with a matching id wins. */
+		for (i = 0; i < nres; i++, next++) {
+			switch (flags) {
+			case VMUSAGE_COL_RUSERS:
+				if (next->vmu_id == id->id_uid)
+					match = next;
+				break;
+			case VMUSAGE_ALL_TASKS:
+				if (next->vmu_id == id->id_taskid)
+					match = next;
+				break;
+			case VMUSAGE_COL_PROJECTS:
+				if (next->vmu_id == id->id_projid)
+					match = next;
+				break;
+			case VMUSAGE_ALL_ZONES:
+				if (next->vmu_id == id->id_zoneid)
+					match = next;
+				break;
+			default:
+				Die(gettext(
+				    "Unknown vmusage flags %d\n"), flags);
+			}
+		}
+		if (match != NULL) {
+			id->id_size = match->vmu_swap_all / 1024;
+			id->id_rssize = match->vmu_rss_all / 1024;
+			id->id_pctmem = (100.0 * (float)match->vmu_rss_all) /
+			    (float)physmem;
+			/* Output using data from getvmusage() */
+			id->id_sizematch = B_TRUE;
+		}
+		/*
+		 * If no match is found, prstat will use the size already
+		 * gathered from psinfo.
+		 */
+	}
+
+	/* The result array is only needed for the duration of this call. */
+	free(results);
+}
+
+/*
  * A routine to display the contents of the list on the screen
  */
 static void
@@ -282,7 +408,7 @@ list_print(list_t *list)
 				cpu = (100 * id->id_pctcpu) / total_cpu;
 			else
 				cpu = id->id_pctcpu;
-			if (total_mem >= 100)
+			if (id->id_sizematch == B_FALSE && total_mem >= 100)
 				mem = (100 * id->id_pctmem) / total_mem;
 			else
 				mem = id->id_pctmem;
@@ -566,6 +692,7 @@ update:
 	id->id_zoneid	= lwp->li_info.pr_zoneid;
 	id->id_lgroup	= lwp->li_info.pr_lwp.pr_lgrp;
 	id->id_nproc++;
+	id->id_sizematch = B_FALSE;
 	if (lwp->li_flags & LWP_REPRESENT) {
 		id->id_size	= lwp->li_info.pr_size;
 		id->id_rssize	= lwp->li_info.pr_rssize;
@@ -1175,6 +1302,7 @@ Exit()
 	fd_exit();
 }
 
+
 int
 main(int argc, char **argv)
 {
@@ -1192,6 +1320,8 @@ main(int argc, char **argv)
 	lwpid_init();
 	fd_init(Setrlimit());
 
+	pagesize = sysconf(_SC_PAGESIZE);
+
 	while ((opt = getopt(argc, argv, "vcHmaRLtu:U:n:p:C:P:h:s:S:j:k:TJz:Z"))
 	    != (int)EOF) {
 		switch (opt) {
@@ -1419,21 +1549,25 @@ main(int argc, char **argv)
 			list_print(&lwps);
 		}
 		if (opts.o_outpmode & OPT_USERS) {
+			list_getsize(&users);
 			list_sort(&users);
 			list_print(&users);
 			list_clear(&users);
 		}
 		if (opts.o_outpmode & OPT_TASKS) {
+			list_getsize(&tasks);
 			list_sort(&tasks);
 			list_print(&tasks);
 			list_clear(&tasks);
 		}
 		if (opts.o_outpmode & OPT_PROJECTS) {
+			list_getsize(&projects);
 			list_sort(&projects);
 			list_print(&projects);
 			list_clear(&projects);
 		}
 		if (opts.o_outpmode & OPT_ZONES) {
+			list_getsize(&zones);
 			list_sort(&zones);
 			list_print(&zones);
 			list_clear(&zones);
diff --git a/usr/src/cmd/prstat/prstat.h b/usr/src/cmd/prstat/prstat.h
index 1a13329845..d130164e7d 100644
--- a/usr/src/cmd/prstat/prstat.h
+++ b/usr/src/cmd/prstat/prstat.h
@@ -122,6 +122,7 @@ typedef struct id_info {
 	zoneid_t	id_zoneid;	/* zone id */
 	int		id_lgroup;	/* lgroup id */
 	uint_t		id_nproc;	/* number of processes */
+	boolean_t	id_sizematch;	/* size/rssize from getvmusage() */
 	size_t		id_size;	/* memory usage */
 	size_t		id_rssize;	/* resident set size */
 	ulong_t		id_time;	/* cpu time (in secs) */
diff --git a/usr/src/cmd/rcap/common/rcapd.h b/usr/src/cmd/rcap/common/rcapd.h
index 89cf5f3d81..7a554c213b 100644
--- a/usr/src/cmd/rcap/common/rcapd.h
+++ b/usr/src/cmd/rcap/common/rcapd.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -58,7 +57,21 @@ extern "C" {
 #define	LCST_CAP_REMOVED		(1<<1)
 #define	LCST_CAP_ZERO			(1<<2)
 
-typedef int64_t rcid_t;
+typedef enum {
+	RCIDT_PROJECT,		/* rcid_val is a project ID */
+	RCIDT_ZONE		/* rcid_val is a zone ID */
+} rcid_type_t;
+
+typedef struct {
+	/*
+	 * The following field could just be a rcid_type_t but it gets
+	 * written out to a file as binary data for communication between
+	 * 64-bit rcapd & 32-bit rcapstat, so we need to force a standard size
+	 * and alignment here.
+	 */
+	uint64_t	rcid_type;	/* an rcid_type_t value */
+	int64_t		rcid_val;	/* project or zone ID, per rcid_type */
+} rcid_t;
 
 typedef enum {
 	LCU_COMPLETE,	/* an enumeration of all possible collections */
@@ -138,7 +151,6 @@ typedef struct lcollection {
 	uint64_t lcol_rss;		/* RSS of all processes (kB) */
 	uint64_t lcol_image_size;	/* image size of all processes (kB) */
 	uint64_t lcol_rss_cap;		/* RSS cap (kB) */
-	int lcol_stat_invalidate;	/* flag to reset interval statistics */
 	lcollection_stat_t lcol_stat;	/* statistics */
 	lcollection_stat_t lcol_stat_old; /* previous interval's statistics */
 	lprocess_t *lcol_lprocess;	/* member processes */
@@ -162,12 +174,11 @@ typedef struct lcollection_report {
 
 extern int get_psinfo(pid_t, struct psinfo *, int, int(*)(void *, int), void *,
     lprocess_t *);
-extern lcollection_t *lcollection_find(id_t);
+extern lcollection_t *lcollection_find(rcid_t *);
 extern void lcollection_freq_move(lprocess_t *);
-extern lcollection_t *lcollection_insert_update(rcid_t, uint64_t, char *,
+extern lcollection_t *lcollection_insert_update(rcid_t *, uint64_t, char *,
     int *changes);
 extern int lcollection_member(lcollection_t *, lprocess_t *);
-extern void lcollection_set_type(rctype_t);
 extern void lcollection_free(lcollection_t *);
 extern void lcollection_update(lcollection_update_type_t);
 extern void list_walk_collection(int (*)(lcollection_t *, void *), void *);
@@ -178,12 +189,6 @@ extern void scan_abort(void);
 extern void check_update_statistics(void);
 
 /*
- * The collection-specific function determining the collection ID from a
- * process' psinfo.
- */
-extern rcid_t(*rc_getidbypsinfo)(struct psinfo *);
-
-/*
  * Global (in rcapd only) variables.
  */
 extern rcfg_t rcfg;
diff --git a/usr/src/cmd/rcap/common/rcapd_stat.h b/usr/src/cmd/rcap/common/rcapd_stat.h
index c34ceb36e2..fa769ba643 100644
--- a/usr/src/cmd/rcap/common/rcapd_stat.h
+++ b/usr/src/cmd/rcap/common/rcapd_stat.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -44,7 +43,10 @@ extern "C" {
  */
 #define	RC_MODE_LEN	16
 typedef struct rcapd_stat_hdr {
-	pid_t		rs_pid;			/* pid of producer */
+	/*
+	 * sizeof pid_t can vary, so we use a fixed 64-bit quantity.
+	 */
+	uint64_t	rs_pid;			/* pid of producer */
 	hrtime_t	rs_time;		/* time recorded */
 
 	/*
diff --git a/usr/src/cmd/rcap/common/utils.c b/usr/src/cmd/rcap/common/utils.c
index f9757a12f6..c01f568915 100644
--- a/usr/src/cmd/rcap/common/utils.c
+++ b/usr/src/cmd/rcap/common/utils.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -260,3 +259,77 @@ xatoi(char *p)
 		return (i);
 	}
 }
+
+/*
+ * get_running_zones() calls zone_list(2) to size and then fetch the zone ID
+ * list, retrying if the count changes in between.  Zones whose state is
+ * ZONE_STATE_RUNNING are returned in a calloc()ed *zents, counted by *nzents.
+ */
+int
+get_running_zones(uint_t *nzents, zone_entry_t **zents)
+{
+	zoneid_t *zids;
+	uint_t nzents_saved;
+	int i;
+	zone_entry_t *zentp;
+	zone_state_t zstate;
+
+	*zents = NULL;
+	if (zone_list(NULL, nzents) != 0) {
+		warn(gettext("could not get zoneid list\n"));
+		return (E_ERROR);
+	}
+
+again:
+	if (*nzents == 0)
+		return (E_SUCCESS);
+
+	if ((zids = (zoneid_t *)calloc(*nzents, sizeof (zoneid_t))) == NULL) {
+		warn(gettext("out of memory: zones will not be capped\n"));
+		return (E_ERROR);
+	}
+
+	nzents_saved = *nzents;
+
+	if (zone_list(zids, nzents) != 0) {
+		warn(gettext("could not get zone list\n"));
+		free(zids);
+		return (E_ERROR);
+	}
+	if (*nzents != nzents_saved) {
+		/* list changed, try again */
+		free(zids);
+		goto again;
+	}
+
+	*zents = calloc(*nzents, sizeof (zone_entry_t));
+	if (*zents == NULL) {
+		warn(gettext("out of memory: zones will not be capped\n"));
+		free(zids);
+		return (E_ERROR);
+	}
+
+	zentp = *zents;
+	for (i = 0; i < *nzents; i++) {
+		char name[ZONENAME_MAX];
+
+		if (getzonenamebyid(zids[i], name, sizeof (name)) < 0) {
+			warn(gettext("could not get name for "
+			    "zoneid %d\n"), zids[i]);
+			continue;
+		}
+
+		(void) strlcpy(zentp->zname, name, sizeof (zentp->zname));
+		zentp->zid = zids[i];
+		if (zone_get_state(name, &zstate) != Z_OK ||
+		    zstate != ZONE_STATE_RUNNING)
+			continue;
+
+		/* running: keep this entry (a skipped slot is overwritten) */
+		zentp++;
+	}
+	*nzents = zentp - *zents;
+
+	free(zids);
+	return (E_SUCCESS);
+}
diff --git a/usr/src/cmd/rcap/common/utils.h b/usr/src/cmd/rcap/common/utils.h
index 678dee51ab..f952d59bbb 100644
--- a/usr/src/cmd/rcap/common/utils.h
+++ b/usr/src/cmd/rcap/common/utils.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -33,6 +32,7 @@
 #include <libintl.h>
 #include <stdarg.h>
 #include <time.h>
+#include <libzonecfg.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -63,6 +63,11 @@ typedef enum rcm_dst {
 	RCD_SYSLOG			/* syslog() daemon facility */
 } rcm_dst_t;
 
+typedef struct zone_entry {
+	zoneid_t	zid;			/* zone ID */
+	char		zname[ZONENAME_MAX];	/* zone name */
+} zone_entry_t;
+
 #define	LINELEN		256		/* max. message length */
 
 #ifdef DEBUG
@@ -95,6 +100,7 @@ extern void vdprintfe(int, char *, va_list);
 extern void dprintfe(int, char *, ...);
 extern void hrt2ts(hrtime_t, timestruc_t *);
 extern int xatoi(char *);
+extern int get_running_zones(uint_t *, zone_entry_t **);
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/cmd/rcap/rcapadm/Makefile b/usr/src/cmd/rcap/rcapadm/Makefile
index 59c1530185..3b4de32953 100644
--- a/usr/src/cmd/rcap/rcapadm/Makefile
+++ b/usr/src/cmd/rcap/rcapadm/Makefile
@@ -2,9 +2,8 @@
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #ident	"%Z%%M%	%I%	%E% SMI"
@@ -41,7 +40,7 @@ LINTSRCS =	$(COMMON_DIR)/utils.c \
 
 $(NOT_RELEASE_BUILD)CPPFLAGS 	+= -DDEBUG
 CPPFLAGS 			+= -I$(COMMON_DIR)
-LDLIBS 				+= -lumem -ll -lscf
+LDLIBS 				+= -lumem -ll -lscf -lzonecfg
 
 LINTFLAGS 			+= $(LDLIBS) -mnu
 
diff --git a/usr/src/cmd/rcap/rcapadm/rcapadm.c b/usr/src/cmd/rcap/rcapadm/rcapadm.c
index cc9fd290a1..1951682283 100644
--- a/usr/src/cmd/rcap/rcapadm/rcapadm.c
+++ b/usr/src/cmd/rcap/rcapadm/rcapadm.c
@@ -39,6 +39,8 @@
 #include <libscf_priv.h>
 #include <libintl.h>
 #include <locale.h>
+#include <zone.h>
+#include <libzonecfg.h>
 
 #include "utils.h"
 #include "rcapd.h"
@@ -61,7 +63,9 @@ usage()
 	    "               [-c <percent>]                         "
 	    "# set memory cap\n"
 	    "                                                      "
-	    "# enforcement threshold\n"));
+	    "# enforcement threshold\n"
+	    "               [-z <zonename> -m <max-rss>]               "
+	    "# update zone memory cap\n"));
 	exit(E_USAGE);
 }
 
@@ -135,18 +139,54 @@ out:
 	scf_handle_destroy(h);
 }
 
+/*
+ * Update the in-kernel memory cap for the specified running zone.  Returns
+ * E_SUCCESS without doing anything when not called from the global zone or
+ * when zonecfg_in_alt_root() is true; returns E_ERROR on any failure.
+ */
+static int
+update_zone_mcap(char *zonename, char *maxrss)
+{
+	zoneid_t zone_id;
+	uint64_t num;
+
+	if (getzoneid() != GLOBAL_ZONEID || zonecfg_in_alt_root())
+		return (E_SUCCESS);
+
+	/* get the running zone from the kernel */
+	if ((zone_id = getzoneidbyname(zonename)) == -1) {
+		(void) fprintf(stderr, gettext("zone '%s' must be running\n"),
+		    zonename);
+		return (E_ERROR);
+	}
+
+	if (zonecfg_str_to_bytes(maxrss, &num) == -1) {
+		(void) fprintf(stderr, gettext("invalid max-rss value\n"));
+		return (E_ERROR);
+	}
+
+	if (zone_setattr(zone_id, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
+		(void) fprintf(stderr, gettext("could not set memory "
+		    "cap for zone '%s'\n"), zonename);
+		return (E_ERROR);
+	}
+	return (E_SUCCESS);
+}
+
 int
 main(int argc, char *argv[])
 {
 	char *subopts, *optval;
 	int modified = 0;
+	boolean_t refresh = B_FALSE;
 	int opt;
+	char *zonename;
+	char *maxrss = NULL;
 
 	(void) setprogname("rcapadm");
 	(void) setlocale(LC_ALL, "");
 	(void) textdomain(TEXT_DOMAIN);
 
-	while ((opt = getopt(argc, argv, "DEc:i:n")) != EOF) {
+	while ((opt = getopt(argc, argv, "DEc:i:m:nz:")) != EOF) {
 		switch (opt) {
 		case 'n':
 			no_starting_stopping = 1;
@@ -203,12 +243,24 @@ main(int argc, char *argv[])
 			}
 			modified++;
 			break;
+		case 'm':
+			maxrss = optarg;
+			break;
+		case 'z':
+			refresh = B_TRUE;
+			zonename = optarg;
+			break;
 		default:
 			usage();
 		}
 	}
 
-	if (argc > optind)
+	/* the -z & -m options must be used together */
+	if (argc > optind || (refresh && maxrss == NULL) ||
+	    (!refresh && maxrss != NULL))
+		usage();
+
+	if (refresh && (no_starting_stopping > 0 || modified))
 		usage();
 
 	if (rcfg_read(fname, -1, &conf, NULL) < 0) {
@@ -232,6 +284,9 @@ main(int argc, char *argv[])
 		}
 	}
 
+	if (refresh)
+		return (update_zone_mcap(zonename, maxrss));
+
 	if (modified) {
 		if (pressure >= 0)
 			conf.rcfg_memory_cap_enforcement_pressure = pressure;
diff --git a/usr/src/cmd/rcap/rcapd/Makefile.rcapd b/usr/src/cmd/rcap/rcapd/Makefile.rcapd
index 5fd0d01416..716ea41e38 100644
--- a/usr/src/cmd/rcap/rcapd/Makefile.rcapd
+++ b/usr/src/cmd/rcap/rcapd/Makefile.rcapd
@@ -35,6 +35,7 @@
 SRCS =	rcapd_main.c \
 	rcapd_collection.c \
 	rcapd_collection_project.c \
+	rcapd_collection_zone.c \
 	rcapd_mapping.c \
 	rcapd_rfd.c \
 	rcapd_scanner.c \
@@ -44,6 +45,7 @@ SRCS =	rcapd_main.c \
 LINTSRCS = ../rcapd_main.c \
 	../rcapd_collection.c \
 	../rcapd_collection_project.c \
+	../rcapd_collection_zone.c \
 	../rcapd_mapping.c \
 	../rcapd_rfd.c \
 	../rcapd_scanner.c \
@@ -53,7 +55,7 @@ LINTSRCS = ../rcapd_main.c \
 $(NOT_RELEASE_BUILD)CPPFLAGS 	+= -DDEBUG
 CPPFLAGS			+= -DDEBUG_MSG
 CPPFLAGS 			+= -I$(COMMON_DIR)
-LDLIBS 				+= -lkstat -ll -lproc -lproject -lumem
+LDLIBS 				+= -lkstat -ll -lproc -lproject -lzonecfg -lumem
 LDLIBS				+= $(EXTRA_LDLIBS)
 
 LINTFLAGS			+= -u
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_collection.c b/usr/src/cmd/rcap/rcapd/rcapd_collection.c
index 7dac0e8155..fdaf8dbfe0 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_collection.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_collection.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -41,14 +40,16 @@
 #define	MAX(x, y) (((x) > (y)) ? (x) : (y))
 
 typedef struct {
-	rcid_t		lfa_colid;
+	rcid_t		*lfa_colidp;
 	lcollection_t	*lfa_found;
 } lcollection_find_arg_t;
 
 extern void lcollection_update_project(lcollection_update_type_t,
-    void(*)(char *, int, uint64_t, int));
-extern void lcollection_set_type_project();
-static void lcollection_update_notification_cb(char *, int, uint64_t, int);
+    void(*)(char *, char *, int, uint64_t, int));
+extern void lcollection_update_zone(lcollection_update_type_t,
+    void(*)(char *, char *, int, uint64_t, int));
+static void lcollection_update_notification_cb(char *, char *, int, uint64_t,
+    int);
 
 rcid_t(*rc_getidbypsinfo)(psinfo_t *);
 uint64_t phys_total = 0;
@@ -57,28 +58,8 @@ static lcollection_t *lcollection_head = NULL;
 void
 lcollection_update(lcollection_update_type_t ut)
 {
-	if (rcfg.rcfg_mode == rctype_project)
-		lcollection_update_project(ut,
-		    lcollection_update_notification_cb);
-	else
-		die(gettext("unknown mode %s\n"), rcfg.rcfg_mode_name);
-}
-
-/*
- * Configure which collection type will be used.
- */
-void
-lcollection_set_type(rctype_t type)
-{
-	switch (type) {
-	case rctype_project:
-		lcollection_set_type_project();
-		break;
-	default:
-		/* can't happen */
-		die(gettext("unknown mode %d\n"), type);
-		/*NOTREACHED*/
-	}
+	lcollection_update_zone(ut, lcollection_update_notification_cb);
+	lcollection_update_project(ut, lcollection_update_notification_cb);
 }
 
 /*
@@ -93,7 +74,7 @@ lcollection_set_type(rctype_t type)
  *	LCSS_CAP_ZERO
  */
 lcollection_t *
-lcollection_insert_update(rcid_t colid, uint64_t rss_cap, char *name,
+lcollection_insert_update(rcid_t *colidp, uint64_t rss_cap, char *name,
     int *changes)
 {
 	lcollection_t *lcol;
@@ -103,7 +84,7 @@ lcollection_insert_update(rcid_t colid, uint64_t rss_cap, char *name,
 	if (rss_cap == 0)
 		*changes |= LCST_CAP_ZERO;
 
-	lcol = lcollection_find(colid);
+	lcol = lcollection_find(colidp);
 
 	/*
 	 * If the specified collection is capped, add it to lcollection.
@@ -120,12 +101,13 @@ lcollection_insert_update(rcid_t colid, uint64_t rss_cap, char *name,
 		lcol = malloc(sizeof (*lcol));
 		if (lcol == NULL) {
 			debug("not enough memory to monitor %s %s",
-			    rcfg.rcfg_mode_name, name);
+			    (colidp->rcid_type == RCIDT_PROJECT ?
+			    "project" : "zone"), name);
 			return (NULL);
 		}
 		(void) bzero(lcol, sizeof (*lcol));
 
-		lcol->lcol_id = colid;
+		lcol->lcol_id = *colidp;
 		debug("added collection %s\n", name);
 		lcol->lcol_prev = NULL;
 		lcol->lcol_next = lcollection_head;
@@ -157,8 +139,8 @@ lcollection_insert_update(rcid_t colid, uint64_t rss_cap, char *name,
 }
 
 static void
-lcollection_update_notification_cb(char *name, int changes, uint64_t rss_cap,
-    int mark)
+lcollection_update_notification_cb(char *col_type, char *name, int changes,
+    uint64_t rss_cap, int mark)
 {
 	/*
 	 * Assume the collection has been updated redundantly if its mark count
@@ -168,10 +150,10 @@ lcollection_update_notification_cb(char *name, int changes, uint64_t rss_cap,
 		return;
 
 	if (changes & LCST_CAP_ZERO)
-		debug("%s %s: %s\n", rcfg.rcfg_mode_name, name,
+		debug("%s %s: %s\n", col_type, name,
 		    (changes & LCST_CAP_REMOVED) ? "cap removed" : "uncapped");
 	else
-		debug("%s %s: cap: %llukB\n", rcfg.rcfg_mode_name, name,
+		debug("%s %s: cap: %llukB\n", col_type, name,
 		    (unsigned long long)rss_cap);
 }
 
@@ -215,19 +197,23 @@ lcollection_member(lcollection_t *lcol, lprocess_t *lpc)
 static int
 lcollection_find_cb(lcollection_t *lcol, void *arg)
 {
-	if (lcol->lcol_id == ((lcollection_find_arg_t *)arg)->lfa_colid) {
+	rcid_t *colidp = ((lcollection_find_arg_t *)arg)->lfa_colidp;
+
+	if (lcol->lcol_id.rcid_type == colidp->rcid_type &&
+	    lcol->lcol_id.rcid_val == colidp->rcid_val) {
 		((lcollection_find_arg_t *)arg)->lfa_found = lcol;
 		return (1);
-	} else
-		return (0);
+	}
+
+	return (0);
 }
 
 lcollection_t *
-lcollection_find(id_t colid)
+lcollection_find(rcid_t *colidp)
 {
 	lcollection_find_arg_t lfa;
 
-	lfa.lfa_colid = colid;
+	lfa.lfa_colidp = colidp;
 	lfa.lfa_found = NULL;
 	list_walk_collection(lcollection_find_cb, &lfa);
 
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_collection_project.c b/usr/src/cmd/rcap/rcapd/rcapd_collection_project.c
index ba34100f05..eab6d2a94a 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_collection_project.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_collection_project.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -38,24 +37,17 @@
 				/* round up to next y = 2^n */
 #define	ROUNDUP(x, y)		(((x) + ((y) - 1)) & ~((y) - 1))
 
-static rcid_t rc_proj_getidbypsinfo(psinfo_t *);
-
-void
-lcollection_set_type_project(void)
-{
-	rc_getidbypsinfo = rc_proj_getidbypsinfo;
-}
-
 static int
 lcollection_update_project_cb(const struct project *proj, void *walk_data)
 {
-	void(*update_notification_cb)(char *, int, uint64_t, int) =
-	    (void(*)(char *, int, uint64_t, int))walk_data;
+	void(*update_notification_cb)(char *, char *, int, uint64_t, int) =
+	    (void(*)(char *, char *, int, uint64_t, int))walk_data;
 	char *capattr_abs;
 	char *end;
 	int changes;
 	int64_t max_rss;
 	lcollection_t *lcol;
+	rcid_t colid;
 
 	capattr_abs = strstr(proj->pj_attr, PJ_ABS_ATTR_NAME "=");
 	if (capattr_abs != NULL) {
@@ -70,17 +62,19 @@ lcollection_update_project_cb(const struct project *proj, void *walk_data)
 		capattr_abs += strlen(PJ_ABS_ATTR_NAME "=");
 		max_rss = ROUNDUP(strtoll(capattr_abs, &end, 10), 1024) / 1024;
 		if (end == capattr_abs || *end != ';' && *end != 0)
-			warn(gettext("%s %s: malformed %s value "
-			    "'%s'\n"), rcfg.rcfg_mode_name, proj->pj_name,
-			    PJ_ABS_ATTR_NAME, capattr_abs);
+			warn(gettext("project %s: malformed %s value '%s'\n"),
+			    proj->pj_name, PJ_ABS_ATTR_NAME, capattr_abs);
 	} else
 		max_rss = 0;
 
-	lcol = lcollection_insert_update(proj->pj_projid, max_rss,
-	    proj->pj_name, &changes);
+	colid.rcid_type = RCIDT_PROJECT;
+	colid.rcid_val = proj->pj_projid;
+
+	lcol = lcollection_insert_update(&colid, max_rss, proj->pj_name,
+	    &changes);
 	if (update_notification_cb != NULL)
-		update_notification_cb(proj->pj_name, changes, max_rss, (lcol !=
-		    NULL) ? lcol->lcol_mark : 0);
+		update_notification_cb("project", proj->pj_name, changes,
+		    max_rss, (lcol != NULL) ? lcol->lcol_mark : 0);
 
 	return (0);
 }
@@ -101,10 +95,13 @@ lcollection_update_project_byid_cb(const projid_t id, void *walk_data)
 static int
 lcollection_update_onceactive_cb(lcollection_t *lcol, void *walk_data)
 {
-	void(*update_notification_cb)(char *, int, uint64_t, int) =
-	    (void(*)(char *, int, uint64_t, int))walk_data;
+	void(*update_notification_cb)(char *, char *, int, uint64_t, int) =
+	    (void(*)(char *, char *, int, uint64_t, int))walk_data;
+
+	if (lcol->lcol_id.rcid_type != RCIDT_PROJECT)
+		return (0);
 
-	return (lcollection_update_project_byid_cb(lcol->lcol_id,
+	return (lcollection_update_project_byid_cb(lcol->lcol_id.rcid_val,
 	    (void *)update_notification_cb));
 }
 
@@ -125,7 +122,7 @@ project_walk_all(int(*cb)(const struct project *, void *), void *walk_data)
 
 void
 lcollection_update_project(lcollection_update_type_t ut,
-    void(*update_notification_cb)(char *, int, uint64_t, int))
+    void(*update_notification_cb)(char *, char *, int, uint64_t, int))
 {
 	switch (ut) {
 	case LCU_ACTIVE_ONLY:
@@ -154,9 +151,3 @@ lcollection_update_project(lcollection_update_type_t ut,
 		    (void *)update_notification_cb);
 	}
 }
-
-static rcid_t
-rc_proj_getidbypsinfo(psinfo_t *psinfo)
-{
-	return (psinfo->pr_projid);
-}
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c b/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
new file mode 100644
index 0000000000..db86aa6276
--- /dev/null
+++ b/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <procfs.h>
+#include <project.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <zone.h>
+#include <libzonecfg.h>
+#include "rcapd.h"
+#include "utils.h"
+
+extern boolean_t gz_capped;
+
+				/* round x up to a multiple of y = 2^n */
+#define	ROUNDUP(x, y)		(((x) + ((y) - 1)) & ~((y) - 1))
+
+/*
+ * Record the physical memory cap of one running zone as a collection and
+ * report the outcome through the callback passed in walk_data, if any.
+ */
+static void
+update_zone(zone_entry_t *zent, void *walk_data)
+{
+	void(*notify)(char *, char *, int, uint64_t, int) =
+	    (void(*)(char *, char *, int, uint64_t, int))walk_data;
+	lcollection_t *lcol;
+	rcid_t colid;
+	uint64_t mcap;
+	int64_t max_rss = 0;		/* stays 0 when no cap is set */
+	int changes;
+
+	/* Scale the cap attribute down by 1024, rounding up. */
+	if (zone_getattr(zent->zid, ZONE_ATTR_PHYS_MCAP, &mcap,
+	    sizeof (mcap)) != -1 && mcap != 0)
+		max_rss = ROUNDUP(mcap, 1024) / 1024;
+
+	/* Remember whether the global zone itself is capped. */
+	if (zent->zid == GLOBAL_ZONEID)
+		gz_capped = (max_rss > 0) ? B_TRUE : B_FALSE;
+
+	colid.rcid_type = RCIDT_ZONE;
+	colid.rcid_val = zent->zid;
+	lcol = lcollection_insert_update(&colid, max_rss, zent->zname,
+	    &changes);
+
+	if (notify == NULL)
+		return;
+	notify("zone", zent->zname, changes, max_rss,
+	    (lcol != NULL) ? lcol->lcol_mark : 0);
+}
+
+
+/*
+ * Walk every running zone and refresh its collection.  The update type is
+ * not consulted; all zones returned by get_running_zones() are processed.
+ */
+/* ARGSUSED */
+void
+lcollection_update_zone(lcollection_update_type_t ut,
+    void(*update_notification_cb)(char *, char *, int, uint64_t, int))
+{
+	uint_t i;	/* unsigned, to match nzents */
+	uint_t nzents;
+	zone_entry_t *zents;
+
+	/* Enumerate running zones; on failure leave collections untouched. */
+	if (get_running_zones(&nzents, &zents) != 0)
+		return;
+
+	for (i = 0; i < nzents; i++)
+		update_zone(&zents[i], (void *)update_notification_cb);
+
+	free(zents);
+}
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_main.c b/usr/src/cmd/rcap/rcapd/rcapd_main.c
index 9c2e8b3c48..960065826e 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_main.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_main.c
@@ -61,6 +61,7 @@
 #include <unistd.h>
 #include <zone.h>
 #include <assert.h>
+#include <sys/vm_usage.h>
 #include "rcapd.h"
 #include "rcapd_mapping.h"
 #include "rcapd_rfd.h"
@@ -80,30 +81,42 @@
 #define	STAT_TEMPLATE_SUFFIX	".XXXXXX"	/* suffix of mkstemp() arg */
 #define	DAEMON_UID		1		/* uid to use */
 
+#define	CAPPED_PROJECT	0x01
+#define	CAPPED_ZONE	0x02
+
 typedef struct soft_scan_arg {
 	uint64_t ssa_sum_excess;
 	int64_t ssa_scan_goal;
+	boolean_t ssa_project_over_cap;
 } soft_scan_arg_t;
 
+typedef struct sample_col_arg {
+	boolean_t sca_any_over_cap;
+	boolean_t sca_project_over_cap;
+} sample_col_arg_t;
+
+
 static int debug_mode = 0;		/* debug mode flag */
 static pid_t rcapd_pid;			/* rcapd's pid to ensure it's not */
 					/* scanned */
 static kstat_ctl_t *kctl;		/* kstat chain */
-static uint64_t new_sp = 0, old_sp = 0;	/* measure delta in page scan count */
-static int enforce_caps = 0;		/* cap enforcement flag, dependent on */
-					/* enforce_soft_caps and */
-					/* global_scanner_running */
-static int enforce_soft_caps = 0;	/* soft cap enforcement flag, */
-					/* depending on memory pressure */
 static int memory_pressure = 0;		/* physical memory utilization (%) */
 static int memory_pressure_sample = 0;	/* count of samples */
-static int global_scanner_running = 0;	/* global scanning flag, to avoid */
-					/* interference with kernel's page */
-					/* scanner */
+static long page_size_kb = 0;		/* system page size in KB */
+static size_t nvmu_vals = 0;		/* # of kernel RSS/swap vals in array */
+static size_t vmu_vals_len = 0;		/* size of RSS/swap vals array */
+static vmusage_t *vmu_vals = NULL;	/* snapshot of kernel RSS/swap values */
 static hrtime_t next_report;		/* time of next report */
 static int termination_signal = 0;	/* terminating signal */
+static zoneid_t my_zoneid = (zoneid_t)-1;
+static lcollection_t *gz_col;		/* global zone collection */
 
 rcfg_t rcfg;
+/*
+ * Updated when we re-read the collection configurations if this rcapd instance
+ * is running in the global zone and the global zone is capped.
+ */
+boolean_t gz_capped = B_FALSE;
 
 /*
  * Flags.
@@ -116,9 +129,9 @@ static int verify_statistics(void);
 static int update_statistics(void);
 
 /*
- * Checks if a process is marked 'system'.  Returns zero only when it is not.
+ * Checks if a process is marked 'system'.  Returns FALSE only when it is not.
  */
-static int
+static boolean_t
 proc_issystem(pid_t pid)
 {
 	char pc_clname[PC_CLNMSZ];
@@ -128,22 +141,43 @@ proc_issystem(pid_t pid)
 		return (strcmp(pc_clname, "SYS") == 0);
 	} else {
 		debug("cannot get class-specific scheduling parameters; "
-		    "assuming system process");
-		return (-1);
+		    "assuming system process\n");
+		return (B_TRUE);
 	}
 }
 
-/*
- * fname is the process name, for debugging messages, and unscannable is a flag
- * indicating whether the process should be scanned.
- */
 static void
-lprocess_insert_mark(pid_t pid, id_t colid, char *fname, int unscannable)
+lprocess_insert_mark(psinfo_t *psinfop)
 {
+	pid_t pid = psinfop->pr_pid;
+	/* flag indicating whether the process should be scanned. */
+	int unscannable = psinfop->pr_nlwp == 0;
+	rcid_t colid;
 	lcollection_t *lcol;
 	lprocess_t *lproc;
 
-	if ((lcol = lcollection_find(colid)) == NULL)
+	/*
+	 * Determine which collection to put this process into.  We only have
+	 * to worry about tracking both zone and project capped processes if
+	 * this rcapd instance is running in the global zone, since we'll only
+	 * see processes in our own projects in a non-global zone.  In the
+	 * global zone, if the process belongs to a non-global zone, we only
+	 * need to track it for the capped non-global zone collection.  For
+	 * global zone processes, we first attempt to put the process into a
+	 * capped project collection.  On the second pass into this function
+	 * the projid will be cleared so we will just track the process for the
+	 * global zone collection as a whole.
+	 */
+	if (psinfop->pr_zoneid == my_zoneid && psinfop->pr_projid != -1) {
+		colid.rcid_type = RCIDT_PROJECT;
+		colid.rcid_val = psinfop->pr_projid;
+	} else {
+		/* try to add to zone collection */
+		colid.rcid_type = RCIDT_ZONE;
+		colid.rcid_val = psinfop->pr_zoneid;
+	}
+
+	if ((lcol = lcollection_find(&colid)) == NULL)
 		return;
 
 	/*
@@ -193,7 +227,8 @@ lprocess_insert_mark(pid_t pid, id_t colid, char *fname, int unscannable)
 		if (lcollection_member(lcol, lproc)) {
 			lprocess_t *cur = lcol->lcol_lprocess;
 			debug("The collection %lld already has these members, "
-			    "including me, %d!\n", (long long)lcol->lcol_id,
+			    "including me, %d!\n",
+			    (long long)lcol->lcol_id.rcid_val,
 			    (int)lproc->lpc_pid);
 			while (cur != NULL) {
 				debug("\t%d\n", (int)cur->lpc_pid);
@@ -209,7 +244,10 @@ lprocess_insert_mark(pid_t pid, id_t colid, char *fname, int unscannable)
 		lproc->lpc_prev = NULL;
 		lcol->lcol_lprocess = lproc;
 
-		debug("tracking %d %d %s%s\n", (int)colid, (int)pid, fname,
+		debug("tracking %s %ld %d %s%s\n",
+		    (colid.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
+		    (long)colid.rcid_val,
+		    (int)pid, psinfop->pr_psargs,
 		    (lproc->lpc_unscannable != 0) ? " (not scannable)" : "");
 		lcol->lcol_stat.lcols_proc_in++;
 	}
@@ -328,22 +366,28 @@ get_psinfo(pid_t pid, psinfo_t *psinfo, int cached_fd,
 }
 
 /*
- * Retrieve the collection membership of all processes in our zone, and update
- * the psinfo of those non-system, non-zombie ones in collections.
+ * Retrieve the collection membership of all processes and update the psinfo of
+ * those non-system, non-zombie ones in collections.  For global zone processes,
+ * we first attempt to put the process into a capped project collection.  We
+ * also want to track the process for the global zone collection as a whole.
  */
 static void
 proc_cb(const pid_t pid)
 {
-	static zoneid_t ours = (zoneid_t)-1;
 	psinfo_t psinfo;
 
-	if (ours == (zoneid_t)-1)
-		ours = getzoneid();
-
-	if (get_psinfo(pid, &psinfo, -1, NULL, NULL, NULL) == 0 &&
-	    psinfo.pr_zoneid == ours)
-		lprocess_insert_mark(psinfo.pr_pid, rc_getidbypsinfo(&psinfo),
-		    psinfo.pr_psargs, psinfo.pr_nlwp == 0);
+	if (get_psinfo(pid, &psinfo, -1, NULL, NULL, NULL) == 0) {
+		lprocess_insert_mark(&psinfo);
+		if (gz_capped && psinfo.pr_zoneid == GLOBAL_ZONEID) {
+			/*
+			 * We also want to track this process for the global
+			 * zone as a whole so add it to the global zone
+			 * collection as well.
+			 */
+			psinfo.pr_projid = -1;
+			lprocess_insert_mark(&psinfo);
+		}
+	}
 }
 
 /*
@@ -359,57 +403,149 @@ lprocess_update_psinfo_fd_cb(void *arg, int fd)
 }
 
 /*
- * Update the RSS of processes in monitored collections.
+ * Get the system pagesize.
  */
-/*ARGSUSED*/
-static int
-mem_sample_cb(lcollection_t *lcol, lprocess_t *lpc)
+static void
+get_page_size(void)
 {
-	psinfo_t psinfo;
+	page_size_kb = sysconf(_SC_PAGESIZE) / 1024;	/* bytes -> KB */
+	debug("physical page size: %ldKB\n", page_size_kb);
+}
+
+static void
+tm_fmt(char *msg, hrtime_t t1, hrtime_t t2)
+{
+	hrtime_t diff = t2 - t1;	/* elapsed time, in nanoseconds */
+	/* Log msg with the elapsed time scaled to a readable unit. */
+	if (diff < MILLISEC)
+		debug("%s: %lld nanoseconds\n", msg, diff);
+	else if (diff < MICROSEC)
+		debug("%s: %.2f microseconds\n", msg, (float)diff / MILLISEC);
+	else if (diff < NANOSEC)
+		debug("%s: %.2f milliseconds\n", msg, (float)diff / MICROSEC);
+	else
+		debug("%s: %.2f seconds\n", msg, (float)diff / NANOSEC);
+}
+
+/*
+ * Get the zone's & project's RSS from the kernel.
+ */
+static void
+rss_sample(boolean_t my_zone_only, uint_t col_types)
+{
+	size_t nres;
+	size_t i;
+	uint_t flags;
+	hrtime_t t1, t2;
 
-	if (get_psinfo(lpc->lpc_pid, &psinfo, lpc->lpc_psinfo_fd,
-	    lprocess_update_psinfo_fd_cb, lpc, lpc) == 0) {
-		lpc->lpc_rss = psinfo.pr_rssize;
-		lpc->lpc_size = psinfo.pr_size;
+	if (my_zone_only) {
+		flags = VMUSAGE_ZONE;
 	} else {
-		if (errno == ENOENT)
-			debug("process %d finished\n", (int)lpc->lpc_pid);
-		else
-			debug("process %d: cannot read psinfo",
-			    (int)lpc->lpc_pid);
-		lprocess_free(lpc);
+		flags = 0;
+		if (col_types & CAPPED_PROJECT)
+			flags |= VMUSAGE_PROJECTS;
+		if (col_types & CAPPED_ZONE && my_zoneid == GLOBAL_ZONEID)
+			flags |= VMUSAGE_ALL_ZONES;
 	}
 
-	return (0);
+	debug("vmusage sample flags 0x%x\n", flags);
+	if (flags == 0)
+		return;
+
+again:
+	/* try the current buffer to see if the list will fit */
+	nres = vmu_vals_len;
+	t1 = gethrtime();
+	if (getvmusage(flags, my_zone_only ? 0 : rcfg.rcfg_rss_sample_interval,
+	    vmu_vals, &nres) != 0) {
+		if (errno != EOVERFLOW) {
+			warn(gettext("can't read RSS from kernel\n"));
+			return;
+		}
+	}
+	t2 = gethrtime();
+	tm_fmt("getvmusage time", t1, t2);
+
+	debug("kernel nres %lu\n", (ulong_t)nres);
+
+	if (nres > vmu_vals_len) {
+		/* array size is now too small, increase it and try again */
+		free(vmu_vals);
+
+		if ((vmu_vals = (vmusage_t *)calloc(nres,
+		    sizeof (vmusage_t))) == NULL) {
+			warn(gettext("out of memory: could not read RSS from "
+			    "kernel\n"));
+			vmu_vals_len = nvmu_vals = 0;
+			return;
+		}
+		vmu_vals_len = nres;
+		goto again;
+	}
+
+	nvmu_vals = nres;
+
+	debug("vmusage_sample\n");
+	for (i = 0; i < nvmu_vals; i++) {
+		debug("%d: id: %d, type: 0x%x, rss_all: %llu (%lluKB), "
+		    "swap: %llu\n", (int)i, (int)vmu_vals[i].vmu_id,
+		    vmu_vals[i].vmu_type,
+		    (unsigned long long)vmu_vals[i].vmu_rss_all,
+		    (unsigned long long)vmu_vals[i].vmu_rss_all / 1024,
+		    (unsigned long long)vmu_vals[i].vmu_swap_all);
+	}
+}
+
+static void
+update_col_rss(lcollection_t *lcol)
+{
+	size_t i;	/* unsigned, like nvmu_vals */
+	/* Default to zero in case the snapshot has no entry for lcol. */
+	lcol->lcol_rss = 0;
+	lcol->lcol_image_size = 0;
+
+	for (i = 0; i < nvmu_vals; i++) {
+		if (vmu_vals[i].vmu_id != lcol->lcol_id.rcid_val)
+			continue;
+
+		/* Ids alone are ambiguous; the result type must match too. */
+		if (vmu_vals[i].vmu_type == VMUSAGE_ZONE &&
+		    lcol->lcol_id.rcid_type != RCIDT_ZONE)
+			continue;
+		if (vmu_vals[i].vmu_type == VMUSAGE_PROJECTS &&
+		    lcol->lcol_id.rcid_type != RCIDT_PROJECT)
+			continue;
+
+		/* we found the right RSS entry, update the collection vals */
+		lcol->lcol_rss = vmu_vals[i].vmu_rss_all / 1024;
+		lcol->lcol_image_size = vmu_vals[i].vmu_swap_all / 1024;
+		break;
+	}
 }
 
 /*
  * Sample the collection RSS, updating the collection's statistics with the
- * results.
+ * results.  Also, sum the rss of all capped projects & return true if
+ * the collection is over cap.
  */
-/*ARGSUSED*/
 static int
 rss_sample_col_cb(lcollection_t *lcol, void *arg)
 {
 	int64_t excess;
 	uint64_t rss;
+	sample_col_arg_t *col_argp = (sample_col_arg_t *)arg;
 
-	/*
-	 * If updating statistics for a new interval, reset the affected
-	 * counters.
-	 */
-	if (lcol->lcol_stat_invalidate != 0) {
-		lcol->lcol_stat_old = lcol->lcol_stat;
-		lcol->lcol_stat.lcols_min_rss = (int64_t)-1;
-		lcol->lcol_stat.lcols_max_rss = 0;
-		lcol->lcol_stat_invalidate = 0;
-	}
+	update_col_rss(lcol);
 
 	lcol->lcol_stat.lcols_rss_sample++;
-	excess = lcol->lcol_rss - lcol->lcol_rss_cap;
 	rss = lcol->lcol_rss;
-	if (excess > 0)
+	excess = rss - lcol->lcol_rss_cap;
+	if (excess > 0) {
 		lcol->lcol_stat.lcols_rss_act_sum += rss;
+		col_argp->sca_any_over_cap = B_TRUE;
+		if (lcol->lcol_id.rcid_type == RCIDT_PROJECT)
+			col_argp->sca_project_over_cap = B_TRUE;
+	}
 	lcol->lcol_stat.lcols_rss_sum += rss;
 
 	if (lcol->lcol_stat.lcols_min_rss > rss)
@@ -421,6 +557,30 @@ rss_sample_col_cb(lcollection_t *lcol, void *arg)
 }
 
 /*
+ * Determine if we have capped projects, capped zones or both.
+ */
+static int
+col_type_cb(lcollection_t *lcol, void *arg)
+{
+	uint_t *col_type = (uint_t *)arg;
+
+	/* skip uncapped collections, but keep walking the list */
+	if (lcol->lcol_rss_cap == 0)
+		return (0);
+
+	if (lcol->lcol_id.rcid_type == RCIDT_PROJECT)
+		*col_type |= CAPPED_PROJECT;
+	else
+		*col_type |= CAPPED_ZONE;
+
+	/* both kinds seen: return nonzero so we can stop looking */
+	if ((*col_type & CAPPED_ZONE) && (*col_type & CAPPED_PROJECT))
+		return (1);
+
+	return (0);
+}
+
+/*
  * Open /proc and walk entries.
  */
 static void
@@ -449,23 +609,6 @@ proc_walk_all(void (*cb)(const pid_t))
 }
 
 /*
- * Memory update callback.
- */
-static int
-memory_all_cb(lcollection_t *lcol, lprocess_t *lpc)
-{
-	debug_high("%s %s, pid %d: rss += %llu/%llu\n", rcfg.rcfg_mode_name,
-	    lcol->lcol_name, (int)lpc->lpc_pid,
-	    (unsigned long long)lpc->lpc_rss,
-	    (unsigned long long)lpc->lpc_size);
-	ASSERT(lpc->lpc_rss <= lpc->lpc_size);
-	lcol->lcol_rss += lpc->lpc_rss;
-	lcol->lcol_image_size += lpc->lpc_size;
-
-	return (0);
-}
-
-/*
  * Clear unmarked callback.
  */
 /*ARGSUSED*/
@@ -483,19 +626,6 @@ sweep_process_cb(lcollection_t *lcol, lprocess_t *lpc)
 }
 
 /*
- * Memory clear callback.
- */
-/*ARGSUSED*/
-static int
-collection_zero_mem_cb(lcollection_t *lcol, void *arg)
-{
-	lcol->lcol_rss = 0;
-	lcol->lcol_image_size = 0;
-
-	return (0);
-}
-
-/*
  * Print, for debugging purposes, a collection's recently-sampled RSS and
  * excess.
  */
@@ -506,7 +636,8 @@ excess_print_cb(lcollection_t *lcol, void *arg)
 	int64_t excess = lcol->lcol_rss - lcol->lcol_rss_cap;
 
 	debug("%s %s rss/cap: %llu/%llu, excess = %lld kB\n",
-	    rcfg.rcfg_mode_name, lcol->lcol_name,
+	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
+	    lcol->lcol_name,
 	    (unsigned long long)lcol->lcol_rss,
 	    (unsigned long long)lcol->lcol_rss_cap,
 	    (long long)excess);
@@ -516,6 +647,10 @@ excess_print_cb(lcollection_t *lcol, void *arg)
 
 /*
  * Scan those collections which have exceeded their caps.
+ *
+ * If we're running in the global zone it might have a cap.  We don't want to
+ * do any capping for the global zone yet since we might get under the cap by
+ * just capping the projects in the global zone.
  */
 /*ARGSUSED*/
 static int
@@ -523,6 +658,13 @@ scan_cb(lcollection_t *lcol, void *arg)
 {
 	int64_t excess;
 
+	/* skip over global zone collection for now but keep track for later */
+	if (lcol->lcol_id.rcid_type == RCIDT_ZONE &&
+	    lcol->lcol_id.rcid_val == GLOBAL_ZONEID) {
+		gz_col = lcol;
+		return (0);
+	}
+
 	if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) {
 		scan(lcol, excess);
 		lcol->lcol_stat.lcols_scan++;
@@ -532,6 +674,37 @@ scan_cb(lcollection_t *lcol, void *arg)
 }
 
 /*
+ * Scan the global zone collection and see if it still exceeds its cap.
+ * We take into account the effects of capping any global zone projects here.
+ */
+static void
+scan_gz(lcollection_t *lcol, boolean_t project_over_cap)
+{
+	int64_t over = lcol->lcol_rss - lcol->lcol_rss_cap;
+
+	/*
+	 * The rss we hold for the global zone predates any project scans.
+	 * When projects were over cap and the zone is too, those scans may
+	 * already have brought the zone back under its cap, so take a fresh
+	 * zone-only sample before deciding.  Otherwise the value in hand is
+	 * still good.
+	 */
+	if (project_over_cap && over > 0) {
+		rss_sample(B_TRUE, CAPPED_ZONE);
+		update_col_rss(lcol);
+		over = lcol->lcol_rss - lcol->lcol_rss_cap;
+	}
+
+	/* Nothing to reclaim once the zone is at or under its cap. */
+	if (over <= 0)
+		return;
+
+	debug("global zone excess %lldKB\n", (long long)over);
+	scan(lcol, over);
+	lcol->lcol_stat.lcols_scan++;
+}
+
+/*
  * Do a soft scan of those collections which have excesses.  A soft scan is one
  * in which the cap enforcement pressure is taken into account.  The difference
  * between the utilized physical memory and the cap enforcement pressure will
@@ -544,22 +717,72 @@ soft_scan_cb(lcollection_t *lcol, void *a)
 	int64_t excess;
 	soft_scan_arg_t *arg = a;
 
+	/* skip over global zone collection for now but keep track for later */
+	if (lcol->lcol_id.rcid_type == RCIDT_ZONE &&
+	    lcol->lcol_id.rcid_val == GLOBAL_ZONEID) {
+		gz_col = lcol;
+		return (0);
+	}
+
 	if ((excess = lcol->lcol_rss - lcol->lcol_rss_cap) > 0) {
-		debug("col %lld excess %lld scan_goal %lld sum_excess %llu, "
-		    "scanning %lld\n", (long long)lcol->lcol_id,
+		int64_t adjusted_excess =
+		    excess * arg->ssa_scan_goal / arg->ssa_sum_excess;
+
+		debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
+		    "scanning %lld\n",
+		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
+		    "project" : "zone"),
+		    (long)lcol->lcol_id.rcid_val,
 		    (long long)excess, (long long)arg->ssa_scan_goal,
 		    (unsigned long long)arg->ssa_sum_excess,
-		    (long long)(excess * arg->ssa_scan_goal /
-		    arg->ssa_sum_excess));
+		    (long long)adjusted_excess);
 
-		scan(lcol, (int64_t)(excess * arg->ssa_scan_goal /
-		    arg->ssa_sum_excess));
+		scan(lcol, adjusted_excess);
 		lcol->lcol_stat.lcols_scan++;
 	}
 
 	return (0);
 }
 
+static void
+soft_scan_gz(lcollection_t *lcol, void *a)
+{
+	soft_scan_arg_t *ssa = a;
+	int64_t over = lcol->lcol_rss - lcol->lcol_rss_cap;
+	int64_t share;
+
+	/*
+	 * The rss we hold for the global zone predates any project scans.
+	 * When projects were over cap and the zone is too, those scans may
+	 * already have brought the zone back under its cap, so take a fresh
+	 * zone-only sample before deciding.  Otherwise the value in hand is
+	 * still good.
+	 */
+	if (ssa->ssa_project_over_cap && over > 0) {
+		rss_sample(B_TRUE, CAPPED_ZONE);
+		update_col_rss(lcol);
+		over = lcol->lcol_rss - lcol->lcol_rss_cap;
+	}
+
+	/* Nothing to reclaim once the zone is at or under its cap. */
+	if (over <= 0)
+		return;
+
+	/* Scale the excess by this collection's part of the scan goal. */
+	share = over * ssa->ssa_scan_goal / ssa->ssa_sum_excess;
+
+	debug("%s %ld excess %lld scan_goal %lld sum_excess %llu, "
+	    "scanning %lld\n",
+	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
+	    (long)lcol->lcol_id.rcid_val,
+	    (long long)over, (long long)ssa->ssa_scan_goal,
+	    (unsigned long long)ssa->ssa_sum_excess,
+	    (long long)share);
+
+	scan(lcol, share);
+	lcol->lcol_stat.lcols_scan++;
+}
+
 /*
  * When a scan could happen, but caps aren't enforced tick the
  * lcols_unenforced_cap counter.
@@ -582,8 +805,7 @@ update_phys_total(void)
 	uint64_t old_phys_total;
 
 	old_phys_total = phys_total;
-	phys_total = (uint64_t)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE)
-	    / 1024;
+	phys_total = (uint64_t)sysconf(_SC_PHYS_PAGES) * page_size_kb;
 	if (phys_total != old_phys_total)
 		debug("physical memory%s: %lluM\n", (old_phys_total == 0 ?
 		    "" : " adjusted"), (unsigned long long)(phys_total / 1024));
@@ -687,7 +909,9 @@ static int
 collection_sweep_cb(lcollection_t *lcol, void *arg)
 {
 	if (lcol->lcol_mark == 0) {
-		debug("freeing %s %s\n", rcfg.rcfg_mode_name, lcol->lcol_name);
+		debug("freeing %s %s\n",
+		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
+		    "project" : "zone"), lcol->lcol_name);
 		lcollection_free(lcol);
 	}
 
@@ -710,8 +934,6 @@ finish_configuration(void)
 		rcfg.rcfg_mode_name = "project";
 		rcfg.rcfg_mode = rctype_project;
 	}
-
-	lcollection_set_type(rcfg.rcfg_mode);
 }
 
 /*
@@ -754,7 +976,8 @@ reread_configuration_file(void)
  * deletions to cap definitions.
  */
 static void
-reconfigure(void)
+reconfigure(hrtime_t now, hrtime_t *next_configuration,
+    hrtime_t *next_proc_walk, hrtime_t *next_rss_sample)
 {
 	debug("reconfigure...\n");
 
@@ -770,6 +993,31 @@ reconfigure(void)
 	list_walk_collection(collection_clear_cb, NULL);
 	lcollection_update(LCU_ACTIVE_ONLY); /* mark */
 	list_walk_collection(collection_sweep_cb, NULL);
+
+	*next_configuration = NEXT_EVENT_TIME(now,
+	    rcfg.rcfg_reconfiguration_interval);
+
+	/*
+	 * Reset each event time to the shorter of the previous and new
+	 * intervals.
+	 */
+	if (next_report == 0 && rcfg.rcfg_report_interval > 0)
+		next_report = now;
+	else
+		next_report = POSITIVE_MIN(next_report,
+		    NEXT_REPORT_EVENT_TIME(now, rcfg.rcfg_report_interval));
+
+	if (*next_proc_walk == 0 && rcfg.rcfg_proc_walk_interval > 0)
+		*next_proc_walk = now;
+	else
+		*next_proc_walk = POSITIVE_MIN(*next_proc_walk,
+		    NEXT_EVENT_TIME(now, rcfg.rcfg_proc_walk_interval));
+
+	if (*next_rss_sample == 0 && rcfg.rcfg_rss_sample_interval > 0)
+		*next_rss_sample = now;
+	else
+		*next_rss_sample = POSITIVE_MIN(*next_rss_sample,
+		    NEXT_EVENT_TIME(now, rcfg.rcfg_rss_sample_interval));
 }
 
 /*
@@ -791,20 +1039,20 @@ static int
 simple_report_collection_cb(lcollection_t *lcol, void *arg)
 {
 #define	DELTA(field) \
-	(unsigned long long)(lcol->lcol_stat_invalidate ? 0 : \
+	(unsigned long long)( \
 	    (lcol->lcol_stat.field - lcol->lcol_stat_old.field))
-#define	VALID(field) \
-	(unsigned long long)(lcol->lcol_stat_invalidate ? 0 : \
-	    lcol->lcol_stat.field)
 
 	debug("%s %s status: succeeded/attempted (k): %llu/%llu, "
 	    "ineffective/scans/unenforced/samplings:  %llu/%llu/%llu/%llu, RSS "
 	    "min/max (k): %llu/%llu, cap %llu kB, processes/thpt: %llu/%llu, "
-	    "%llu scans over %llu ms\n", rcfg.rcfg_mode_name, lcol->lcol_name,
+	    "%llu scans over %llu ms\n",
+	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
+	    lcol->lcol_name,
 	    DELTA(lcols_pg_eff), DELTA(lcols_pg_att),
 	    DELTA(lcols_scan_ineffective), DELTA(lcols_scan),
 	    DELTA(lcols_unenforced_cap), DELTA(lcols_rss_sample),
-	    VALID(lcols_min_rss), VALID(lcols_max_rss),
+	    (unsigned long long)lcol->lcol_stat.lcols_min_rss,
+	    (unsigned long long)lcol->lcol_stat.lcols_max_rss,
 	    (unsigned long long)lcol->lcol_rss_cap,
 	    (unsigned long long)(lcol->lcol_stat.lcols_proc_in -
 	    lcol->lcol_stat.lcols_proc_out), DELTA(lcols_proc_out),
@@ -812,7 +1060,6 @@ simple_report_collection_cb(lcollection_t *lcol, void *arg)
 	    / MILLISEC));
 
 #undef DELTA
-#undef VALID
 
 	return (0);
 }
@@ -838,13 +1085,11 @@ report_collection_cb(lcollection_t *lcol, void *arg)
 	dc.lcol_stat = lcol->lcol_stat;
 
 	if (write(fd, &dc, sizeof (dc)) == sizeof (dc)) {
-		/*
-		 * Set a flag to indicate that the exported interval snapshot
-		 * values should be reset at the next sample.
-		 */
-		lcol->lcol_stat_invalidate = 1;
+		lcol->lcol_stat_old = lcol->lcol_stat;
 	} else {
-		debug("can't write %s %s statistics", rcfg.rcfg_mode_name,
+		debug("can't write %s %s statistics",
+		    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ?
+		    "project" : "zone"),
 		    lcol->lcol_name);
 	}
 
@@ -871,8 +1116,9 @@ get_globally_scanned_pages(uint64_t *scannedp)
 			if (kstat_read(kctl, ksp, NULL) != -1) {
 				scanned += ((cpu_stat_t *)
 				    ksp->ks_data)->cpu_vminfo.scan;
-			} else
+			} else {
 				return (-1);
+			}
 		}
 	}
 
@@ -881,6 +1127,59 @@ get_globally_scanned_pages(uint64_t *scannedp)
 }
 
 /*
+ * Determine if the global page scanner is running, during which no memory
+ * caps should be enforced, to prevent interference with the global page
+ * scanner.
+ */
+static boolean_t
+is_global_scanner_running(void)
+{
+	/* scan counts kept across calls, to measure the delta between them */
+	static uint64_t new_sp = 0;
+	static uint64_t old_sp = 0;
+	boolean_t res = B_FALSE;
+
+	if (get_globally_scanned_pages(&new_sp) == 0) {
+		if (old_sp != 0 && (new_sp - old_sp) > 0) {
+			debug("global memory pressure detected (%llu "
+			    "pages scanned since last interval)\n",
+			    (unsigned long long)(new_sp - old_sp));
+			res = B_TRUE;
+		}
+		old_sp = new_sp;	/* baseline for the next call */
+	} else {
+		warn(gettext("unable to read cpu statistics"));
+		new_sp = old_sp;
+	}
+
+	return (res);
+}
+
+/*
+ * If soft caps are in use, determine if global memory pressure exceeds the
+ * configured maximum above which soft caps are enforced.
+ */
+static boolean_t
+must_enforce_soft_caps(void)
+{
+	/*
+	 * Check for changes to the amount of installed physical memory, to
+	 * compute the current memory pressure.
+	 */
+	update_phys_total();
+	/* Pressure is the percentage of physical memory not available. */
+	memory_pressure = 100 - (int)((sysconf(_SC_AVPHYS_PAGES) * page_size_kb)
+	    * 100.0 / phys_total);
+	memory_pressure_sample++;
+	if (rcfg.rcfg_memory_cap_enforcement_pressure > 0 &&
+	    memory_pressure > rcfg.rcfg_memory_cap_enforcement_pressure) {
+		return (B_TRUE);
+	}
+
+	return (B_FALSE);
+}
+
+/*
  * Update the shared statistics file with each collection's current statistics.
  * Return zero on success.
  */
@@ -973,6 +1272,26 @@ sum_excess_cb(lcollection_t *lcol, void *arg)
 	return (0);
 }
 
+/*
+ * Compute the quantity of memory (in kilobytes) above the cap enforcement
+ * pressure.  Set the scan goal to that quantity (or at most the excess).
+ */
+static void
+compute_soft_scan_goal(soft_scan_arg_t *argp)
+{
+	/* Sum the collections' excesses; this is the soft-scan denominator. */
+	argp->ssa_sum_excess = 0;
+	list_walk_collection(sum_excess_cb, &(argp->ssa_sum_excess));
+
+	/* Cap the goal in signed space so a negative goal stays negative. */
+	argp->ssa_scan_goal = ((int64_t)sysconf(_SC_PHYS_PAGES) *
+	    (100 - rcfg.rcfg_memory_cap_enforcement_pressure) / 100 -
+	    sysconf(_SC_AVPHYS_PAGES)) * page_size_kb;
+	if (argp->ssa_scan_goal > 0 &&
+	    (uint64_t)argp->ssa_scan_goal > argp->ssa_sum_excess)
+		argp->ssa_scan_goal = (int64_t)argp->ssa_sum_excess;
+}
+
 static void
 rcapd_usage(void)
 {
@@ -1017,6 +1336,112 @@ verify_and_set_privileges(void)
 	priv_freeset(required);
 }
 
+/*
+ * This function does the top-level work to determine if we should do any
+ * memory capping, and if so, it invokes the right call-backs to do the work.
+ */
+static void
+do_capping(hrtime_t now, hrtime_t *next_proc_walk)
+{
+	boolean_t enforce_caps;
+	/* soft cap enforcement flag, depending on memory pressure */
+	boolean_t enforce_soft_caps;
+	/* avoid interference with kernel's page scanner */
+	boolean_t global_scanner_running;
+	sample_col_arg_t col_arg;
+	soft_scan_arg_t arg;	/* filled in only for soft caps */
+	uint_t col_types = 0;
+
+	/* check what kind of collections (project/zone) are capped */
+	list_walk_collection(col_type_cb, &col_types);
+	debug("collection types: 0x%x\n", col_types);
+
+	/* no capped collections, skip checking rss */
+	if (col_types == 0)
+		return;
+
+	/* Determine if soft caps are enforced. */
+	enforce_soft_caps = must_enforce_soft_caps();
+
+	/* Determine if the global page scanner is running. */
+	global_scanner_running = is_global_scanner_running();
+
+	/*
+	 * Sample the capped collections' RSS (rss_sample() queries the
+	 * kernel) and recompute each collection's excess.
+	 */
+	rss_sample(B_FALSE, col_types);
+
+	col_arg.sca_any_over_cap = B_FALSE;
+	col_arg.sca_project_over_cap = B_FALSE;
+	list_walk_collection(rss_sample_col_cb, &col_arg);
+	list_walk_collection(excess_print_cb, NULL);
+	debug("any collection/project over cap = %d, %d\n",
+	    col_arg.sca_any_over_cap, col_arg.sca_project_over_cap);
+
+	if (enforce_soft_caps)
+		debug("memory pressure %d%%\n", memory_pressure);
+
+	/*
+	 * Cap enforcement is determined by the previous conditions.
+	 */
+	enforce_caps = !global_scanner_running && col_arg.sca_any_over_cap &&
+	    (rcfg.rcfg_memory_cap_enforcement_pressure == 0 ||
+	    enforce_soft_caps);
+
+	debug("%senforcing caps\n", enforce_caps ? "" : "not ");
+
+	/*
+	 * If soft caps are in use, determine the size of the portion from each
+	 * collection to scan for.
+	 */
+	if (enforce_caps && enforce_soft_caps)
+		compute_soft_scan_goal(&arg);
+
+	/*
+	 * Victimize offending collections.
+	 */
+	if (enforce_caps && (!enforce_soft_caps ||
+	    (arg.ssa_scan_goal > 0 && arg.ssa_sum_excess > 0))) {
+
+		/*
+		 * Since at least one collection is over its cap & needs
+		 * enforcing, check if it is at least time for a process walk
+		 * (we could be well past time since we only walk /proc when
+		 * we need to) and if so, update each collections process list
+		 * in a single pass through /proc.
+		 */
+		if (EVENT_TIME(now, *next_proc_walk)) {
+			debug("scanning process list...\n");
+			proc_walk_all(proc_cb);		 /* insert & mark */
+			list_walk_all(sweep_process_cb); /* free dead procs */
+			*next_proc_walk = NEXT_EVENT_TIME(now,
+			    rcfg.rcfg_proc_walk_interval);
+		}
+
+		gz_col = NULL;	/* set by the scan callbacks */
+		if (enforce_soft_caps) {
+			debug("scan goal is %lldKB\n",
+			    (long long)arg.ssa_scan_goal);
+			list_walk_collection(soft_scan_cb, &arg);
+			if (gz_capped && gz_col != NULL) {
+				/* process global zone */
+				arg.ssa_project_over_cap =
+				    col_arg.sca_project_over_cap;
+				soft_scan_gz(gz_col, &arg);
+			}
+		} else {
+			list_walk_collection(scan_cb, NULL);
+			if (gz_capped && gz_col != NULL) {
+				/* process global zone */
+				scan_gz(gz_col, col_arg.sca_project_over_cap);
+			}
+		}
+	} else if (col_arg.sca_any_over_cap) {
+		list_walk_collection(unenforced_cap_cb, NULL);
+	}
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -1029,9 +1454,6 @@ main(int argc, char *argv[])
 	hrtime_t next_proc_walk;	/* time of next /proc scan */
 	hrtime_t next_configuration;	/* time of next configuration */
 	hrtime_t next_rss_sample;	/* (latest) time of next RSS sample */
-	int old_enforce_caps;		/* track changes in enforcement */
-					/* conditions */
-	soft_scan_arg_t arg;
 
 	(void) set_message_priority(RCM_INFO);
 	(void) setprogname("rcapd");
@@ -1125,13 +1547,6 @@ main(int argc, char *argv[])
 	next_configuration = NEXT_EVENT_TIME(gethrtime(),
 	    rcfg.rcfg_reconfiguration_interval);
 
-	if (rcfg.rcfg_memory_cap_enforcement_pressure == 0) {
-		/*
-		 * Always enforce caps when strict caps are used.
-		 */
-		enforce_caps = 1;
-	}
-
 	/*
 	 * Open the kstat chain.
 	 */
@@ -1158,6 +1573,9 @@ main(int argc, char *argv[])
 	else
 		debug("fd limit: unknown\n");
 
+	get_page_size();
+	my_zoneid = getzoneid();
+
 	/*
 	 * Handle those signals whose (default) exit disposition
 	 * prevents rcapd from finishing scanning before terminating.
@@ -1194,9 +1612,9 @@ main(int argc, char *argv[])
 
 	/*
 	 * Loop forever, monitoring collections' resident set sizes and
-	 * enforcing their caps.  Look for changes in caps and process
-	 * membership, as well as responding to requests to reread the
-	 * configuration.  Update per-collection statistics periodically.
+	 * enforcing their caps.  Look for changes in caps as well as
+	 * responding to requests to reread the configuration.  Update
+	 * per-collection statistics periodically.
 	 */
 	while (should_run != 0) {
 		struct timespec ts;
@@ -1210,9 +1628,10 @@ main(int argc, char *argv[])
 		}
 
 		/*
-		 * Update the process list once every proc_walk_interval.  The
-		 * condition of global memory pressure is also checked at the
-		 * same frequency, if strict caps are in use.
+		 * Check the configuration at every next_configuration interval.
+		 * Update the rss data once every next_rss_sample interval.
+		 * The condition of global memory pressure is also checked at
+		 * the same frequency, if strict caps are in use.
 		 */
 		now = gethrtime();
 
@@ -1222,178 +1641,16 @@ main(int argc, char *argv[])
 		 */
 		if (EVENT_TIME(now, next_configuration) ||
 		    should_reconfigure == 1) {
-			reconfigure();
-			next_configuration = NEXT_EVENT_TIME(now,
-			    rcfg.rcfg_reconfiguration_interval);
-
-			/*
-			 * Reset each event time to the shorter of the
-			 * previous and new intervals.
-			 */
-			if (next_report == 0 &&
-			    rcfg.rcfg_report_interval > 0)
-				next_report = now;
-			else
-				next_report = POSITIVE_MIN(next_report,
-				    NEXT_REPORT_EVENT_TIME(now,
-				    rcfg.rcfg_report_interval));
-			if (next_proc_walk == 0 &&
-			    rcfg.rcfg_proc_walk_interval > 0)
-				next_proc_walk = now;
-			else
-				next_proc_walk = POSITIVE_MIN(next_proc_walk,
-				    NEXT_EVENT_TIME(now,
-				    rcfg.rcfg_proc_walk_interval));
-			if (next_rss_sample == 0 &&
-			    rcfg.rcfg_rss_sample_interval > 0)
-				next_rss_sample = now;
-			else
-				next_rss_sample = POSITIVE_MIN(next_rss_sample,
-				    NEXT_EVENT_TIME(now,
-				    rcfg.rcfg_rss_sample_interval));
-
+			reconfigure(now, &next_configuration, &next_proc_walk,
+			    &next_rss_sample);
 			should_reconfigure = 0;
-			continue;
-		}
-
-		if (EVENT_TIME(now, next_proc_walk)) {
-			debug("scanning process list...\n");
-			proc_walk_all(proc_cb); /* mark */
-			list_walk_all(sweep_process_cb);
-			next_proc_walk = NEXT_EVENT_TIME(now,
-			    rcfg.rcfg_proc_walk_interval);
 		}
 
+		/*
+		 * Do the main work for enforcing caps.
+		 */
 		if (EVENT_TIME(now, next_rss_sample)) {
-			/*
-			 * Check for changes to the amount of installed
-			 * physical memory, to compute the current memory
-			 * pressure.
-			 */
-			update_phys_total();
-
-			/*
-			 * If soft caps are in use, determine if global memory
-			 * pressure exceeds the configured maximum above which
-			 * soft caps are enforced.
-			 */
-			memory_pressure = 100 -
-			    (int)((sysconf(_SC_AVPHYS_PAGES) *
-			    (sysconf(_SC_PAGESIZE) / 1024)) * 100.0 /
-			    phys_total);
-			memory_pressure_sample++;
-			if (rcfg.rcfg_memory_cap_enforcement_pressure > 0) {
-				if (memory_pressure >
-				    rcfg.rcfg_memory_cap_enforcement_pressure) {
-					if (enforce_soft_caps == 0) {
-						debug("memory pressure %d%%\n",
-						    memory_pressure);
-						enforce_soft_caps = 1;
-					}
-				} else {
-					if (enforce_soft_caps == 1)
-						enforce_soft_caps = 0;
-				}
-			}
-
-			/*
-			 * Determine if the global page scanner is running,
-			 * while which no memory caps should be enforced, to
-			 * prevent interference with the global page scanner.
-			 */
-			if (get_globally_scanned_pages(&new_sp) == 0) {
-				if (old_sp == 0)
-					/*EMPTY*/
-					;
-				else if ((new_sp - old_sp) > 0) {
-					if (global_scanner_running == 0) {
-						debug("global memory pressure "
-						    "detected (%llu pages "
-						    "scanned since last "
-						    "interval)\n",
-						    (unsigned long long)
-						    (new_sp - old_sp));
-						global_scanner_running = 1;
-					}
-				} else if (global_scanner_running == 1) {
-					debug("global memory pressure "
-					    "relieved\n");
-					global_scanner_running = 0;
-				}
-				old_sp = new_sp;
-			} else {
-				warn(gettext("kstat_read() failed"));
-				new_sp = old_sp;
-			}
-
-			/*
-			 * Cap enforcement is determined by the previous two
-			 * conditions.
-			 */
-			old_enforce_caps = enforce_caps;
-			enforce_caps =
-			    (rcfg.rcfg_memory_cap_enforcement_pressure ==
-			    0 || enforce_soft_caps == 1) &&
-			    !global_scanner_running;
-			if (old_enforce_caps != enforce_caps)
-				debug("%senforcing caps\n", enforce_caps == 0 ?
-				    "not " : "");
-
-			/*
-			 * Sample collections' member processes' RSSes and
-			 * recompute collections' excess.
-			 */
-			list_walk_all(mem_sample_cb);
-			list_walk_collection(collection_zero_mem_cb, NULL);
-			list_walk_all(memory_all_cb);
-			list_walk_collection(rss_sample_col_cb, NULL);
-			if (rcfg.rcfg_memory_cap_enforcement_pressure > 0)
-				debug("memory pressure %d%%\n",
-				    memory_pressure);
-			list_walk_collection(excess_print_cb, NULL);
-
-			/*
-			 * If soft caps are in use, determine the size of the
-			 * portion from each collection to scan for.
-			 */
-			if (enforce_soft_caps == 1) {
-				/*
-				 * Compute the sum of the collections'
-				 * excesses, which will be the denominator.
-				 */
-				arg.ssa_sum_excess = 0;
-				list_walk_collection(sum_excess_cb,
-				    &arg.ssa_sum_excess);
-
-				/*
-				 * Compute the quantity of memory (in
-				 * kilobytes) above the cap enforcement
-				 * pressure.  Set the scan goal to that
-				 * quantity (or at most the excess).
-				 */
-				arg.ssa_scan_goal = MIN((
-				    sysconf(_SC_PHYS_PAGES) * (100 -
-				    rcfg.rcfg_memory_cap_enforcement_pressure)
-				    / 100 - sysconf(_SC_AVPHYS_PAGES)) *
-				    (sysconf(_SC_PAGESIZE) / 1024),
-				    arg.ssa_sum_excess);
-			}
-
-			/*
-			 * Victimize offending collections.
-			 */
-			if (enforce_caps == 1 && ((enforce_soft_caps == 1 &&
-			    arg.ssa_scan_goal > 0 && arg.ssa_sum_excess > 0) ||
-			    (enforce_soft_caps == 0)))
-				if (enforce_soft_caps == 1) {
-					debug("scan goal is %lldKB\n",
-					    (long long)arg.ssa_scan_goal);
-					list_walk_collection(soft_scan_cb,
-					    &arg);
-				} else
-					list_walk_collection(scan_cb, NULL);
-			else
-				list_walk_collection(unenforced_cap_cb, NULL);
+			do_capping(now, &next_proc_walk);
 
 			next_rss_sample = NEXT_EVENT_TIME(now,
 			    rcfg.rcfg_rss_sample_interval);
@@ -1409,7 +1666,6 @@ main(int argc, char *argv[])
 		 */
 		now = gethrtime();
 		next = next_configuration;
-		next = POSITIVE_MIN(next, next_proc_walk);
 		next = POSITIVE_MIN(next, next_report);
 		next = POSITIVE_MIN(next, next_rss_sample);
 		if (next > now && should_run != 0) {
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_scanner.c b/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
index 15c503d1b4..b39811b552 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -104,7 +103,8 @@ st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...)
 	buf = malloc(len);
 	if (buf == NULL)
 		return;
-	(void) snprintf(buf, len, "%s %s scanner %s", rcfg.rcfg_mode_name,
+	(void) snprintf(buf, len, "%s %s scanner %s",
+	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
 	    lcol->lcol_name, msg);
 
 	va_start(alist, msg);
@@ -471,6 +471,7 @@ merge_current_pagedata(lprocess_t *lpc,
 {
 	prpageheader_t *pghp;
 	int mappings_changed = 0;
+	uint64_t cnt;
 
 	if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) !=
 	    0) {
@@ -485,9 +486,12 @@ merge_current_pagedata(lprocess_t *lpc,
 		debug("starting/resuming pagedata collection for %d\n",
 		    (int)lpc->lpc_pid);
 	}
-	debug("process %d: %llu/%llukB r/m'd since last read\n",
-	    (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0,
-	    PG_MODIFIED | PG_REFERENCED, 0), (unsigned long long)lpc->lpc_rss);
+
+	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
+	if (cnt != 0 || lpc->lpc_rss != 0)
+		debug("process %d: %llu/%llukB rfd/mdfd since last read\n",
+		    (int)lpc->lpc_pid, (unsigned long long)cnt,
+		    (unsigned long long)lpc->lpc_rss);
 	if (lpc->lpc_prpageheader != NULL) {
 		/*
 		 * OR the two snapshots.
@@ -519,10 +523,12 @@ merge_current_pagedata(lprocess_t *lpc,
 	} else
 		mappings_changed = 1;
 	lpc->lpc_prpageheader = pghp;
-	debug("process %d: %llu/%llukB r/m'd since hand swept\n",
-	    (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0,
-	    PG_MODIFIED | PG_REFERENCED, 0),
-	    (unsigned long long)lpc->lpc_rss);
+
+	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
+	if (cnt != 0 || lpc->lpc_rss != 0)
+		debug("process %d: %llu/%llukB rfd/mdfd since hand swept\n",
+		    (int)lpc->lpc_pid, (unsigned long long)cnt,
+		    (unsigned long long)lpc->lpc_rss);
 	if (mappings_changed != 0) {
 		debug("process %d: mappings changed\n", (int)lpc->lpc_pid);
 		if (mappings_changed_cb != NULL)
@@ -589,7 +595,6 @@ rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic)
 static void
 unignore_mappings(lprocess_t *lpc)
 {
-	debug("clearing ignored set\n");
 	lmapping_free(&lpc->lpc_ignore);
 }
 
diff --git a/usr/src/cmd/rcap/rcapstat/Makefile b/usr/src/cmd/rcap/rcapstat/Makefile
index 47b9bcfb71..fb436f5684 100644
--- a/usr/src/cmd/rcap/rcapstat/Makefile
+++ b/usr/src/cmd/rcap/rcapstat/Makefile
@@ -2,9 +2,8 @@
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
 # CDDL HEADER END
 #
 #
-# Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 #ident	"%Z%%M%	%I%	%E% SMI"
@@ -39,7 +38,7 @@ LINTSRCS =	$(COMMON_DIR)/utils.c \
 
 $(NOT_RELEASE_BUILD)CPPFLAGS 	+= -DDEBUG
 CPPFLAGS 			+= -I$(COMMON_DIR)
-LDLIBS 				+= -lumem -ll
+LDLIBS 				+= -lumem -ll -lzonecfg
 
 LINTFLAGS			+= $(LDLIBS) -mnu
 
diff --git a/usr/src/cmd/rcap/rcapstat/rcapstat.c b/usr/src/cmd/rcap/rcapstat/rcapstat.c
index 722502d05d..47eca3f2fa 100644
--- a/usr/src/cmd/rcap/rcapstat/rcapstat.c
+++ b/usr/src/cmd/rcap/rcapstat/rcapstat.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -77,7 +76,8 @@ col_find(rcid_t id)
 {
 	col_t *col;
 	for (col = col_head; col != NULL; col = col->col_next)
-		if (col->col_id == id)
+		if (col->col_id.rcid_type == id.rcid_type &&
+		    col->col_id.rcid_val == id.rcid_val)
 			return (col);
 	return (NULL);
 }
@@ -119,7 +119,7 @@ static void
 usage()
 {
 	(void) fprintf(stderr,
-	    gettext("usage: rcapstat [-g] [interval [count]]\n"));
+	    gettext("usage: rcapstat [-g] [-p | -z] [interval [count]]\n"));
 	exit(E_USAGE);
 }
 
@@ -139,12 +139,12 @@ format_size(char *str, uint64_t size, int length)
 }
 
 static int
-read_stats()
+read_stats(rcid_type_t stat_type)
 {
 	int fd;
 	int proc_fd;
 	char procfile[20];
-	pid_t pid;
+	uint64_t pid;
 	col_t *col, *col_next;
 	lcollection_report_t report;
 	struct stat st;
@@ -169,7 +169,7 @@ read_stats()
 	 * Check if rcapd is running
 	 */
 	pid = hdr.rs_pid;
-	(void) snprintf(procfile, 20, "/proc/%ld/psinfo", pid);
+	(void) snprintf(procfile, 20, "/proc/%lld/psinfo", pid);
 	if ((proc_fd = open(procfile, O_RDONLY)) < 0) {
 		warn(gettext("rcapd is not active\n"));
 		(void) close(fd);
@@ -185,6 +185,9 @@ read_stats()
 	}
 
 	while (read(fd, &report, sizeof (report)) == sizeof (report)) {
+		if (report.lcol_id.rcid_type != stat_type)
+			continue;
+
 		col = col_find(report.lcol_id);
 		if (col == NULL) {
 			col = col_insert(report.lcol_id);
@@ -291,12 +294,13 @@ print_unformatted_stats(void)
 }
 
 static void
-print_stats()
+print_stats(rcid_type_t stat_type)
 {
 	col_t *col;
 	char size[6];
 	char limit[6];
 	char rss[6];
+	char nproc[6];
 	char paged_att[6];
 	char paged_eff[6];
 	char paged_att_avg[6];
@@ -310,12 +314,21 @@ print_stats()
 	 */
 	if (count == 0 || ncol != 1)
 		(void) printf("%6s %-15s %5s %5s %5s %5s %5s %5s %5s %5s\n",
-		    "id", mode, "nproc", "vm", "rss", "cap",
+		    "id", (stat_type == RCIDT_PROJECT ?  "project" : "zone"),
+		    "nproc", "vm", "rss", "cap",
 		    "at", "avgat", "pg", "avgpg");
 	if (++count >= 20 || (count >= 10 && global != 0) || ncol != 1)
 		count = 0;
 
 	for (col = col_head; col != NULL; col = col->col_next) {
+		if (col->col_id.rcid_type != stat_type)
+			continue;
+
+		if (col->col_paged_att == 0)
+			(void) strlcpy(nproc, "-", sizeof (nproc));
+		else
+			(void) snprintf(nproc, sizeof (nproc), "%lld",
+			    col->col_nproc);
 		format_size(size, col->col_vmsize, 6);
 		format_size(rss, col->col_rsssize, 6);
 		format_size(limit, col->col_rsslimit, 6);
@@ -323,8 +336,9 @@ print_stats()
 		format_size(paged_eff, col->col_paged_eff, 6);
 		format_size(paged_att_avg, col->col_paged_att_avg, 6);
 		format_size(paged_eff_avg, col->col_paged_eff_avg, 6);
-		(void) printf("%6lld %-15s %5lld %5s %5s %5s %5s %5s %5s %5s\n",
-		    (long long)col->col_id, col->col_name, col->col_nproc,
+		(void) printf("%6lld %-15s %5s %5s %5s %5s %5s %5s %5s %5s\n",
+		    col->col_id.rcid_val, col->col_name,
+		    nproc,
 		    size, rss, limit,
 		    paged_att, paged_att_avg,
 		    paged_eff, paged_eff_avg);
@@ -342,20 +356,32 @@ main(int argc, char *argv[])
 	int count;
 	int always = 1;
 	int opt;
+	int projects = 0;
+	int zones = 0;
+	/* project reporting is the default if no option is specified */
+	rcid_type_t stat_type = RCIDT_PROJECT;
 
 	(void) setlocale(LC_ALL, "");
 	(void) textdomain(TEXT_DOMAIN);
 	(void) setprogname("rcapstat");
 
 	global = unformatted = 0;
-	while ((opt = getopt(argc, argv, "gu")) != (int)EOF) {
+	while ((opt = getopt(argc, argv, "gpuz")) != (int)EOF) {
 		switch (opt) {
 		case 'g':
 			global = 1;
 			break;
+		case 'p':
+			projects = 1;
+			stat_type = RCIDT_PROJECT;
+			break;
 		case 'u':
 			unformatted = 1;
 			break;
+		case 'z':
+			stat_type = RCIDT_ZONE;
+			zones = 1;
+			break;
 		default:
 			usage();
 		}
@@ -369,22 +395,22 @@ main(int argc, char *argv[])
 			die(gettext("invalid count specified\n"));
 		always = 0;
 	}
-	if (argc > optind)
+	if (argc > optind || (projects > 0 && zones > 0))
 		usage();
 
 	while (always || count-- > 0) {
-		if (read_stats() != E_SUCCESS)
+		if (read_stats(stat_type) != E_SUCCESS)
 			return (E_ERROR);
 		if (!unformatted) {
-			print_stats();
-			fflush(stdout);
+			print_stats(stat_type);
+			(void) fflush(stdout);
 			if (count || always)
 				(void) sleep(interval);
 		} else {
 			struct stat st;
 
 			print_unformatted_stats();
-			fflush(stdout);
+			(void) fflush(stdout);
 			while (stat(STAT_FILE_DEFAULT, &st) == 0 &&
 			    st.st_mtime == stat_mod)
 				usleep((useconds_t)(0.2 * MICROSEC));
diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c
index 92739f2b1e..4dc70b0d37 100644
--- a/usr/src/cmd/truss/print.c
+++ b/usr/src/cmd/truss/print.c
@@ -2325,6 +2325,7 @@ prt_zga(private_t *pri, int raw, long val)
 		case ZONE_ATTR_INITNAME:	s = "ZONE_ATTR_INITNAME"; break;
 		case ZONE_ATTR_BOOTARGS:	s = "ZONE_ATTR_BOOTARGS"; break;
 		case ZONE_ATTR_BRAND:	s = "ZONE_ATTR_BRAND"; break;
+		case ZONE_ATTR_PHYS_MCAP: s = "ZONE_ATTR_PHYS_MCAP"; break;
 		}
 	}
 
diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c
index 695d0e28c2..f46e028bf5 100644
--- a/usr/src/cmd/truss/systable.c
+++ b/usr/src/cmd/truss/systable.c
@@ -404,7 +404,7 @@ const struct systable systable[] = {
 {"kaio",	7, DEC, NOV, AIO, HEX, HEX, HEX, HEX, HEX, HEX}, /* 178 */
 {"cpc",		5, DEC, NOV, CPC, DEC, HEX, HEX, HEX},		/* 179 */
 {"lgrpsys",	3, DEC, NOV, DEC, DEC, HEX},			/* 180 */
-{"rusagesys",	2, DEC, NOV, DEC, HEX},				/* 181 */
+{"rusagesys",	5, DEC, NOV, DEC, HEX, DEC, HEX, HEX},		/* 181 */
 {"portfs",	6, HEX, HEX, DEC, HEX, HEX, HEX, HEX, HEX},	/* 182 */
 {"pollsys",	4, DEC, NOV, HEX, DEC, HEX, HEX},		/* 183 */
 {"labelsys",	2, DEC, NOV, DEC, HEX},				/* 184 */
@@ -761,6 +761,7 @@ static	const	struct systable rusagesystable[] = {
 {"getrusage",		2, DEC, NOV, HID, HEX},			/* 0 */
 {"getrusage_chld",	2, DEC, NOV, HID, HEX},			/* 1 */
 {"getrusage_lwp",	2, DEC, NOV, HID, HEX},			/* 2 */
+{"getvmusage",		5, DEC, NOV, HID, HEX, DEC, HEX, HEX},	/* 3 */
 };
 #define	NRUSAGESYSCODE \
 		(sizeof (rusagesystable) / sizeof (struct systable))
@@ -942,6 +943,7 @@ const	struct sysalias sysalias[] = {
 	{ "getrusage",		SYS_rusagesys	},
 	{ "getrusage_chld",	SYS_rusagesys	},
 	{ "getrusage_lwp",	SYS_rusagesys	},
+	{ "getvmusage",		SYS_rusagesys	},
 	{ "getpeerucred",	SYS_ucredsys	},
 	{ "ucred_get",		SYS_ucredsys	},
 	{ "port_create",	SYS_port	},
@@ -956,6 +958,7 @@ const	struct sysalias sysalias[] = {
 	{ "zone_create",	SYS_zone	},
 	{ "zone_destroy",	SYS_zone	},
 	{ "zone_getattr",	SYS_zone	},
+	{ "zone_setattr",	SYS_zone	},
 	{ "zone_enter",		SYS_zone	},
 	{ "getzoneid",		SYS_zone	},
 	{ "zone_list",		SYS_zone	},
diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile
index 4d0f91a6f3..e11609c6dd 100644
--- a/usr/src/cmd/zoneadm/Makefile
+++ b/usr/src/cmd/zoneadm/Makefile
@@ -27,8 +27,8 @@
 #
 
 PROG=		zoneadm
-MANIFEST=	zones.xml
-SVCMETHOD=	svc-zones
+MANIFEST=	zones.xml resource-mgmt.xml
+SVCMETHOD=	svc-zones svc-resource-mgmt
 
 include ../Makefile.cmd
 
diff --git a/usr/src/cmd/zoneadm/resource-mgmt.xml b/usr/src/cmd/zoneadm/resource-mgmt.xml
new file mode 100644
index 0000000000..264f26733f
--- /dev/null
+++ b/usr/src/cmd/zoneadm/resource-mgmt.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+ Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+    ident	"%Z%%M%	%I%	%E% SMI"
+
+    NOTE:  This service manifest is not editable; its contents will
+    be overwritten by package or patch operations, including
+    operating system upgrade.  Make customizations in a different
+    file.
+-->
+
+<service_bundle type='manifest' name='SUNWzoner:zones'>
+
+<!--
+	This service applies global zone resource management settings
+	at system startup.
+-->
+<service
+	name='system/resource-mgmt'
+	type='service'
+	version='1'>
+
+	<create_default_instance enabled='true' />
+
+	<single_instance />
+
+	<dependency
+		name='usr'
+		type='service'
+		grouping='require_all'
+		restart_on='none'>
+		<service_fmri value='svc:/system/filesystem/minimal' />
+	</dependency>
+
+	<dependency
+		name='scheduler'
+		type='service'
+		grouping='optional_all'
+		restart_on='none'>
+		<service_fmri value='svc:/system/scheduler' />
+	</dependency>
+
+	<dependency
+		name='pools'
+		type='service'
+		grouping='optional_all'
+		restart_on='none'>
+		<service_fmri value='svc:/system/pools' />
+	</dependency>
+
+	<dependent
+		name='rcap'
+		grouping='optional_all'
+		restart_on='none'>
+		<service_fmri value='svc:/system/rcap' />
+	</dependent>
+
+	<exec_method
+		type='method'
+		name='start'
+		exec='/lib/svc/method/svc-resource-mgmt %m'
+		timeout_seconds='60'>
+	</exec_method>
+
+	<exec_method
+		type='method'
+		name='stop'
+		exec=':true'
+		timeout_seconds='3'>
+	</exec_method>
+
+	<property_group name='startd' type='framework'>
+		<propval name='duration' type='astring' value='transient' />
+	</property_group>
+
+	<stability value='Unstable' />
+
+	<template>
+		<common_name>
+			<loctext xml:lang='C'>
+			Global zone resource management settings
+			</loctext>
+		</common_name>
+		<documentation>
+			<manpage title='zones' section='5' manpath='/usr/share/man' />
+			<manpage
+				title='zonecfg'
+				section='1M'
+				manpath='/usr/share/man' />
+		</documentation>
+	</template>
+</service>
+
+</service_bundle>
diff --git a/usr/src/cmd/zoneadm/svc-resource-mgmt b/usr/src/cmd/zoneadm/svc-resource-mgmt
new file mode 100644
index 0000000000..762de4c0d8
--- /dev/null
+++ b/usr/src/cmd/zoneadm/svc-resource-mgmt
@@ -0,0 +1,54 @@
+#!/sbin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+
+# ident	"%Z%%M%	%I%	%E% SMI"
+#
+
+. /lib/svc/share/smf_include.sh
+
+# No /etc/zones/global.xml means there is no global zone resource
+# management configuration to apply.
+[ -f /etc/zones/global.xml ] || exit $SMF_EXIT_OK
+
+# No executable zoneadm: SUNWzoneu is not installed.
+[ -x /usr/sbin/zoneadm ] || exit $SMF_EXIT_OK
+
+# Run from / so the working directory cannot cause unmounting problems.
+cd /
+PATH=/usr/sbin:/usr/bin
+export PATH
+
+case "$1" in
+'start')
+	# Apply the global zone's resource management settings.
+	zoneadm -z global apply || exit $SMF_EXIT_ERR_FATAL
+	;;
+
+*)
+	echo "Usage: $0 start"
+	exit $SMF_EXIT_ERR_FATAL
+	;;
+esac
+exit $SMF_EXIT_OK
diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c
index bff26cd356..b7ae32b30d 100644
--- a/usr/src/cmd/zoneadm/zoneadm.c
+++ b/usr/src/cmd/zoneadm/zoneadm.c
@@ -74,9 +74,12 @@
 #include <fnmatch.h>
 #include <sys/modctl.h>
 #include <libbrand.h>
+#include <libscf.h>
 
 #include <pool.h>
 #include <sys/pool.h>
+#include <sys/priocntl.h>
+#include <sys/fsspriocntl.h>
 
 #include "zoneadm.h"
 
@@ -154,6 +157,7 @@ static int move_func(int argc, char *argv[]);
 static int detach_func(int argc, char *argv[]);
 static int attach_func(int argc, char *argv[]);
 static int mark_func(int argc, char *argv[]);
+static int apply_func(int argc, char *argv[]);
 static int sanity_check(char *zone, int cmd_num, boolean_t running,
     boolean_t unsafe_when_running, boolean_t force);
 static int cmd_match(char *cmd);
@@ -177,7 +181,8 @@ static struct cmd cmdtab[] = {
 	{ CMD_MOVE,		"move",		SHELP_MOVE,	move_func },
 	{ CMD_DETACH,		"detach",	SHELP_DETACH,	detach_func },
 	{ CMD_ATTACH,		"attach",	SHELP_ATTACH,	attach_func },
-	{ CMD_MARK,		"mark",		SHELP_MARK,	mark_func }
+	{ CMD_MARK,		"mark",		SHELP_MARK,	mark_func },
+	{ CMD_APPLY,		"apply",	NULL,		apply_func }
 };
 
 /* global variables */
@@ -1501,6 +1506,7 @@ boot_func(int argc, char *argv[])
 		zerror(gettext("call to %s failed"), "zoneadmd");
 		return (Z_ERR);
 	}
+
 	return (Z_OK);
 }
 
@@ -4355,15 +4361,22 @@ dev_fix(zone_dochandle_t handle)
 	zarg.cmd = Z_READY;
 	if (call_zoneadmd(target_zone, &zarg) != 0) {
 		zerror(gettext("call to %s failed"), "zoneadmd");
+		/* attempt to restore zone to configured state */
+		(void) zone_set_state(target_zone, ZONE_STATE_CONFIGURED);
 		return (Z_ERR);
 	}
 
 	zarg.cmd = Z_HALT;
 	if (call_zoneadmd(target_zone, &zarg) != 0) {
 		zerror(gettext("call to %s failed"), "zoneadmd");
+		/* attempt to restore zone to configured state */
+		(void) zone_set_state(target_zone, ZONE_STATE_CONFIGURED);
 		return (Z_ERR);
 	}
 
+	/* attempt to restore zone to configured state */
+	(void) zone_set_state(target_zone, ZONE_STATE_CONFIGURED);
+
 	if (zonecfg_setdevperment(handle) != Z_OK) {
 		(void) fprintf(stderr,
 		    gettext("unable to enumerate device entries\n"));
@@ -4845,6 +4858,177 @@ mark_func(int argc, char *argv[])
 	return (err);
 }
 
+/*
+ * Warn when FSS is not the zone's default scheduling class.  Only that case
+ * returns Z_SYSTEM; failing to determine the class warns but returns Z_OK.
+ */
+static int
+check_sched_fss(zone_dochandle_t handle)
+{
+	char sched[PC_CLNMSZ];
+
+	if (zonecfg_get_dflt_sched_class(handle, sched, sizeof (sched)) !=
+	    Z_OK) {
+		zerror(gettext("WARNING: unable to determine the zone's "
+		    "scheduling class"));
+		return (Z_OK);
+	}
+	if (strcmp("FSS", sched) == 0)
+		return (Z_OK);
+	zerror(gettext("WARNING: The zone.cpu-shares rctl is set but\n"
+	    "FSS is not the default scheduling class for this zone.  "
+	    "FSS will be\nused for processes in the zone but to get "
+	    "the full benefit of FSS,\nit should be the default "
+	    "scheduling class.  See dispadmin(1M) for\nmore details."));
+	return (Z_SYSTEM);
+}
+
+/*
+ * Verify FSS is the default class if a zone.cpu-shares rctl is configured.
+ */
+static int
+check_cpu_shares_sched(zone_dochandle_t handle)
+{
+	struct zone_rctltab ent;
+	int status = Z_OK;
+	int err = zonecfg_setrctlent(handle);
+
+	if (err != Z_OK) {
+		errno = err;
+		zperror(cmd_to_str(CMD_APPLY), B_TRUE);
+		return (err);
+	}
+	while (zonecfg_getrctlent(handle, &ent) == Z_OK) {
+		if (strcmp(ent.zone_rctl_name, "zone.cpu-shares") != 0)
+			continue;
+		if (check_sched_fss(handle) != Z_OK)
+			status = Z_SYSTEM;
+		break;
+	}
+	(void) zonecfg_endrctlent(handle);
+	return (status);
+}
+
+/*
+ * This is an undocumented interface which is currently only used to apply
+ * the global zone resource management settings when the system boots.
+ * This function does not yet properly handle updating a running system so
+ * any projects running in the zone would be trashed if this function
+ * were to run after the zone had booted.  It also does not reset any
+ * rctl settings that were removed from zonecfg.  There is still work to be
+ * done before we can properly support dynamically updating the resource
+ * management settings for a running zone (global or non-global).  Thus, this
+ * functionality is undocumented for now.
+ */
+/* ARGSUSED */
+static int
+apply_func(int argc, char *argv[])
+{
+	int err;
+	int res = Z_OK;
+	priv_set_t *privset;
+	zoneid_t zoneid;
+	zone_dochandle_t handle;
+	struct zone_mcaptab mcap;
+	char pool_err[128];
+
+	zoneid = getzoneid();
+
+	if (zonecfg_in_alt_root() || zoneid != GLOBAL_ZONEID ||
+	    target_zone == NULL || strcmp(target_zone, GLOBAL_ZONENAME) != 0)
+		return (usage(B_FALSE));
+
+	if ((privset = priv_allocset()) == NULL) {
+		zerror(gettext("%s failed"), "priv_allocset");
+		return (Z_ERR);
+	}
+
+	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
+		zerror(gettext("%s failed"), "getppriv");
+		priv_freeset(privset);
+		return (Z_ERR);
+	}
+
+	if (priv_isfullset(privset) == B_FALSE) {
+		(void) usage(B_FALSE);
+		priv_freeset(privset);
+		return (Z_ERR);
+	}
+	priv_freeset(privset);
+
+	if ((handle = zonecfg_init_handle()) == NULL) {
+		zperror(cmd_to_str(CMD_APPLY), B_TRUE);
+		return (Z_ERR);
+	}
+
+	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
+		errno = err;
+		zperror(cmd_to_str(CMD_APPLY), B_TRUE);
+		zonecfg_fini_handle(handle);
+		return (Z_ERR);
+	}
+
+	/* specific error msgs are printed within apply_rctls */
+	if ((err = zonecfg_apply_rctls(target_zone, handle)) != Z_OK) {
+		errno = err;
+		zperror(cmd_to_str(CMD_APPLY), B_TRUE);
+		res = Z_ERR;
+	}
+
+	if ((err = check_cpu_shares_sched(handle)) != Z_OK)
+		res = Z_ERR;
+
+	/*
+	 * The next two blocks of code attempt to set up temporary pools as
+	 * well as persistent pools.  In both cases we call the functions
+	 * unconditionally.  Within each function the code will check if the
+	 * zone is actually configured for a temporary pool or persistent pool
+	 * and just return if there is nothing to do.
+	 */
+	if ((err = zonecfg_bind_tmp_pool(handle, zoneid, pool_err,
+	    sizeof (pool_err))) != Z_OK) {
+		if (err == Z_POOL || err == Z_POOL_CREATE || err == Z_POOL_BIND)
+			zerror("%s: %s", zonecfg_strerror(err), pool_err);
+		else
+			zerror(gettext("could not bind zone to temporary "
+			    "pool: %s"), zonecfg_strerror(err));
+		res = Z_ERR;
+	}
+
+	if ((err = zonecfg_bind_pool(handle, zoneid, pool_err,
+	    sizeof (pool_err))) != Z_OK) {
+		if (err == Z_POOL || err == Z_POOL_BIND)
+			zerror("%s: %s", zonecfg_strerror(err), pool_err);
+		else
+			zerror("%s", zonecfg_strerror(err));
+	}
+
+	/*
+	 * If a memory cap is configured, set the cap in the kernel using
+	 * zone_setattr() and make sure the rcapd SMF service is enabled.
+	 */
+	if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
+		uint64_t num;
+		char smf_err[128];
+
+		num = (uint64_t)strtoll(mcap.zone_physmem_cap, NULL, 10);
+		if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
+			zerror(gettext("could not set zone memory cap"));
+			res = Z_ERR;
+		}
+
+		if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
+			zerror(gettext("enabling system/rcap service failed: "
+			    "%s"), smf_err);
+			res = Z_ERR;
+		}
+	}
+
+	zonecfg_fini_handle(handle);
+
+	return (res);
+}
+
 static int
 help_func(int argc, char *argv[])
 {
diff --git a/usr/src/cmd/zoneadm/zoneadm.h b/usr/src/cmd/zoneadm/zoneadm.h
index a94053e258..a299ece135 100644
--- a/usr/src/cmd/zoneadm/zoneadm.h
+++ b/usr/src/cmd/zoneadm/zoneadm.h
@@ -45,9 +45,10 @@
 #define	CMD_DETACH	13
 #define	CMD_ATTACH	14
 #define	CMD_MARK	15
+#define	CMD_APPLY	16
 
 #define	CMD_MIN		CMD_HELP
-#define	CMD_MAX		CMD_MARK
+#define	CMD_MAX		CMD_APPLY
 
 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile
index 8b77f8234c..34914694a8 100644
--- a/usr/src/cmd/zoneadmd/Makefile
+++ b/usr/src/cmd/zoneadmd/Makefile
@@ -42,7 +42,7 @@ POFILES= $(OBJS:%.o=%.po)
 CFLAGS += $(CCVERBOSE)
 LAZYLIBS = $(ZLAZYLOAD) -ltsnet -ltsol $(ZNOLAZYLOAD)
 lint := LAZYLIBS = -ltsnet -ltsol
-LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair -lpool \
+LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
 	-lgen -lbsm -lcontract -lzfs -luuid -lbrand $(LAZYLIBS)
 XGETFLAGS += -a -x zoneadmd.xcl
 
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index ca93b1c696..513921e5e2 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -106,6 +106,7 @@
 
 #include <pool.h>
 #include <sys/pool.h>
+#include <sys/priocntl.h>
 
 #include <libbrand.h>
 #include <sys/brand.h>
@@ -2661,27 +2662,6 @@ out:
 }
 
 static int
-get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz)
-{
-	zone_dochandle_t handle;
-	int error;
-
-	if ((handle = zonecfg_init_handle()) == NULL) {
-		zerror(zlogp, B_TRUE, "getting zone configuration handle");
-		return (Z_NOMEM);
-	}
-	error = zonecfg_get_snapshot_handle(zone_name, handle);
-	if (error != Z_OK) {
-		zerror(zlogp, B_FALSE, "invalid configuration");
-		zonecfg_fini_handle(handle);
-		return (error);
-	}
-	error = zonecfg_get_pool(handle, poolbuf, bufsz);
-	zonecfg_fini_handle(handle);
-	return (error);
-}
-
-static int
 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
 {
 	zone_dochandle_t handle;
@@ -2818,75 +2798,6 @@ validate_datasets(zlog_t *zlogp)
 	return (0);
 }
 
-static int
-bind_to_pool(zlog_t *zlogp, zoneid_t zoneid)
-{
-	pool_conf_t *poolconf;
-	pool_t *pool;
-	char poolname[MAXPATHLEN];
-	int status;
-	int error;
-
-	/*
-	 * Find the pool mentioned in the zone configuration, and bind to it.
-	 */
-	error = get_zone_pool(zlogp, poolname, sizeof (poolname));
-	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
-		/*
-		 * The property is not set on the zone, so the pool
-		 * should be bound to the default pool.  But that's
-		 * already done by the kernel, so we can just return.
-		 */
-		return (0);
-	}
-	if (error != Z_OK) {
-		/*
-		 * Not an error, even though it shouldn't be happening.
-		 */
-		zerror(zlogp, B_FALSE,
-		    "WARNING: unable to retrieve default pool.");
-		return (0);
-	}
-	/*
-	 * Don't do anything if pools aren't enabled.
-	 */
-	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
-		zerror(zlogp, B_FALSE, "WARNING: pools facility not active; "
-		    "zone will not be bound to pool '%s'.", poolname);
-		return (0);
-	}
-	/*
-	 * Try to provide a sane error message if the requested pool doesn't
-	 * exist.
-	 */
-	if ((poolconf = pool_conf_alloc()) == NULL) {
-		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc");
-		return (-1);
-	}
-	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
-	    PO_SUCCESS) {
-		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open");
-		pool_conf_free(poolconf);
-		return (-1);
-	}
-	pool = pool_get_pool(poolconf, poolname);
-	(void) pool_conf_close(poolconf);
-	pool_conf_free(poolconf);
-	if (pool == NULL) {
-		zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; "
-		    "using default pool.", poolname);
-		return (0);
-	}
-	/*
-	 * Bind the zone to the pool.
-	 */
-	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
-		zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; "
-		    "using default pool.", poolname);
-	}
-	return (0);
-}
-
 /*
  * Mount lower level home directories into/from current zone
  * Share exported directories specified in dfstab for zone
@@ -3482,6 +3393,149 @@ duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
 	return (B_FALSE);
 }
 
+/*
+ * Apply the zone's resource management settings from its config snapshot:
+ * memory cap, scheduling class and pool bindings.  Returns Z_OK or an error.
+ */
+static int
+setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
+{
+	int res;
+	uint64_t tmp;
+	struct zone_mcaptab mcap;
+	char sched[MAXNAMELEN];
+	zone_dochandle_t handle = NULL;
+	char pool_err[128];
+
+	if ((handle = zonecfg_init_handle()) == NULL) {
+		zerror(zlogp, B_TRUE, "getting zone configuration handle");
+		return (Z_BAD_HANDLE);
+	}
+
+	if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
+		zerror(zlogp, B_FALSE, "invalid configuration");
+		zonecfg_fini_handle(handle);
+		return (res);
+	}
+
+	/*
+	 * If a memory cap is configured, set the cap in the kernel using
+	 * zone_setattr() and make sure the rcapd SMF service is enabled.
+	 */
+	if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
+		uint64_t num;
+		char smf_err[128];
+
+		num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
+		if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
+			zerror(zlogp, B_TRUE, "could not set zone memory cap");
+			zonecfg_fini_handle(handle);
+			return (Z_INVAL);
+		}
+
+		if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
+			zerror(zlogp, B_FALSE, "enabling system/rcap service "
+			    "failed: %s", smf_err);
+			zonecfg_fini_handle(handle);
+			return (Z_INVAL);
+		}
+	}
+
+	/* Get the scheduling class set in the zone configuration. */
+	if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
+	    strlen(sched) > 0) {
+		if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
+		    strlen(sched)) == -1)
+			zerror(zlogp, B_TRUE, "WARNING: unable to set the "
+			    "default scheduling class");
+
+	} else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
+	    == Z_OK) {
+		/*
+		 * If the zone has the zone.cpu-shares rctl set then we want to
+		 * use the Fair Share Scheduler (FSS) for processes in the
+		 * zone.  Check what scheduling class the zone would be running
+		 * in by default so we can print a warning and modify the class
+		 * if we wouldn't be using FSS.
+		 */
+		char class_name[PC_CLNMSZ];
+
+		if (zonecfg_get_dflt_sched_class(handle, class_name,
+		    sizeof (class_name)) != Z_OK) {
+			zerror(zlogp, B_FALSE, "WARNING: unable to determine "
+			    "the zone's scheduling class");
+
+		} else if (strcmp("FSS", class_name) != 0) {
+			zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares "
+			    "rctl is set but\nFSS is not the default "
+			    "scheduling class for\nthis zone.  FSS will be "
+			    "used for processes\nin the zone but to get the "
+			    "full benefit of FSS,\nit should be the default "
+			    "scheduling class.\nSee dispadmin(1M) for more "
+			    "details.");
+
+			if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, "FSS",
+			    strlen("FSS")) == -1)
+				zerror(zlogp, B_TRUE, "WARNING: unable to set "
+				    "zone scheduling class to FSS");
+		}
+	}
+
+	/*
+	 * The next few blocks of code attempt to set up temporary pools as
+	 * well as persistent pools.  In all cases we call the functions
+	 * unconditionally.  Within each function the code will check if the
+	 * zone is actually configured for a temporary pool or persistent pool
+	 * and just return if there is nothing to do.
+	 *
+	 * If we are rebooting we want to attempt to reuse any temporary pool
+	 * that was previously set up.  zonecfg_bind_tmp_pool() will do the
+	 * right thing in all cases (reuse or create) based on the current
+	 * zonecfg.
+	 */
+	if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err,
+	    sizeof (pool_err))) != Z_OK) {
+		if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND)
+			zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting "
+			    "cannot be instantiated", zonecfg_strerror(res),
+			    pool_err);
+		else
+			zerror(zlogp, B_FALSE, "could not bind zone to "
+			    "temporary pool: %s", zonecfg_strerror(res));
+		zonecfg_fini_handle(handle);
+		return (Z_POOL_BIND);
+	}
+
+	/*
+	 * Check if we need to warn about poold not being enabled.
+	 */
+	if (zonecfg_warn_poold(handle)) {
+		zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has "
+		    "been specified\nbut the dynamic pool service is not "
+		    "enabled.\nThe system will not dynamically adjust the\n"
+		    "processor allocation within the specified range\n"
+		    "until svc:/system/pools/dynamic is enabled.\n"
+		    "See poold(1M).");
+	}
+
+	/* The following is a warning, not an error. */
+	if ((res = zonecfg_bind_pool(handle, zoneid, pool_err,
+	    sizeof (pool_err))) != Z_OK) {
+		if (res == Z_POOL_BIND)
+			zerror(zlogp, B_FALSE, "WARNING: unable to bind to "
+			    "pool '%s'; using default pool.", pool_err);
+		else if (res == Z_POOL)
+			zerror(zlogp, B_FALSE, "WARNING: %s: %s",
+			    zonecfg_strerror(res), pool_err);
+		else
+			zerror(zlogp, B_FALSE, "WARNING: %s",
+			    zonecfg_strerror(res));
+	}
+
+	zonecfg_fini_handle(handle);
+	return (Z_OK);
+}
+
 zoneid_t
 vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
 {
@@ -3668,14 +3722,18 @@ vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
 	}
 
 	/*
-	 * The following is a warning, not an error, and is not performed when
-	 * merely mounting a zone for administrative use.
+	 * The following actions are not performed when merely mounting a zone
+	 * for administrative use.
 	 */
-	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
-		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
-		    "requested pool; using default pool.");
-	if (!mount_cmd)
+	if (!mount_cmd) {
+		if (setup_zone_rm(zlogp, zone_name, zoneid) != Z_OK) {
+			(void) zone_shutdown(zoneid);
+			goto error;
+		}
+
 		set_mlps(zlogp, zoneid, zcent);
+	}
+
 	rval = zoneid;
 	zoneid = -1;
 
@@ -3878,10 +3936,12 @@ unmounted:
 }
 
 int
-vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
+vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
 {
 	char *kzone;
 	zoneid_t zoneid;
+	int res;
+	char pool_err[128];
 	char zroot[MAXPATHLEN];
 	char cmdbuf[MAXPATHLEN];
 	char brand[MAXNAMELEN];
@@ -3972,6 +4032,19 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
 		goto error;
 	}
 
+	/*
+	 * If we are rebooting then we don't want to destroy an existing
+	 * temporary pool at this point so that we can just reuse it when the
+	 * zone boots back up.  pool_err is logged as data, never as a format.
+	 */
+	if (!unmount_cmd && !rebooting) {
+		if ((res = zonecfg_destroy_tmp_pool(zone_name, pool_err,
+		    sizeof (pool_err))) != Z_OK) {
+			if (res == Z_POOL)
+				zerror(zlogp, B_FALSE, "%s", pool_err);
+		}
+	}
+
 	remove_mlps(zlogp, zoneid);
 
 	if (zone_destroy(zoneid) != 0) {
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c
index 313d24d95b..35206384b9 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.c
+++ b/usr/src/cmd/zoneadmd/zoneadmd.c
@@ -463,7 +463,7 @@ zone_ready(zlog_t *zlogp, boolean_t mount_cmd)
 	}
 	if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
 		bringup_failure_recovery = B_TRUE;
-		(void) vplat_teardown(NULL, mount_cmd);
+		(void) vplat_teardown(NULL, mount_cmd, B_FALSE);
 		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
 			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
 			    zonecfg_strerror(err));
@@ -738,11 +738,11 @@ zone_bootup(zlog_t *zlogp, const char *bootargs)
 }
 
 static int
-zone_halt(zlog_t *zlogp, boolean_t unmount_cmd)
+zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
 {
 	int err;
 
-	if (vplat_teardown(zlogp, unmount_cmd) != 0) {
+	if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
 		if (!bringup_failure_recovery)
 			zerror(zlogp, B_FALSE, "unable to destroy zone");
 		return (-1);
@@ -985,7 +985,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 			audit_put_record(zlogp, uc, rval, "boot");
 			if (rval != 0) {
 				bringup_failure_recovery = B_TRUE;
-				(void) zone_halt(zlogp, B_FALSE);
+				(void) zone_halt(zlogp, B_FALSE, B_FALSE);
 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
 			}
 			break;
@@ -1094,7 +1094,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 			audit_put_record(zlogp, uc, rval, "boot");
 			if (rval != 0) {
 				bringup_failure_recovery = B_TRUE;
-				(void) zone_halt(zlogp, B_FALSE);
+				(void) zone_halt(zlogp, B_FALSE, B_TRUE);
 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
 			}
 			boot_args[0] = '\0';
@@ -1102,7 +1102,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 		case Z_HALT:
 			if (kernelcall)	/* Invalid; can't happen */
 				abort();
-			if ((rval = zone_halt(zlogp, B_FALSE)) != 0)
+			if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE)) != 0)
 				break;
 			eventstream_write(Z_EVT_ZONE_HALTED);
 			break;
@@ -1125,7 +1125,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 		case Z_UNMOUNT:
 			if (kernelcall)	/* Invalid; can't happen */
 				abort();
-			rval = zone_halt(zlogp, B_TRUE);
+			rval = zone_halt(zlogp, B_TRUE, B_FALSE);
 			if (rval == 0) {
 				eventstream_write(Z_EVT_ZONE_HALTED);
 				(void) sema_post(&scratch_sem);
@@ -1147,7 +1147,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 	case ZONE_STATE_DOWN:
 		switch (cmd) {
 		case Z_READY:
-			if ((rval = zone_halt(zlogp, B_FALSE)) != 0)
+			if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE)) != 0)
 				break;
 			if ((rval = zone_ready(zlogp, B_FALSE)) == 0)
 				eventstream_write(Z_EVT_ZONE_READIED);
@@ -1165,7 +1165,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 			rval = 0;
 			break;
 		case Z_HALT:
-			if ((rval = zone_halt(zlogp, B_FALSE)) != 0)
+			if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE)) != 0)
 				break;
 			eventstream_write(Z_EVT_ZONE_HALTED);
 			break;
@@ -1173,7 +1173,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 			(void) strlcpy(boot_args, zargp->bootbuf,
 			    sizeof (boot_args));
 			eventstream_write(Z_EVT_ZONE_REBOOTING);
-			if ((rval = zone_halt(zlogp, B_FALSE)) != 0) {
+			if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE)) != 0) {
 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
 				boot_args[0] = '\0';
 				break;
@@ -1186,7 +1186,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
 			rval = zone_bootup(zlogp, zargp->bootbuf);
 			audit_put_record(zlogp, uc, rval, "reboot");
 			if (rval != 0) {
-				(void) zone_halt(zlogp, B_FALSE);
+				(void) zone_halt(zlogp, B_FALSE, B_TRUE);
 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
 			}
 			boot_args[0] = '\0';
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h
index cfb90f93f3..a4aba27b5c 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.h
+++ b/usr/src/cmd/zoneadmd/zoneadmd.h
@@ -106,7 +106,7 @@ extern void eventstream_write(zone_evt_t evt);
  */
 extern zoneid_t vplat_create(zlog_t *, boolean_t);
 extern int vplat_bringup(zlog_t *, boolean_t, zoneid_t);
-extern int vplat_teardown(zlog_t *, boolean_t);
+extern int vplat_teardown(zlog_t *, boolean_t, boolean_t);
 
 /*
  * Console subsystem routines.
diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c
index ea745cbb61..34d6b99480 100644
--- a/usr/src/cmd/zonecfg/zonecfg.c
+++ b/usr/src/cmd/zonecfg/zonecfg.c
@@ -101,6 +101,8 @@ extern int lex_lineno;
 #define	MAX_CMD_HIST	1024
 #define	MAX_CMD_LEN	1024
 
+#define	ONE_MB		1048576
+
 /*
  * Each SHELP_ should be a simple string.
  */
@@ -108,6 +110,7 @@ extern int lex_lineno;
 #define	SHELP_ADD	"add <resource-type>\n\t(global scope)\n" \
 	"add <property-name> <property-value>\n\t(resource scope)"
 #define	SHELP_CANCEL	"cancel"
+#define	SHELP_CLEAR	"clear <property-name>"
 #define	SHELP_COMMIT	"commit"
 #define	SHELP_CREATE	"create [-F] [ -a <path> | -b | -t <template> ]"
 #define	SHELP_DELETE	"delete [-F]"
@@ -116,9 +119,11 @@ extern int lex_lineno;
 #define	SHELP_EXPORT	"export [-f output-file]"
 #define	SHELP_HELP	"help [commands] [syntax] [usage] [<command-name>]"
 #define	SHELP_INFO	"info [<resource-type> [property-name=property-value]*]"
-#define	SHELP_REMOVE	"remove <resource-type> { <property-name>=<property-" \
-	"value> }\n\t(global scope)\nremove <property-name> <property-value>" \
-	"\n\t(resource scope)"
+#define	SHELP_REMOVE	"remove [-F] <resource-type> " \
+	"[ <property-name>=<property-value> ]*\n" \
+	"\t(global scope)\n" \
+	"remove <property-name> <property-value>\n" \
+	"\t(resource scope)"
 #define	SHELP_REVERT	"revert [-F]"
 #define	SHELP_SELECT	"select <resource-type> { <property-name>=" \
 	"<property-value> }"
@@ -128,6 +133,7 @@ extern int lex_lineno;
 static struct help helptab[] = {
 	{ CMD_ADD,	"add",		HELP_RES_PROPS,	SHELP_ADD, },
 	{ CMD_CANCEL,	"cancel",	0,		SHELP_CANCEL, },
+	{ CMD_CLEAR,	"clear",	HELP_PROPS,	SHELP_CLEAR, },
 	{ CMD_COMMIT,	"commit",	0,		SHELP_COMMIT, },
 	{ CMD_CREATE,	"create",	0,		SHELP_CREATE, },
 	{ CMD_DELETE,	"delete",	0,		SHELP_DELETE, },
@@ -163,6 +169,15 @@ static char *res_types[] = {
 	"limitpriv",
 	"bootargs",
 	"brand",
+	"dedicated-cpu",
+	"capped-memory",
+	ALIAS_MAXLWPS,
+	ALIAS_MAXSHMMEM,
+	ALIAS_MAXSHMIDS,
+	ALIAS_MAXMSGIDS,
+	ALIAS_MAXSEMIDS,
+	ALIAS_SHARES,
+	"scheduling-class",
 	NULL
 };
 
@@ -189,6 +204,19 @@ static char *prop_types[] = {
 	"limitpriv",
 	"bootargs",
 	"brand",
+	"ncpus",
+	"importance",
+	"swap",
+	"locked",
+	ALIAS_SHARES,
+	ALIAS_MAXLWPS,
+	ALIAS_MAXSHMMEM,
+	ALIAS_MAXSHMIDS,
+	ALIAS_MAXMSGIDS,
+	ALIAS_MAXSEMIDS,
+	ALIAS_MAXLOCKEDMEM,
+	ALIAS_MAXSWAP,
+	"scheduling-class",
 	NULL
 };
 
@@ -205,11 +233,12 @@ static char *prop_val_types[] = {
 
 /*
  * remove has a space afterwards because it has qualifiers; the other commands
- * that have qualifiers (add, select and set) don't need a space here because
+ * that have qualifiers (add, select, etc.) don't need a space here because
  * they have their own _cmds[] lists below.
  */
 static const char *global_scope_cmds[] = {
 	"add",
+	"clear",
 	"commit",
 	"create",
 	"delete",
@@ -233,6 +262,23 @@ static const char *add_cmds[] = {
 	"add rctl",
 	"add attr",
 	"add dataset",
+	"add dedicated-cpu",
+	"add capped-memory",
+	NULL
+};
+
+static const char *clear_cmds[] = {
+	"clear autoboot",
+	"clear pool",
+	"clear limitpriv",
+	"clear bootargs",
+	"clear scheduling-class",
+	"clear " ALIAS_MAXLWPS,
+	"clear " ALIAS_MAXSHMMEM,
+	"clear " ALIAS_MAXSHMIDS,
+	"clear " ALIAS_MAXMSGIDS,
+	"clear " ALIAS_MAXSEMIDS,
+	"clear " ALIAS_SHARES,
 	NULL
 };
 
@@ -244,6 +290,8 @@ static const char *remove_cmds[] = {
 	"remove rctl ",
 	"remove attr ",
 	"remove dataset ",
+	"remove dedicated-cpu ",
+	"remove capped-memory ",
 	NULL
 };
 
@@ -255,6 +303,8 @@ static const char *select_cmds[] = {
 	"select rctl ",
 	"select attr ",
 	"select dataset ",
+	"select dedicated-cpu",
+	"select capped-memory",
 	NULL
 };
 
@@ -266,6 +316,13 @@ static const char *set_cmds[] = {
 	"set pool=",
 	"set limitpriv=",
 	"set bootargs=",
+	"set scheduling-class=",
+	"set " ALIAS_MAXLWPS "=",
+	"set " ALIAS_MAXSHMMEM "=",
+	"set " ALIAS_MAXSHMIDS "=",
+	"set " ALIAS_MAXMSGIDS "=",
+	"set " ALIAS_MAXSEMIDS "=",
+	"set " ALIAS_SHARES "=",
 	NULL
 };
 
@@ -277,12 +334,22 @@ static const char *info_cmds[] = {
 	"info rctl ",
 	"info attr ",
 	"info dataset ",
+	"info capped-memory",
+	"info dedicated-cpu",
 	"info zonename",
 	"info zonepath",
 	"info autoboot",
 	"info pool",
 	"info limitpriv",
 	"info bootargs",
+	"info brand",
+	"info scheduling-class",
+	"info max-lwps",
+	"info max-shm-memory",
+	"info max-shm-ids",
+	"info max-msg-ids",
+	"info max-sem-ids",
+	"info cpu-shares",
 	NULL
 };
 
@@ -298,6 +365,7 @@ static const char *fs_res_scope_cmds[] = {
 	"set raw=",
 	"set special=",
 	"set type=",
+	"clear raw",
 	NULL
 };
 
@@ -366,6 +434,33 @@ static const char *dataset_res_scope_cmds[] = {
 	NULL
 };
 
+static const char *pset_res_scope_cmds[] = {
+	"cancel",
+	"end",
+	"exit",
+	"help",
+	"info",
+	"set ncpus=",
+	"set importance=",
+	"clear importance",
+	NULL
+};
+
+static const char *mcap_res_scope_cmds[] = {
+	"cancel",
+	"end",
+	"exit",
+	"help",
+	"info",
+	"set physical=",
+	"set swap=",
+	"set locked=",
+	"clear physical",
+	"clear swap",
+	"clear locked",
+	NULL
+};
+
 /* Global variables */
 
 /* set early in main(), never modified thereafter, used all over the place */
@@ -406,6 +501,9 @@ static bool got_handle = FALSE;
 /* initialized in do_interactive(), checked in initialize() */
 static bool interactive_mode;
 
+/* set if configuring the global zone */
+static bool global_zone = FALSE;
+
 /* set in main(), checked in multiple places */
 static bool read_only_mode;
 
@@ -427,9 +525,13 @@ static struct zone_devtab	old_devtab, in_progress_devtab;
 static struct zone_rctltab	old_rctltab, in_progress_rctltab;
 static struct zone_attrtab	old_attrtab, in_progress_attrtab;
 static struct zone_dstab	old_dstab, in_progress_dstab;
+static struct zone_psettab	old_psettab, in_progress_psettab;
+static struct zone_mcaptab	old_mcaptab, in_progress_mcaptab;
 
 static GetLine *gl;	/* The gl_get_line() resource object */
 
+static void bytes_to_units(char *str, char *buf, int bufsize);
+
 /* Functions begin here */
 
 static bool
@@ -469,6 +571,8 @@ CPL_MATCH_FN(cmd_cpl_fn)
 		 */
 		if (strncmp(line, "add ", MAX(MIN(word_end, 4), 1)) == 0)
 			return (add_stuff(cpl, line, add_cmds, word_end));
+		if (strncmp(line, "clear ", MAX(MIN(word_end, 6), 2)) == 0)
+			return (add_stuff(cpl, line, clear_cmds, word_end));
 		if (strncmp(line, "select ", MAX(MIN(word_end, 7), 3)) == 0)
 			return (add_stuff(cpl, line, select_cmds, word_end));
 		if (strncmp(line, "set ", MAX(MIN(word_end, 4), 3)) == 0)
@@ -494,6 +598,10 @@ CPL_MATCH_FN(cmd_cpl_fn)
 		return (add_stuff(cpl, line, attr_res_scope_cmds, word_end));
 	case RT_DATASET:
 		return (add_stuff(cpl, line, dataset_res_scope_cmds, word_end));
+	case RT_DCPU:
+		return (add_stuff(cpl, line, pset_res_scope_cmds, word_end));
+	case RT_MCAP:
+		return (add_stuff(cpl, line, mcap_res_scope_cmds, word_end));
 	}
 	return (0);
 }
@@ -669,9 +777,8 @@ long_help(int cmd_num)
 			    "flag can be used to force the\n\taction."));
 		case CMD_REMOVE:
 			return (gettext("Remove specified resource from "
-			    "configuration.  Note that the curly\n\tbraces "
-			    "('{', '}') mean one or more of whatever "
-			    "is between them."));
+			    "configuration.  The -F flag can be used\n\tto "
+			    "force the action."));
 		case CMD_SELECT:
 			(void) snprintf(line, sizeof (line),
 			    gettext("Selects a resource to modify.  "
@@ -684,6 +791,8 @@ long_help(int cmd_num)
 			return (line);
 		case CMD_SET:
 			return (gettext("Sets property values."));
+		case CMD_CLEAR:
+			return (gettext("Clears property values."));
 		case CMD_INFO:
 			return (gettext("Displays information about the "
 			    "current configuration.  If resource\n\ttype is "
@@ -870,6 +979,37 @@ usage(bool verbose, uint_t flags)
 			(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
 			    pt_to_str(PT_NAME), gettext("<name>"));
 			break;
+		case RT_DCPU:
+			(void) fprintf(fp, gettext("The '%s' resource scope "
+			    "configures the 'pools' facility to dedicate\na "
+			    "subset of the system's processors to this zone "
+			    "while it is running.\n"),
+			    rt_to_str(resource_scope));
+			(void) fprintf(fp, gettext("Valid commands:\n"));
+			(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_NCPUS),
+			    gettext("<unsigned integer | range>"));
+			(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_IMPORTANCE),
+			    gettext("<unsigned integer>"));
+			break;
+		case RT_MCAP:
+			(void) fprintf(fp, gettext("The '%s' resource scope is "
+			    "used to set an upper limit (a cap) on the\n"
+			    "amount of physical memory, swap space and locked "
+			    "memory that can be used by\nthis zone.\n"),
+			    rt_to_str(resource_scope));
+			(void) fprintf(fp, gettext("Valid commands:\n"));
+			(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_PHYSICAL),
+			    gettext("<qualified unsigned decimal>"));
+			(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_SWAP),
+			    gettext("<qualified unsigned decimal>"));
+			(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_LOCKED),
+			    gettext("<qualified unsigned decimal>"));
+			break;
 		}
 		(void) fprintf(fp, gettext("And from any resource scope, you "
 		    "can:\n"));
@@ -928,11 +1068,12 @@ usage(bool verbose, uint_t flags)
 	}
 	if (flags & HELP_RESOURCES) {
 		(void) fprintf(fp, "<%s> := %s | %s | %s | %s | %s | %s |\n\t"
-		    "%s\n\n",
+		    "%s | %s | %s\n\n",
 		    gettext("resource type"), rt_to_str(RT_FS),
 		    rt_to_str(RT_IPD), rt_to_str(RT_NET), rt_to_str(RT_DEVICE),
 		    rt_to_str(RT_RCTL), rt_to_str(RT_ATTR),
-		    rt_to_str(RT_DATASET));
+		    rt_to_str(RT_DATASET), rt_to_str(RT_DCPU),
+		    rt_to_str(RT_MCAP));
 	}
 	if (flags & HELP_PROPS) {
 		(void) fprintf(fp, gettext("For resource type ... there are "
@@ -951,6 +1092,20 @@ usage(bool verbose, uint_t flags)
 		    pt_to_str(PT_POOL));
 		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
 		    pt_to_str(PT_LIMITPRIV));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_SCHED));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_MAXLWPS));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_MAXSHMMEM));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_MAXSHMIDS));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_MAXMSGIDS));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_MAXSEMIDS));
+		(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+		    pt_to_str(PT_SHARES));
 		(void) fprintf(fp, "\t%s\t\t%s, %s, %s, %s\n", rt_to_str(RT_FS),
 		    pt_to_str(PT_DIR), pt_to_str(PT_SPECIAL),
 		    pt_to_str(PT_RAW), pt_to_str(PT_TYPE),
@@ -968,6 +1123,11 @@ usage(bool verbose, uint_t flags)
 		    pt_to_str(PT_VALUE));
 		(void) fprintf(fp, "\t%s\t\t%s\n", rt_to_str(RT_DATASET),
 		    pt_to_str(PT_NAME));
+		(void) fprintf(fp, "\t%s\t%s, %s\n", rt_to_str(RT_DCPU),
+		    pt_to_str(PT_NCPUS), pt_to_str(PT_IMPORTANCE));
+		(void) fprintf(fp, "\t%s\t%s, %s, %s\n", rt_to_str(RT_MCAP),
+		    pt_to_str(PT_PHYSICAL), pt_to_str(PT_SWAP),
+		    pt_to_str(PT_LOCKED));
 	}
 	if (need_to_close)
 		(void) pclose(fp);
@@ -1040,6 +1200,33 @@ initialize(bool handle_expected)
 				    "  Unable to continue", zone, brandname);
 				exit(Z_ERR);
 			}
+		} else if (global_zone && err == Z_NO_ZONE && !got_handle &&
+		    !read_only_mode) {
+			/*
+			 * We implicitly create the global zone config if it
+			 * doesn't exist.
+			 */
+			zone_dochandle_t tmphandle;
+
+			if ((tmphandle = zonecfg_init_handle()) == NULL) {
+				zone_perror(execname, Z_NOMEM, TRUE);
+				exit(Z_ERR);
+			}
+
+			err = zonecfg_get_template_handle("SUNWblank", zone,
+			    tmphandle);
+
+			if (err != Z_OK) {
+				zonecfg_fini_handle(tmphandle);
+				zone_perror("SUNWblank", err, TRUE);
+				return (err);
+			}
+
+			need_to_commit = TRUE;
+			zonecfg_fini_handle(handle);
+			handle = tmphandle;
+			got_handle = TRUE;
+
 		} else {
 			zone_perror(zone, err, handle_expected || got_handle);
 			if (err == Z_NO_ZONE && !got_handle &&
@@ -1373,10 +1560,13 @@ export_func(cmd_t *cmd)
 	struct zone_attrtab attrtab;
 	struct zone_rctltab rctltab;
 	struct zone_dstab dstab;
+	struct zone_psettab psettab;
+	struct zone_mcaptab mcaptab;
 	struct zone_rctlvaltab *valptr;
 	int err, arg;
 	char zonepath[MAXPATHLEN], outfile[MAXPATHLEN], pool[MAXNAMELEN];
 	char bootargs[BOOTARGS_MAX];
+	char sched[MAXNAMELEN];
 	char brand[MAXNAMELEN];
 	char *limitpriv;
 	FILE *of;
@@ -1456,6 +1646,10 @@ export_func(cmd_t *cmd)
 		free(limitpriv);
 	}
 
+	if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
+	    strlen(sched) > 0)
+		(void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
+		    pt_to_str(PT_SCHED), sched);
 
 	if ((err = zonecfg_setipdent(handle)) != Z_OK) {
 		zone_perror(zone, err, FALSE);
@@ -1576,6 +1770,33 @@ export_func(cmd_t *cmd)
 	}
 	(void) zonecfg_enddsent(handle);
 
+	if (zonecfg_getpsetent(handle, &psettab) == Z_OK) {
+		(void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD),
+		    rt_to_str(RT_DCPU));
+		if (strcmp(psettab.zone_ncpu_min, psettab.zone_ncpu_max) == 0)
+			(void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_NCPUS), psettab.zone_ncpu_max);
+		else
+			(void) fprintf(of, "%s %s=%s-%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_NCPUS), psettab.zone_ncpu_min,
+			    psettab.zone_ncpu_max);
+		if (psettab.zone_importance[0] != '\0')
+			(void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
+			    pt_to_str(PT_IMPORTANCE), psettab.zone_importance);
+		(void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
+	}
+
+	if (zonecfg_getmcapent(handle, &mcaptab) == Z_OK) {
+		char buf[128];
+
+		(void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD),
+		    rt_to_str(RT_MCAP));
+		bytes_to_units(mcaptab.zone_physmem_cap, buf, sizeof (buf));
+		(void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
+		    pt_to_str(PT_PHYSICAL), buf);
+		(void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
+	}
+
 done:
 	if (need_to_close)
 		(void) fclose(of);
@@ -1641,6 +1862,10 @@ static void
 add_resource(cmd_t *cmd)
 {
 	int type;
+	struct zone_psettab tmp_psettab;
+	struct zone_mcaptab tmp_mcaptab;
+	uint64_t tmp_mcap;
+	char pool[MAXNAMELEN];
 
 	if ((type = cmd->cmd_res_type) == RT_UNKNOWN) {
 		long_usage(CMD_ADD, TRUE);
@@ -1667,6 +1892,12 @@ add_resource(cmd_t *cmd)
 		bzero(&in_progress_devtab, sizeof (in_progress_devtab));
 		return;
 	case RT_RCTL:
+		if (global_zone)
+			zerr(gettext("WARNING: Setting a global zone resource "
+			    "control too low could deny\nservice "
+			    "to even the root user; "
+			    "this could render the system impossible\n"
+			    "to administer.  Please use caution."));
 		bzero(&in_progress_rctltab, sizeof (in_progress_rctltab));
 		return;
 	case RT_ATTR:
@@ -1675,6 +1906,48 @@ add_resource(cmd_t *cmd)
 	case RT_DATASET:
 		bzero(&in_progress_dstab, sizeof (in_progress_dstab));
 		return;
+	case RT_DCPU:
+		/* Make sure there isn't already a cpu-set entry. */
+		if (zonecfg_lookup_pset(handle, &tmp_psettab) == Z_OK) {
+			zerr(gettext("The %s resource already exists."),
+			    rt_to_str(RT_DCPU));
+			goto bad;
+		}
+
+		/* Make sure the pool property isn't set. */
+		if (zonecfg_get_pool(handle, pool, sizeof (pool)) == Z_OK &&
+		    strlen(pool) > 0) {
+			zerr(gettext("The %s property is already set.  "
+			    "A persistent pool is incompatible with\nthe %s "
+			    "resource."),
+			    pt_to_str(PT_POOL), rt_to_str(RT_DCPU));
+			goto bad;
+		}
+
+		bzero(&in_progress_psettab, sizeof (in_progress_psettab));
+		return;
+	case RT_MCAP:
+		/*
+		 * Make sure there isn't already a mem-cap entry or max-swap
+		 * or max-locked rctl.
+		 */
+		if (zonecfg_lookup_mcap(handle, &tmp_mcaptab) == Z_OK ||
+		    zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &tmp_mcap)
+		    == Z_OK ||
+		    zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
+		    &tmp_mcap) == Z_OK) {
+			zerr(gettext("The %s resource or a related resource "
+			    "control already exists."), rt_to_str(RT_MCAP));
+			goto bad;
+		}
+		if (global_zone)
+			zerr(gettext("WARNING: Setting a global zone memory "
+			    "cap too low could deny\nservice "
+			    "to even the root user; "
+			    "this could render the system impossible\n"
+			    "to administer.  Please use caution."));
+		bzero(&in_progress_mcaptab, sizeof (in_progress_mcaptab));
+		return;
 	default:
 		zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, TRUE);
 		long_usage(CMD_ADD, TRUE);
@@ -1871,6 +2144,30 @@ add_property(cmd_t *cmd)
 	}
 }
 
+static boolean_t
+gz_invalid_resource(int type)
+{
+	return (global_zone && (type == RT_FS || type == RT_IPD ||
+	    type == RT_NET || type == RT_DEVICE || type == RT_ATTR ||
+	    type == RT_DATASET));
+}
+
+static boolean_t
+gz_invalid_rt_property(int type)
+{
+	return (global_zone && (type == RT_ZONENAME || type == RT_ZONEPATH ||
+	    type == RT_AUTOBOOT || type == RT_LIMITPRIV ||
+	    type == RT_BOOTARGS || type == RT_BRAND || type == RT_SCHED));
+}
+
+static boolean_t
+gz_invalid_property(int type)
+{
+	return (global_zone && (type == PT_ZONENAME || type == PT_ZONEPATH ||
+	    type == PT_AUTOBOOT || type == PT_LIMITPRIV ||
+	    type == PT_BOOTARGS || type == PT_BRAND || type == PT_SCHED));
+}
+
 void
 add_func(cmd_t *cmd)
 {
@@ -1900,6 +2197,13 @@ add_func(cmd_t *cmd)
 	if (initialize(TRUE) != Z_OK)
 		return;
 	if (global_scope) {
+		if (gz_invalid_resource(cmd->cmd_res_type)) {
+			zerr(gettext("Cannot add a %s resource to the "
+			    "global zone."), rt_to_str(cmd->cmd_res_type));
+			saw_error = TRUE;
+			return;
+		}
+
 		global_scope = FALSE;
 		resource_scope = cmd->cmd_res_type;
 		end_op = CMD_ADD;
@@ -2273,26 +2577,85 @@ fill_in_dstab(cmd_t *cmd, struct zone_dstab *dstab, bool fill_in_only)
 }
 
 static void
-remove_resource(cmd_t *cmd)
+remove_aliased_rctl(int type, char *name)
 {
-	int err, type;
-	struct zone_fstab fstab;
-	struct zone_nwiftab nwiftab;
-	struct zone_devtab devtab;
-	struct zone_attrtab attrtab;
-	struct zone_rctltab rctltab;
-	struct zone_dstab dstab;
+	int err;
+	uint64_t tmp;
 
-	if ((type = cmd->cmd_res_type) == RT_UNKNOWN) {
-		long_usage(CMD_REMOVE, TRUE);
+	if ((err = zonecfg_get_aliased_rctl(handle, name, &tmp)) != Z_OK) {
+		zerr("%s %s: %s", cmd_to_str(CMD_CLEAR), pt_to_str(type),
+		    zonecfg_strerror(err));
+		saw_error = TRUE;
 		return;
 	}
+	if ((err = zonecfg_rm_aliased_rctl(handle, name)) != Z_OK) {
+		zerr("%s %s: %s", cmd_to_str(CMD_CLEAR), pt_to_str(type),
+		    zonecfg_strerror(err));
+		saw_error = TRUE;
+	} else {
+		need_to_commit = TRUE;
+	}
+}
 
-	if (initialize(TRUE) != Z_OK)
-		return;
+static boolean_t
+prompt_remove_resource(cmd_t *cmd, char *rsrc)
+{
+	int num;
+	int answer;
+	int arg;
+	boolean_t force = B_FALSE;
+	char prompt[128];
+
+	optind = 0;
+	while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "F")) != EOF) {
+		switch (arg) {
+		case 'F':
+			force = B_TRUE;
+			break;
+		default:
+			return (B_FALSE);
+		}
+	}
+
+	num = zonecfg_num_resources(handle, rsrc);
+
+	if (num == 0) {
+		z_cmd_rt_perror(CMD_REMOVE, cmd->cmd_res_type, Z_NO_ENTRY,
+		    TRUE);
+		return (B_FALSE);
+	}
+	if (num > 1 && !force) {
+		if (!interactive_mode) {
+			zerr(gettext("There are multiple instances of this "
+			    "resource.  Either qualify the resource to\n"
+			    "remove a single instance or use the -F option to "
+			    "remove all instances."));
+			saw_error = TRUE;
+			return (B_FALSE);
+		}
+		(void) snprintf(prompt, sizeof (prompt), gettext(
+		    "Are you sure you want to remove ALL '%s' resources"),
+		    rsrc);
+		answer = ask_yesno(FALSE, prompt);
+		if (answer == -1) {
+			zerr(gettext("Resource incomplete."));
+			return (B_FALSE);
+		}
+		if (answer != 1)
+			return (B_FALSE);
+	}
+	return (B_TRUE);
+}
+
+static void
+remove_fs(cmd_t *cmd)
+{
+	int err;
+
+	/* traditional, qualified fs removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_fstab fstab;
 
-	switch (type) {
-	case RT_FS:
 		if ((err = fill_in_fstab(cmd, &fstab, FALSE)) != Z_OK) {
 			z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, TRUE);
 			return;
@@ -2303,13 +2666,36 @@ remove_resource(cmd_t *cmd)
 			need_to_commit = TRUE;
 		zonecfg_free_fs_option_list(fstab.zone_fs_options);
 		return;
-	case RT_IPD:
-		if (state_atleast(ZONE_STATE_INSTALLED)) {
-			zerr(gettext("Zone %s already installed; %s %s not "
-			    "allowed."), zone, cmd_to_str(CMD_REMOVE),
-			    rt_to_str(RT_IPD));
-			return;
-		}
+	}
+
+	/*
+	 * unqualified fs removal.  remove all fs's but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "fs"))
+		return;
+
+	if ((err = zonecfg_del_all_resources(handle, "fs")) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_ipd(cmd_t *cmd)
+{
+	int err;
+
+	if (state_atleast(ZONE_STATE_INSTALLED)) {
+		zerr(gettext("Zone %s already installed; %s %s not allowed."),
+		    zone, cmd_to_str(CMD_REMOVE), rt_to_str(RT_IPD));
+		return;
+	}
+
+	/* traditional, qualified ipd removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_fstab fstab;
+
 		if ((err = fill_in_ipdtab(cmd, &fstab, FALSE)) != Z_OK) {
 			z_cmd_rt_perror(CMD_REMOVE, RT_IPD, err, TRUE);
 			return;
@@ -2319,7 +2705,31 @@ remove_resource(cmd_t *cmd)
 		else
 			need_to_commit = TRUE;
 		return;
-	case RT_NET:
+	}
+
+	/*
+	 * unqualified ipd removal.  remove all ipds but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "inherit-pkg-dir"))
+		return;
+
+	if ((err = zonecfg_del_all_resources(handle, "inherit-pkg-dir"))
+	    != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_IPD, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_net(cmd_t *cmd)
+{
+	int err;
+
+	/* traditional, qualified net removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_nwiftab nwiftab;
+
 		if ((err = fill_in_nwiftab(cmd, &nwiftab, FALSE)) != Z_OK) {
 			z_cmd_rt_perror(CMD_REMOVE, RT_NET, err, TRUE);
 			return;
@@ -2329,7 +2739,30 @@ remove_resource(cmd_t *cmd)
 		else
 			need_to_commit = TRUE;
 		return;
-	case RT_DEVICE:
+	}
+
+	/*
+	 * unqualified net removal.  remove all nets but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "net"))
+		return;
+
+	if ((err = zonecfg_del_all_resources(handle, "net")) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_NET, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_device(cmd_t *cmd)
+{
+	int err;
+
+	/* traditional, qualified device removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_devtab devtab;
+
 		if ((err = fill_in_devtab(cmd, &devtab, FALSE)) != Z_OK) {
 			z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err, TRUE);
 			return;
@@ -2339,18 +2772,30 @@ remove_resource(cmd_t *cmd)
 		else
 			need_to_commit = TRUE;
 		return;
-	case RT_RCTL:
-		if ((err = fill_in_rctltab(cmd, &rctltab, FALSE)) != Z_OK) {
-			z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, TRUE);
-			return;
-		}
-		if ((err = zonecfg_delete_rctl(handle, &rctltab)) != Z_OK)
-			z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, TRUE);
-		else
-			need_to_commit = TRUE;
-		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
+	}
+
+	/*
+	 * unqualified device removal.  remove all devices but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "device"))
 		return;
-	case RT_ATTR:
+
+	if ((err = zonecfg_del_all_resources(handle, "device")) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_attr(cmd_t *cmd)
+{
+	int err;
+
+	/* traditional, qualified attr removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_attrtab attrtab;
+
 		if ((err = fill_in_attrtab(cmd, &attrtab, FALSE)) != Z_OK) {
 			z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err, TRUE);
 			return;
@@ -2360,7 +2805,30 @@ remove_resource(cmd_t *cmd)
 		else
 			need_to_commit = TRUE;
 		return;
-	case RT_DATASET:
+	}
+
+	/*
+	 * unqualified attr removal.  remove all attrs but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "attr"))
+		return;
+
+	if ((err = zonecfg_del_all_resources(handle, "attr")) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_dataset(cmd_t *cmd)
+{
+	int err;
+
+	/* traditional, qualified dataset removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_dstab dstab;
+
 		if ((err = fill_in_dstab(cmd, &dstab, FALSE)) != Z_OK) {
 			z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, TRUE);
 			return;
@@ -2370,6 +2838,177 @@ remove_resource(cmd_t *cmd)
 		else
 			need_to_commit = TRUE;
 		return;
+	}
+
+	/*
+	 * unqualified dataset removal.  remove all datasets but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "dataset"))
+		return;
+
+	if ((err = zonecfg_del_all_resources(handle, "dataset")) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_rctl(cmd_t *cmd)
+{
+	int err;
+
+	/* traditional, qualified rctl removal */
+	if (cmd->cmd_prop_nv_pairs > 0) {
+		struct zone_rctltab rctltab;
+
+		if ((err = fill_in_rctltab(cmd, &rctltab, FALSE)) != Z_OK) {
+			z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, TRUE);
+			return;
+		}
+		if ((err = zonecfg_delete_rctl(handle, &rctltab)) != Z_OK)
+			z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
+		return;
+	}
+
+	/*
+	 * unqualified rctl removal.  remove all rctls but prompt if more
+	 * than one.
+	 */
+	if (!prompt_remove_resource(cmd, "rctl"))
+		return;
+
+	if ((err = zonecfg_del_all_resources(handle, "rctl")) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_pset(void)
+{
+	int err;
+	struct zone_psettab psettab;	/* lookup output; not read */
+
+	if ((err = zonecfg_lookup_pset(handle, &psettab)) != Z_OK) {
+		z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, TRUE);
+		return;
+	}
+	if ((err = zonecfg_delete_pset(handle)) != Z_OK)
+		z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, TRUE);
+	else
+		need_to_commit = TRUE;
+}
+
+static void
+remove_mcap(void)
+{
+	int err, res1, res2, res3;
+	uint64_t tmp;	/* rctl values are fetched but not read */
+	struct zone_mcaptab mcaptab;
+	boolean_t revert = B_FALSE;	/* set if any removal fails */
+
+	res1 = zonecfg_lookup_mcap(handle, &mcaptab);
+	res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &tmp);
+	res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM, &tmp);
+
+	/* if none of these exist, there is no resource to remove */
+	if (res1 != Z_OK && res2 != Z_OK && res3 != Z_OK) {
+		zerr("%s %s: %s", cmd_to_str(CMD_REMOVE), rt_to_str(RT_MCAP),
+		    zonecfg_strerror(Z_NO_RESOURCE_TYPE));
+		saw_error = TRUE;
+		return;
+	}
+	if (res1 == Z_OK) {
+		if ((err = zonecfg_delete_mcap(handle)) != Z_OK) {
+			z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, TRUE);
+			revert = B_TRUE;
+		} else {
+			need_to_commit = TRUE;
+		}
+	}
+	if (res2 == Z_OK) {
+		if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXSWAP))
+		    != Z_OK) {
+			z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, TRUE);
+			revert = B_TRUE;
+		} else {
+			need_to_commit = TRUE;
+		}
+	}
+	if (res3 == Z_OK) {
+		if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM))
+		    != Z_OK) {
+			z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, TRUE);
+			revert = B_TRUE;
+		} else {
+			need_to_commit = TRUE;
+		}
+	}
+
+	if (revert)
+		need_to_commit = FALSE;	/* discards any earlier TRUE too */
+}
+
+static void
+remove_resource(cmd_t *cmd)
+{
+	int type;
+	int arg;
+
+	if ((type = cmd->cmd_res_type) == RT_UNKNOWN) {
+		long_usage(CMD_REMOVE, TRUE);
+		return;
+	}
+
+	optind = 0;
+	while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?F")) != EOF) {
+		switch (arg) {
+		case '?':
+			longer_usage(CMD_REMOVE);
+			return;
+		case 'F':
+			break;
+		default:
+			short_usage(CMD_REMOVE);
+			return;
+		}
+	}
+
+	if (initialize(TRUE) != Z_OK)
+		return;
+
+	switch (type) {
+	case RT_FS:
+		remove_fs(cmd);
+		return;
+	case RT_IPD:
+		remove_ipd(cmd);
+		return;
+	case RT_NET:
+		remove_net(cmd);
+		return;
+	case RT_DEVICE:
+		remove_device(cmd);
+		return;
+	case RT_RCTL:
+		remove_rctl(cmd);
+		return;
+	case RT_ATTR:
+		remove_attr(cmd);
+		return;
+	case RT_DATASET:
+		remove_dataset(cmd);
+		return;
+	case RT_DCPU:
+		remove_pset();
+		return;
+	case RT_MCAP:
+		remove_mcap();
+		return;
 	default:
 		zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, TRUE);
 		long_usage(CMD_REMOVE, TRUE);
@@ -2513,16 +3152,175 @@ remove_func(cmd_t *cmd)
 
 	assert(cmd != NULL);
 
-	if (global_scope)
+	if (global_scope) {
+		if (gz_invalid_resource(cmd->cmd_res_type)) {
+			zerr(gettext("%s is not a valid resource for the "
+			    "global zone."), rt_to_str(cmd->cmd_res_type));
+			saw_error = TRUE;
+			return;
+		}
 		remove_resource(cmd);
-	else
+	} else {
 		remove_property(cmd);
+	}
+}
+
+static void
+clear_property(cmd_t *cmd)
+{
+	int res_type, prop_type;
+
+	res_type = resource_scope;
+	prop_type = cmd->cmd_res_type;
+	if (res_type == RT_UNKNOWN || prop_type == PT_UNKNOWN) {
+		long_usage(CMD_CLEAR, TRUE);
+		return;
+	}
+
+	if (initialize(TRUE) != Z_OK)
+		return;
+
+	switch (res_type) {
+	case RT_FS:
+		if (prop_type == PT_RAW) {
+			in_progress_fstab.zone_fs_raw[0] = '\0';
+			need_to_commit = TRUE;
+			return;
+		}
+		break;
+	case RT_DCPU:
+		if (prop_type == PT_IMPORTANCE) {
+			in_progress_psettab.zone_importance[0] = '\0';
+			need_to_commit = TRUE;
+			return;
+		}
+		break;
+	case RT_MCAP:
+		switch (prop_type) {
+		case PT_PHYSICAL:
+			in_progress_mcaptab.zone_physmem_cap[0] = '\0';
+			need_to_commit = TRUE;
+			return;
+		case PT_SWAP:
+			remove_aliased_rctl(PT_SWAP, ALIAS_MAXSWAP);
+			return;
+		case PT_LOCKED:
+			remove_aliased_rctl(PT_LOCKED, ALIAS_MAXLOCKEDMEM);
+			return;
+		}
+		break;
+	default:
+		break;
+	}
+
+	zone_perror(pt_to_str(prop_type), Z_CLEAR_DISALLOW, TRUE);
+}
+
+static void
+clear_global(cmd_t *cmd)
+{
+	int err, type;
+
+	if ((type = cmd->cmd_res_type) == RT_UNKNOWN) {
+		long_usage(CMD_CLEAR, TRUE);
+		return;
+	}
+
+	if (initialize(TRUE) != Z_OK)
+		return;
+
+	switch (type) {
+	case PT_ZONENAME:
+		/* FALLTHRU */
+	case PT_ZONEPATH:
+		/* FALLTHRU */
+	case PT_BRAND:
+		zone_perror(pt_to_str(type), Z_CLEAR_DISALLOW, TRUE);
+		return;
+	case PT_AUTOBOOT:
+		/* false is default; we'll treat as equivalent to clearing */
+		if ((err = zonecfg_set_autoboot(handle, B_FALSE)) != Z_OK)
+			z_cmd_rt_perror(CMD_CLEAR, RT_AUTOBOOT, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		return;
+	case PT_POOL:
+		if ((err = zonecfg_set_pool(handle, NULL)) != Z_OK)
+			z_cmd_rt_perror(CMD_CLEAR, RT_POOL, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		return;
+	case PT_LIMITPRIV:
+		if ((err = zonecfg_set_limitpriv(handle, NULL)) != Z_OK)
+			z_cmd_rt_perror(CMD_CLEAR, RT_LIMITPRIV, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		return;
+	case PT_BOOTARGS:
+		if ((err = zonecfg_set_bootargs(handle, NULL)) != Z_OK)
+			z_cmd_rt_perror(CMD_CLEAR, RT_BOOTARGS, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		return;
+	case PT_SCHED:
+		if ((err = zonecfg_set_sched(handle, NULL)) != Z_OK)
+			z_cmd_rt_perror(CMD_CLEAR, RT_SCHED, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		return;
+	case PT_MAXLWPS:
+		remove_aliased_rctl(PT_MAXLWPS, ALIAS_MAXLWPS);
+		return;
+	case PT_MAXSHMMEM:
+		remove_aliased_rctl(PT_MAXSHMMEM, ALIAS_MAXSHMMEM);
+		return;
+	case PT_MAXSHMIDS:
+		remove_aliased_rctl(PT_MAXSHMIDS, ALIAS_MAXSHMIDS);
+		return;
+	case PT_MAXMSGIDS:
+		remove_aliased_rctl(PT_MAXMSGIDS, ALIAS_MAXMSGIDS);
+		return;
+	case PT_MAXSEMIDS:
+		remove_aliased_rctl(PT_MAXSEMIDS, ALIAS_MAXSEMIDS);
+		return;
+	case PT_SHARES:
+		remove_aliased_rctl(PT_SHARES, ALIAS_SHARES);
+		return;
+	default:
+		zone_perror(pt_to_str(type), Z_NO_PROPERTY_TYPE, TRUE);
+		long_usage(CMD_CLEAR, TRUE);
+		usage(FALSE, HELP_PROPS);
+		return;
+	}
+}
+
+void
+clear_func(cmd_t *cmd)
+{
+	if (zone_is_read_only(CMD_CLEAR))
+		return;
+
+	assert(cmd != NULL);
+
+	if (global_scope) {
+		if (gz_invalid_property(cmd->cmd_res_type)) {
+			zerr(gettext("%s is not a valid property for the "
+			    "global zone."), pt_to_str(cmd->cmd_res_type));
+			saw_error = TRUE;
+			return;
+		}
+
+		clear_global(cmd);
+	} else {
+		clear_property(cmd);
+	}
 }
 
 void
 select_func(cmd_t *cmd)
 {
-	int type, err;
+	int type, err, res;
+	uint64_t limit;
 
 	if (zone_is_read_only(CMD_SELECT))
 		return;
@@ -2612,6 +3410,32 @@ select_func(cmd_t *cmd)
 		bcopy(&old_dstab, &in_progress_dstab,
 		    sizeof (struct zone_dstab));
 		return;
+	case RT_DCPU:
+		if ((err = zonecfg_lookup_pset(handle, &old_psettab)) != Z_OK) {
+			z_cmd_rt_perror(CMD_SELECT, RT_DCPU, err, TRUE);
+			global_scope = TRUE;
+		}
+		bcopy(&old_psettab, &in_progress_psettab,
+		    sizeof (struct zone_psettab));
+		return;
+	case RT_MCAP:
+		/* if none of these exist, there is no resource to select */
+		if ((res = zonecfg_lookup_mcap(handle, &old_mcaptab)) != Z_OK &&
+		    zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &limit)
+		    != Z_OK &&
+		    zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM, &limit)
+		    != Z_OK) {
+			z_cmd_rt_perror(CMD_SELECT, RT_MCAP, Z_NO_RESOURCE_TYPE,
+			    TRUE);
+			global_scope = TRUE;
+		}
+		if (res == Z_OK)
+			bcopy(&old_mcaptab, &in_progress_mcaptab,
+			    sizeof (struct zone_mcaptab));
+		else
+			bzero(&in_progress_mcaptab,
+			    sizeof (in_progress_mcaptab));
+		return;
 	default:
 		zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, TRUE);
 		long_usage(CMD_SELECT, TRUE);
@@ -2731,6 +3555,49 @@ valid_fs_type(const char *type)
 	return (B_TRUE);
 }
 
+static void
+set_aliased_rctl(char *alias, int prop_type, char *s)
+{
+	uint64_t limit;
+	int err;
+	char tmp[128];
+
+	if (global_zone && strcmp(alias, ALIAS_SHARES) != 0)
+		zerr(gettext("WARNING: Setting a global zone resource "
+		    "control too low could deny\nservice "
+		    "to even the root user; "
+		    "this could render the system impossible\n"
+		    "to administer.  Please use caution."));
+
+	/* convert memory based properties */
+	if (prop_type == PT_MAXSHMMEM) {
+		if (!zonecfg_valid_memlimit(s, &limit)) {
+			zerr(gettext("A non-negative number with a required "
+			    "scale suffix (K, M, G or T) was expected\nhere."));
+			saw_error = TRUE;
+			return;
+		}
+
+		(void) snprintf(tmp, sizeof (tmp), "%llu", limit);
+		s = tmp;
+	}
+
+	if (!zonecfg_aliased_rctl_ok(handle, alias)) {
+		zone_perror(pt_to_str(prop_type), Z_ALIAS_DISALLOW, FALSE);
+		saw_error = TRUE;
+	} else if (!zonecfg_valid_alias_limit(alias, s, &limit)) {
+		zerr(gettext("%s property is out of range."),
+		    pt_to_str(prop_type));
+		saw_error = TRUE;
+	} else if ((err = zonecfg_set_aliased_rctl(handle, alias, limit))
+	    != Z_OK) {
+		zone_perror(zone, err, TRUE);
+		saw_error = TRUE;
+	} else {
+		need_to_commit = TRUE;
+	}
+}
+
 void
 set_func(cmd_t *cmd)
 {
@@ -2739,6 +3606,9 @@ set_func(cmd_t *cmd)
 	property_value_ptr_t pp;
 	boolean_t autoboot;
 	boolean_t force_set = FALSE;
+	size_t physmem_size = sizeof (in_progress_mcaptab.zone_physmem_cap);
+	uint64_t mem_cap, mem_limit;
+	struct zone_psettab tmp_psettab;
 
 	if (zone_is_read_only(CMD_SET))
 		return;
@@ -2762,6 +3632,13 @@ set_func(cmd_t *cmd)
 
 	prop_type = cmd->cmd_prop_name[0];
 	if (global_scope) {
+		if (gz_invalid_property(prop_type)) {
+			zerr(gettext("%s is not a valid property for the "
+			    "global zone."), pt_to_str(prop_type));
+			saw_error = TRUE;
+			return;
+		}
+
 		if (prop_type == PT_ZONENAME) {
 			res_type = RT_ZONENAME;
 		} else if (prop_type == PT_ZONEPATH) {
@@ -2776,6 +3653,20 @@ set_func(cmd_t *cmd)
 			res_type = RT_LIMITPRIV;
 		} else if (prop_type == PT_BOOTARGS) {
 			res_type = RT_BOOTARGS;
+		} else if (prop_type == PT_SCHED) {
+			res_type = RT_SCHED;
+		} else if (prop_type == PT_MAXLWPS) {
+			res_type = RT_MAXLWPS;
+		} else if (prop_type == PT_MAXSHMMEM) {
+			res_type = RT_MAXSHMMEM;
+		} else if (prop_type == PT_MAXSHMIDS) {
+			res_type = RT_MAXSHMIDS;
+		} else if (prop_type == PT_MAXMSGIDS) {
+			res_type = RT_MAXMSGIDS;
+		} else if (prop_type == PT_MAXSEMIDS) {
+			res_type = RT_MAXSEMIDS;
+		} else if (prop_type == PT_SHARES) {
+			res_type = RT_SHARES;
 		} else {
 			zerr(gettext("Cannot set a resource-specific property "
 			    "from the global scope."));
@@ -2899,6 +3790,24 @@ set_func(cmd_t *cmd)
 			need_to_commit = TRUE;
 		return;
 	case RT_POOL:
+		/* don't allow use of the reserved temporary pool names */
+		if (strncmp("SUNW", prop_id, 4) == 0) {
+			zerr(gettext("pool names starting with SUNW are "
+			    "reserved."));
+			saw_error = TRUE;
+			return;
+		}
+
+		/* can't set pool if dedicated-cpu exists */
+		if (zonecfg_lookup_pset(handle, &tmp_psettab) == Z_OK) {
+			zerr(gettext("The %s resource already exists.  "
+			    "A persistent pool is incompatible\nwith the %s "
+			    "resource."), rt_to_str(RT_DCPU),
+			    rt_to_str(RT_DCPU));
+			saw_error = TRUE;
+			return;
+		}
+
 		if ((err = zonecfg_set_pool(handle, prop_id)) != Z_OK)
 			zone_perror(zone, err, TRUE);
 		else
@@ -2916,6 +3825,30 @@ set_func(cmd_t *cmd)
 		else
 			need_to_commit = TRUE;
 		return;
+	case RT_SCHED:
+		if ((err = zonecfg_set_sched(handle, prop_id)) != Z_OK)
+			zone_perror(zone, err, TRUE);
+		else
+			need_to_commit = TRUE;
+		return;
+	case RT_MAXLWPS:
+		set_aliased_rctl(ALIAS_MAXLWPS, prop_type, prop_id);
+		return;
+	case RT_MAXSHMMEM:
+		set_aliased_rctl(ALIAS_MAXSHMMEM, prop_type, prop_id);
+		return;
+	case RT_MAXSHMIDS:
+		set_aliased_rctl(ALIAS_MAXSHMIDS, prop_type, prop_id);
+		return;
+	case RT_MAXMSGIDS:
+		set_aliased_rctl(ALIAS_MAXMSGIDS, prop_type, prop_id);
+		return;
+	case RT_MAXSEMIDS:
+		set_aliased_rctl(ALIAS_MAXSEMIDS, prop_type, prop_id);
+		return;
+	case RT_SHARES:
+		set_aliased_rctl(ALIAS_SHARES, prop_type, prop_id);
+		return;
 	case RT_FS:
 		switch (prop_type) {
 		case PT_DIR:
@@ -3095,6 +4028,146 @@ set_func(cmd_t *cmd)
 		long_usage(CMD_SET, TRUE);
 		usage(FALSE, HELP_PROPS);
 		return;
+	case RT_DCPU:
+		switch (prop_type) {
+		char *lowp, *highp;
+
+		case PT_NCPUS:
+			lowp = prop_id;
+			if ((highp = strchr(prop_id, '-')) != NULL)
+				*highp++ = '\0';
+			else
+				highp = lowp;
+
+			/* Make sure the input makes sense. */
+			if (!zonecfg_valid_ncpus(lowp, highp)) {
+				zerr(gettext("%s property is out of range."),
+				    pt_to_str(PT_NCPUS));
+				saw_error = TRUE;
+				return;
+			}
+
+			(void) strlcpy(
+			    in_progress_psettab.zone_ncpu_min, lowp,
+			    sizeof (in_progress_psettab.zone_ncpu_min));
+			(void) strlcpy(
+			    in_progress_psettab.zone_ncpu_max, highp,
+			    sizeof (in_progress_psettab.zone_ncpu_max));
+			return;
+		case PT_IMPORTANCE:
+			/* Make sure the value makes sense. */
+			if (!zonecfg_valid_importance(prop_id)) {
+				zerr(gettext("%s property is out of range."),
+				    pt_to_str(PT_IMPORTANCE));
+				saw_error = TRUE;
+				return;
+			}
+
+			(void) strlcpy(in_progress_psettab.zone_importance,
+			    prop_id,
+			    sizeof (in_progress_psettab.zone_importance));
+			return;
+		default:
+			break;
+		}
+		zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE, TRUE);
+		long_usage(CMD_SET, TRUE);
+		usage(FALSE, HELP_PROPS);
+		return;
+	case RT_MCAP:
+		switch (prop_type) {
+		case PT_PHYSICAL:
+			if (!zonecfg_valid_memlimit(prop_id, &mem_cap)) {
+				zerr(gettext("A positive number with a "
+				    "required scale suffix (K, M, G or T) was "
+				    "expected here."));
+				saw_error = TRUE;
+			} else if (mem_cap < ONE_MB) {
+				zerr(gettext("%s value is too small.  It must "
+				    "be at least 1M."), pt_to_str(PT_PHYSICAL));
+				saw_error = TRUE;
+			} else {
+				snprintf(in_progress_mcaptab.zone_physmem_cap,
+				    physmem_size, "%llu", mem_cap);
+			}
+			break;
+		case PT_SWAP:
+			/*
+			 * We have to check if an rctl is allowed here since
+			 * there might already be a rctl defined that blocks
+			 * the alias.
+			 */
+			if (!zonecfg_aliased_rctl_ok(handle, ALIAS_MAXSWAP)) {
+				zone_perror(pt_to_str(PT_MAXSWAP),
+				    Z_ALIAS_DISALLOW, FALSE);
+				saw_error = TRUE;
+				return;
+			}
+
+			if (global_zone)
+				mem_limit = ONE_MB * 100;
+			else
+				mem_limit = ONE_MB * 50;
+
+			if (!zonecfg_valid_memlimit(prop_id, &mem_cap)) {
+				zerr(gettext("A positive number with a "
+				    "required scale suffix (K, M, G or T) was "
+				    "expected here."));
+				saw_error = TRUE;
+			} else if (mem_cap < mem_limit) {
+				char buf[128];
+
+				(void) snprintf(buf, sizeof (buf), "%llu",
+				    mem_limit);
+				bytes_to_units(buf, buf, sizeof (buf));
+				zerr(gettext("%s value is too small.  It must "
+				    "be at least %s."), pt_to_str(PT_SWAP),
+				    buf);
+				saw_error = TRUE;
+			} else {
+				if ((err = zonecfg_set_aliased_rctl(handle,
+				    ALIAS_MAXSWAP, mem_cap)) != Z_OK)
+					zone_perror(zone, err, TRUE);
+				else
+					need_to_commit = TRUE;
+			}
+			break;
+		case PT_LOCKED:
+			/*
+			 * We have to check if an rctl is allowed here since
+			 * there might already be a rctl defined that blocks
+			 * the alias.
+			 */
+			if (!zonecfg_aliased_rctl_ok(handle,
+			    ALIAS_MAXLOCKEDMEM)) {
+				zone_perror(pt_to_str(PT_LOCKED),
+				    Z_ALIAS_DISALLOW, FALSE);
+				saw_error = TRUE;
+				return;
+			}
+
+			if (!zonecfg_valid_memlimit(prop_id, &mem_cap)) {
+				zerr(gettext("A non-negative number with a "
+				    "required scale suffix (K, M, G or T) was "
+				    "expected\nhere."));
+				saw_error = TRUE;
+			} else {
+				if ((err = zonecfg_set_aliased_rctl(handle,
+				    ALIAS_MAXLOCKEDMEM, mem_cap)) != Z_OK)
+					zone_perror(zone, err, TRUE);
+				else
+					need_to_commit = TRUE;
+			}
+			break;
+		default:
+			zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+			    TRUE);
+			long_usage(CMD_SET, TRUE);
+			usage(FALSE, HELP_PROPS);
+			return;
+		}
+
+		return;
 	default:
 		zone_perror(rt_to_str(res_type), Z_NO_RESOURCE_TYPE, TRUE);
 		long_usage(CMD_SET, TRUE);
@@ -3110,7 +4183,11 @@ output_prop(FILE *fp, int pnum, char *pval, bool print_notspec)
 
 	if (*pval != '\0') {
 		qstr = quoteit(pval);
-		(void) fprintf(fp, "\t%s: %s\n", pt_to_str(pnum), qstr);
+		if (pnum == PT_SWAP || pnum == PT_LOCKED)
+			(void) fprintf(fp, "\t[%s: %s]\n", pt_to_str(pnum),
+			    qstr);
+		else
+			(void) fprintf(fp, "\t%s: %s\n", pt_to_str(pnum), qstr);
 		free(qstr);
 	} else if (print_notspec)
 		(void) fprintf(fp, gettext("\t%s not specified\n"),
@@ -3213,6 +4290,20 @@ info_bootargs(zone_dochandle_t handle, FILE *fp)
 }
 
 static void
+info_sched(zone_dochandle_t handle, FILE *fp)
+{
+	char sched[MAXNAMELEN];
+	int err;
+
+	if ((err = zonecfg_get_sched_class(handle, sched, sizeof (sched)))
+	    == Z_OK) {
+		(void) fprintf(fp, "%s: %s\n", pt_to_str(PT_SCHED), sched);
+	} else {
+		zone_perror(zone, err, TRUE);
+	}
+}
+
+static void
 output_fs(FILE *fp, struct zone_fstab *fstab)
 {
 	zone_fsopt_t *this;
@@ -3499,7 +4590,7 @@ info_ds(zone_dochandle_t handle, FILE *fp, cmd_t *cmd)
 	struct zone_dstab lookup, user;
 	bool output = FALSE;
 
-	if (zonecfg_setdevent(handle) != Z_OK)
+	if (zonecfg_setdsent(handle) != Z_OK)
 		return;
 	while (zonecfg_getdsent(handle, &lookup) == Z_OK) {
 		if (cmd->cmd_prop_nv_pairs == 0) {
@@ -3525,12 +4616,132 @@ info_ds(zone_dochandle_t handle, FILE *fp, cmd_t *cmd)
 		    rt_to_str(RT_DATASET));
 }
 
+static void
+output_pset(FILE *fp, struct zone_psettab *psettab)
+{
+	(void) fprintf(fp, "%s:\n", rt_to_str(RT_DCPU));
+	if (strcmp(psettab->zone_ncpu_min, psettab->zone_ncpu_max) == 0)
+		(void) fprintf(fp, "\t%s: %s\n", pt_to_str(PT_NCPUS),
+		    psettab->zone_ncpu_max);
+	else
+		(void) fprintf(fp, "\t%s: %s-%s\n", pt_to_str(PT_NCPUS),
+		    psettab->zone_ncpu_min, psettab->zone_ncpu_max);
+	if (psettab->zone_importance[0] != '\0')
+		(void) fprintf(fp, "\t%s: %s\n", pt_to_str(PT_IMPORTANCE),
+		    psettab->zone_importance);
+}
+
+static void
+info_pset(zone_dochandle_t handle, FILE *fp)
+{
+	struct zone_psettab lookup;
+
+	if (zonecfg_getpsetent(handle, &lookup) == Z_OK)
+		output_pset(fp, &lookup);
+}
+
+static void
+info_aliased_rctl(zone_dochandle_t handle, FILE *fp, char *alias)
+{
+	uint64_t limit;
+
+	if (zonecfg_get_aliased_rctl(handle, alias, &limit) == Z_OK) {
+		/* convert memory based properties */
+		if (strcmp(alias, ALIAS_MAXSHMMEM) == 0) {
+			char buf[128];
+
+			(void) snprintf(buf, sizeof (buf), "%llu", limit);
+			bytes_to_units(buf, buf, sizeof (buf));
+			(void) fprintf(fp, "[%s: %s]\n", alias, buf);
+			return;
+		}
+
+		(void) fprintf(fp, "[%s: %llu]\n", alias, limit);
+	}
+}
+
+/* Rewrite the decimal byte count in str as a K/M/G/T-scaled value in buf. */
+static void
+bytes_to_units(char *str, char *buf, int bufsize)
+{
+	unsigned long long num, save = 0;
+	char *up = "BKMGT";
+
+	/* unsigned parse: values above LLONG_MAX must not be clamped */
+	num = strtoull(str, NULL, 10);
+
+	if (num < 1024) {
+		(void) snprintf(buf, bufsize, "%llu", num);
+		return;
+	}
+
+	while ((num >= 1024) && (*up != 'T')) {
+		up++; /* next unit of measurement */
+		save = num;
+		num = (num >> 10) + ((num & 1023) >= 512); /* round, no wrap */
+	}
+
+	/* check if we should output a fraction.  snprintf will round for us */
+	if (save % 1024 != 0 && ((save >> 10) < 10))
+		(void) snprintf(buf, bufsize, "%2.1f%c", ((float)save / 1024),
+		    *up);
+	else
+		(void) snprintf(buf, bufsize, "%llu%c", num, *up);
+}
+
+static void
+output_mcap(FILE *fp, struct zone_mcaptab *mcaptab, int showswap,
+    uint64_t maxswap, int showlocked, uint64_t maxlocked)
+{
+	char buf[128];	/* scratch for the scaled, printable value */
+
+	(void) fprintf(fp, "%s:\n", rt_to_str(RT_MCAP));
+	if (mcaptab->zone_physmem_cap[0] != '\0') {
+		bytes_to_units(mcaptab->zone_physmem_cap, buf, sizeof (buf));
+		output_prop(fp, PT_PHYSICAL, buf, B_TRUE);
+	}
+
+	if (showswap == Z_OK) {	/* showswap is a status code, not a flag */
+		(void) snprintf(buf, sizeof (buf), "%llu", maxswap);
+		bytes_to_units(buf, buf, sizeof (buf));
+		output_prop(fp, PT_SWAP, buf, B_TRUE);
+	}
+
+	if (showlocked == Z_OK) {	/* likewise a status code */
+		(void) snprintf(buf, sizeof (buf), "%llu", maxlocked);
+		bytes_to_units(buf, buf, sizeof (buf));
+		output_prop(fp, PT_LOCKED, buf, B_TRUE);
+	}
+}
+
+static void
+info_mcap(zone_dochandle_t handle, FILE *fp)
+{
+	int res1, res2, res3;
+	uint64_t swap_limit;
+	uint64_t locked_limit;
+	struct zone_mcaptab lookup;
+
+	bzero(&lookup, sizeof (lookup));	/* unset cap reads as empty */
+	res1 = zonecfg_getmcapent(handle, &lookup);
+	res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &swap_limit);
+	res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
+	    &locked_limit);
+
+	if (res1 == Z_OK || res2 == Z_OK || res3 == Z_OK) /* any one present */
+		output_mcap(fp, &lookup, res2, swap_limit, res3, locked_limit);
+}
+
 void
 info_func(cmd_t *cmd)
 {
 	FILE *fp = stdout;
 	bool need_to_close = FALSE;
 	char *pager;
+	int type;
+	int res1, res2;
+	uint64_t swap_limit;
+	uint64_t locked_limit;
 
 	assert(cmd != NULL);
 
@@ -3569,26 +4780,68 @@ info_func(cmd_t *cmd)
 		case RT_DATASET:
 			output_ds(fp, &in_progress_dstab);
 			break;
+		case RT_DCPU:
+			output_pset(fp, &in_progress_psettab);
+			break;
+		case RT_MCAP:
+			res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP,
+			    &swap_limit);
+			res2 = zonecfg_get_aliased_rctl(handle,
+			    ALIAS_MAXLOCKEDMEM, &locked_limit);
+			output_mcap(fp, &in_progress_mcaptab, res1, swap_limit,
+			    res2, locked_limit);
+			break;
 		}
 		goto cleanup;
 	}
 
+	type = cmd->cmd_res_type;
+
+	if (gz_invalid_rt_property(type)) {
+		zerr(gettext("%s is not a valid property for the global zone."),
+		    rt_to_str(type));
+		goto cleanup;
+	}
+
+	if (gz_invalid_resource(type)) {
+		zerr(gettext("%s is not a valid resource for the global zone."),
+		    rt_to_str(type));
+		goto cleanup;
+	}
+
 	switch (cmd->cmd_res_type) {
 	case RT_UNKNOWN:
 		info_zonename(handle, fp);
-		info_zonepath(handle, fp);
-		info_brand(handle, fp);
-		info_autoboot(handle, fp);
-		info_bootargs(handle, fp);
+		if (!global_zone) {
+			info_zonepath(handle, fp);
+			info_brand(handle, fp);
+			info_autoboot(handle, fp);
+			info_bootargs(handle, fp);
+		}
 		info_pool(handle, fp);
-		info_limitpriv(handle, fp);
-		info_ipd(handle, fp, cmd);
-		info_fs(handle, fp, cmd);
-		info_net(handle, fp, cmd);
-		info_dev(handle, fp, cmd);
+		if (!global_zone) {
+			info_limitpriv(handle, fp);
+			info_sched(handle, fp);
+		}
+		info_aliased_rctl(handle, fp, ALIAS_MAXLWPS);
+		info_aliased_rctl(handle, fp, ALIAS_MAXSHMMEM);
+		info_aliased_rctl(handle, fp, ALIAS_MAXSHMIDS);
+		info_aliased_rctl(handle, fp, ALIAS_MAXMSGIDS);
+		info_aliased_rctl(handle, fp, ALIAS_MAXSEMIDS);
+		info_aliased_rctl(handle, fp, ALIAS_SHARES);
+		if (!global_zone) {
+			info_ipd(handle, fp, cmd);
+			info_fs(handle, fp, cmd);
+			info_net(handle, fp, cmd);
+			info_dev(handle, fp, cmd);
+		}
+		info_pset(handle, fp);
+		info_mcap(handle, fp);
+		if (!global_zone) {
+			info_attr(handle, fp, cmd);
+			info_ds(handle, fp, cmd);
+		}
 		info_rctl(handle, fp, cmd);
-		info_attr(handle, fp, cmd);
-		info_ds(handle, fp, cmd);
 		break;
 	case RT_ZONENAME:
 		info_zonename(handle, fp);
@@ -3611,6 +4864,27 @@ info_func(cmd_t *cmd)
 	case RT_BOOTARGS:
 		info_bootargs(handle, fp);
 		break;
+	case RT_SCHED:
+		info_sched(handle, fp);
+		break;
+	case RT_MAXLWPS:
+		info_aliased_rctl(handle, fp, ALIAS_MAXLWPS);
+		break;
+	case RT_MAXSHMMEM:
+		info_aliased_rctl(handle, fp, ALIAS_MAXSHMMEM);
+		break;
+	case RT_MAXSHMIDS:
+		info_aliased_rctl(handle, fp, ALIAS_MAXSHMIDS);
+		break;
+	case RT_MAXMSGIDS:
+		info_aliased_rctl(handle, fp, ALIAS_MAXMSGIDS);
+		break;
+	case RT_MAXSEMIDS:
+		info_aliased_rctl(handle, fp, ALIAS_MAXSEMIDS);
+		break;
+	case RT_SHARES:
+		info_aliased_rctl(handle, fp, ALIAS_SHARES);
+		break;
 	case RT_FS:
 		info_fs(handle, fp, cmd);
 		break;
@@ -3632,6 +4906,12 @@ info_func(cmd_t *cmd)
 	case RT_DATASET:
 		info_ds(handle, fp, cmd);
 		break;
+	case RT_DCPU:
+		info_pset(handle, fp);
+		break;
+	case RT_MCAP:
+		info_mcap(handle, fp);
+		break;
 	default:
 		zone_perror(rt_to_str(cmd->cmd_res_type), Z_NO_RESOURCE_TYPE,
 		    TRUE);
@@ -3765,10 +5045,13 @@ verify_func(cmd_t *cmd)
 	struct zone_attrtab attrtab;
 	struct zone_rctltab rctltab;
 	struct zone_dstab dstab;
+	struct zone_psettab psettab;
 	char zonepath[MAXPATHLEN];
+	char sched[MAXNAMELEN];
 	char brand[MAXNAMELEN];
 	int err, ret_val = Z_OK, arg;
 	bool save = FALSE;
+	boolean_t has_cpu_shares = B_FALSE;
 
 	optind = 0;
 	if ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?")) != EOF) {
@@ -3796,12 +5079,13 @@ verify_func(cmd_t *cmd)
 	if (initialize(TRUE) != Z_OK)
 		return;
 
-	if (zonecfg_get_zonepath(handle, zonepath, sizeof (zonepath)) != Z_OK) {
+	if (zonecfg_get_zonepath(handle, zonepath, sizeof (zonepath)) != Z_OK &&
+	    !global_zone) {
 		zerr(gettext("%s not specified"), pt_to_str(PT_ZONEPATH));
 		ret_val = Z_REQD_RESOURCE_MISSING;
 		saw_error = TRUE;
 	}
-	if (strlen(zonepath) == 0) {
+	if (strlen(zonepath) == 0 && !global_zone) {
 		zerr(gettext("%s cannot be empty."), pt_to_str(PT_ZONEPATH));
 		ret_val = Z_REQD_RESOURCE_MISSING;
 		saw_error = TRUE;
@@ -3861,6 +5145,9 @@ verify_func(cmd_t *cmd)
 		check_reqd_prop(rctltab.zone_rctl_name, RT_RCTL, PT_NAME,
 		    &ret_val);
 
+		if (strcmp(rctltab.zone_rctl_name, "zone.cpu-shares") == 0)
+			has_cpu_shares = B_TRUE;
+
 		if (rctltab.zone_rctl_valptr == NULL) {
 			zerr(gettext("%s: no %s specified"),
 			    rt_to_str(RT_RCTL), pt_to_str(PT_VALUE));
@@ -3873,6 +5160,25 @@ verify_func(cmd_t *cmd)
 	}
 	(void) zonecfg_endrctlent(handle);
 
+	if (zonecfg_lookup_pset(handle, &psettab) == Z_OK && has_cpu_shares) {
+		zerr(gettext("%s zone.cpu-shares and %s are incompatible."),
+		    rt_to_str(RT_RCTL), rt_to_str(RT_DCPU));
+		saw_error = TRUE;
+		if (ret_val == Z_OK)
+			ret_val = Z_INCOMPATIBLE;
+	}
+
+	if (has_cpu_shares && zonecfg_get_sched_class(handle, sched,
+	    sizeof (sched)) == Z_OK && strlen(sched) > 0 &&
+	    strcmp(sched, "FSS") != 0) {
+		zerr(gettext("WARNING: %s zone.cpu-shares and %s=%s are "
+		    "incompatible"),
+		    rt_to_str(RT_RCTL), rt_to_str(RT_SCHED), sched);
+		saw_error = TRUE;
+		if (ret_val == Z_OK)
+			ret_val = Z_INCOMPATIBLE;
+	}
+
 	if ((err = zonecfg_setattrent(handle)) != Z_OK) {
 		zone_perror(zone, err, TRUE);
 		return;
@@ -4061,7 +5367,9 @@ end_func(cmd_t *cmd)
 	struct zone_rctltab tmp_rctltab;
 	struct zone_attrtab tmp_attrtab;
 	struct zone_dstab tmp_dstab;
-	int err, arg;
+	int err, arg, res1, res2, res3;
+	uint64_t swap_limit;
+	uint64_t locked_limit;
 
 	assert(cmd != NULL);
 
@@ -4361,6 +5669,73 @@ end_func(cmd_t *cmd)
 			    &in_progress_dstab);
 		}
 		break;
+	case RT_DCPU:
+		/* Make sure everything was filled in. */
+		if (end_check_reqd(in_progress_psettab.zone_ncpu_min,
+		    PT_NCPUS, &validation_failed) != Z_OK) {
+			saw_error = TRUE;
+			return;
+		}
+
+		if (end_op == CMD_ADD) {
+			err = zonecfg_add_pset(handle, &in_progress_psettab);
+		} else {
+			err = zonecfg_modify_pset(handle, &in_progress_psettab);
+		}
+		break;
+	case RT_MCAP:
+		/* Make sure everything was filled in. */
+		res1 = strlen(in_progress_mcaptab.zone_physmem_cap) == 0 ?
+		    Z_ERR : Z_OK;
+		res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP,
+		    &swap_limit);
+		res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
+		    &locked_limit);
+
+		if (res1 != Z_OK && res2 != Z_OK && res3 != Z_OK) {
+			zerr(gettext("No property was specified.  One of %s, "
+			    "%s or %s is required."), pt_to_str(PT_PHYSICAL),
+			    pt_to_str(PT_SWAP), pt_to_str(PT_LOCKED));
+			saw_error = TRUE;
+			return;
+		}
+
+		/* if phys & locked are both set, verify locked <= phys */
+		if (res1 == Z_OK && res3 == Z_OK) {
+			uint64_t phys_limit;
+			char *endp;
+
+			phys_limit = strtoull(
+			    in_progress_mcaptab.zone_physmem_cap, &endp, 10);
+			if (phys_limit < locked_limit) {
+				zerr(gettext("The %s cap must be less than or "
+				    "equal to the %s cap."),
+				    pt_to_str(PT_LOCKED),
+				    pt_to_str(PT_PHYSICAL));
+				saw_error = TRUE;
+				return;
+			}
+		}
+
+		err = Z_OK;
+		if (res1 == Z_OK) {
+			/*
+			 * We could be ending from either an add operation
+			 * or a select operation.  Since all of the properties
+			 * within this resource are optional, we always use
+			 * modify on the mcap entry.  zonecfg_modify_mcap()
+			 * will handle both adding and modifying a memory cap.
+			 */
+			err = zonecfg_modify_mcap(handle, &in_progress_mcaptab);
+		} else if (end_op == CMD_SELECT) {
+			/*
+			 * If we're ending from a select and the physical
+			 * memory cap is empty then the user could have cleared
+			 * the physical cap value, so try to delete the entry.
+			 */
+			(void) zonecfg_delete_mcap(handle);
+		}
+		break;
 	default:
 		zone_perror(rt_to_str(resource_scope), Z_NO_RESOURCE_TYPE,
 		    TRUE);
@@ -4885,7 +6260,9 @@ main(int argc, char *argv[])
 			zonecfg_set_root(optarg);
 			break;
 		case 'z':
-			if (zonecfg_validate_zonename(optarg) != Z_OK) {
+			if (strcmp(optarg, GLOBAL_ZONENAME) == 0) {
+				global_zone = TRUE;
+			} else if (zonecfg_validate_zonename(optarg) != Z_OK) {
 				zone_perror(optarg, Z_BOGUS_ZONE_NAME, TRUE);
 				usage(FALSE, HELP_SYNTAX);
 				exit(Z_USAGE);
diff --git a/usr/src/cmd/zonecfg/zonecfg.h b/usr/src/cmd/zonecfg/zonecfg.h
index 6e153d40c1..64808e9623 100644
--- a/usr/src/cmd/zonecfg/zonecfg.h
+++ b/usr/src/cmd/zonecfg/zonecfg.h
@@ -50,19 +50,20 @@ typedef int bool;
 
 #define	CMD_ADD		0
 #define	CMD_CANCEL	1
-#define	CMD_COMMIT	2
-#define	CMD_CREATE	3
-#define	CMD_DELETE	4
-#define	CMD_END		5
-#define	CMD_EXIT	6
-#define	CMD_EXPORT	7
-#define	CMD_HELP	8
-#define	CMD_INFO	9
-#define	CMD_REMOVE	10
-#define	CMD_REVERT	11
-#define	CMD_SELECT	12
-#define	CMD_SET		13
-#define	CMD_VERIFY	14
+#define	CMD_CLEAR	2
+#define	CMD_COMMIT	3
+#define	CMD_CREATE	4
+#define	CMD_DELETE	5
+#define	CMD_END		6
+#define	CMD_EXIT	7
+#define	CMD_EXPORT	8
+#define	CMD_HELP	9
+#define	CMD_INFO	10
+#define	CMD_REMOVE	11
+#define	CMD_REVERT	12
+#define	CMD_SELECT	13
+#define	CMD_SET		14
+#define	CMD_VERIFY	15
 
 #define	CMD_MIN		CMD_ADD
 #define	CMD_MAX		CMD_VERIFY
@@ -83,9 +84,18 @@ typedef int bool;
 #define	RT_LIMITPRIV	12	/* really a property, but for info ... */
 #define	RT_BOOTARGS	13	/* really a property, but for info ... */
 #define	RT_BRAND	14	/* really a property, but for info ... */
+#define	RT_DCPU		15
+#define	RT_MCAP		16
+#define	RT_MAXLWPS	17	/* really a rctl alias property, but for info */
+#define	RT_MAXSHMMEM	18	/* really a rctl alias property, but for info */
+#define	RT_MAXSHMIDS	19	/* really a rctl alias property, but for info */
+#define	RT_MAXMSGIDS	20	/* really a rctl alias property, but for info */
+#define	RT_MAXSEMIDS	21	/* really a rctl alias property, but for info */
+#define	RT_SHARES	22	/* really a rctl alias property, but for info */
+#define	RT_SCHED	23	/* really a property, but for info ... */
 
 #define	RT_MIN		RT_UNKNOWN
-#define	RT_MAX		RT_BRAND
+#define	RT_MAX		RT_SCHED
 
 /* property types: increment PT_MAX when expanding this list */
 #define	PT_UNKNOWN	0
@@ -109,9 +119,22 @@ typedef int bool;
 #define	PT_LIMITPRIV	18
 #define	PT_BOOTARGS	19
 #define	PT_BRAND	20
+#define	PT_NCPUS	21
+#define	PT_IMPORTANCE	22
+#define	PT_SWAP		23
+#define	PT_LOCKED	24
+#define	PT_SHARES	25
+#define	PT_MAXLWPS	26
+#define	PT_MAXSHMMEM	27
+#define	PT_MAXSHMIDS	28
+#define	PT_MAXMSGIDS	29
+#define	PT_MAXSEMIDS	30
+#define	PT_MAXLOCKEDMEM	31
+#define	PT_MAXSWAP	32
+#define	PT_SCHED	33
 
 #define	PT_MIN		PT_UNKNOWN
-#define	PT_MAX		PT_BRAND
+#define	PT_MAX		PT_SCHED
 
 #define	MAX_EQ_PROP_PAIRS	3
 
@@ -184,6 +207,7 @@ extern void revert_func(cmd_t *);
 extern void select_func(cmd_t *);
 extern void set_func(cmd_t *);
 extern void verify_func(cmd_t *);
+extern void clear_func(cmd_t *);
 
 extern cmd_t *alloc_cmd(void);
 extern complex_property_ptr_t alloc_complex(void);
diff --git a/usr/src/cmd/zonecfg/zonecfg_grammar.y b/usr/src/cmd/zonecfg/zonecfg_grammar.y
index dc391da0b9..5c0dc2263e 100644
--- a/usr/src/cmd/zonecfg/zonecfg_grammar.y
+++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y
@@ -60,15 +60,17 @@ extern void yyerror(char *s);
 %token COMMIT REVERT EXIT SEMICOLON TOKEN ZONENAME ZONEPATH AUTOBOOT POOL NET
 %token FS IPD ATTR DEVICE RCTL SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL
 %token NAME MATCH PRIV LIMIT ACTION VALUE EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET
-%token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND
+%token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND PSET
+%token MCAP NCPUS IMPORTANCE SHARES MAXLWPS MAXSHMMEM MAXSHMIDS MAXMSGIDS
+%token MAXSEMIDS LOCKED SWAP SCHED CLEAR
 
 %type <strval> TOKEN EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET
     property_value OPEN_PAREN CLOSE_PAREN COMMA simple_prop_val
 %type <complex> complex_piece complex_prop_val
-%type <ival> resource_type NET FS IPD DEVICE RCTL ATTR
+%type <ival> resource_type NET FS IPD DEVICE RCTL ATTR DATASET PSET MCAP
 %type <ival> property_name SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL NAME
     MATCH ZONENAME ZONEPATH AUTOBOOT POOL LIMITPRIV BOOTARGS VALUE PRIV LIMIT
-    ACTION BRAND
+    ACTION BRAND SCHED
 %type <cmd> command
 %type <cmd> add_command ADD
 %type <cmd> cancel_command CANCEL
@@ -84,6 +86,7 @@ extern void yyerror(char *s);
 %type <cmd> revert_command REVERT
 %type <cmd> select_command SELECT
 %type <cmd> set_command SET
+%type <cmd> clear_command CLEAR
 %type <cmd> verify_command VERIFY
 %type <cmd> terminator
 
@@ -126,6 +129,7 @@ commands: command terminator
 
 command: add_command
 	| cancel_command
+	| clear_command
 	| create_command
 	| commit_command
 	| delete_command
@@ -465,6 +469,69 @@ info_command:	INFO
 		$$->cmd_res_type = RT_BOOTARGS;
 		$$->cmd_prop_nv_pairs = 0;
 	}
+	|	INFO SCHED
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_SCHED;
+		$$->cmd_prop_nv_pairs = 0;
+	}
+	|	INFO SHARES
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_SHARES;
+		$$->cmd_prop_nv_pairs = 0;
+	}
+	|	INFO MAXLWPS
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_MAXLWPS;
+		$$->cmd_prop_nv_pairs = 0;
+	}
+	|	INFO MAXSHMMEM
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_MAXSHMMEM;
+		$$->cmd_prop_nv_pairs = 0;
+	}
+	|	INFO MAXSHMIDS
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_MAXSHMIDS;
+		$$->cmd_prop_nv_pairs = 0;
+	}
+	|	INFO MAXMSGIDS
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_MAXMSGIDS;
+		$$->cmd_prop_nv_pairs = 0;
+	}
+	|	INFO MAXSEMIDS
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &info_func;
+		$$->cmd_res_type = RT_MAXSEMIDS;
+		$$->cmd_prop_nv_pairs = 0;
+	}
 	|	INFO resource_type property_name EQUAL property_value
 	{
 		if (($$ = alloc_cmd()) == NULL)
@@ -512,11 +579,32 @@ remove_command: REMOVE
 		usage(FALSE, HELP_RES_PROPS);
 		YYERROR;
 	}
-	| REMOVE resource_type
+	| REMOVE TOKEN
 	{
 		short_usage(CMD_REMOVE);
+		(void) fputs("\n", stderr);
+		usage(FALSE, HELP_RES_PROPS);
 		YYERROR;
 	}
+	| REMOVE resource_type
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &remove_func;
+		$$->cmd_res_type = $2;
+	}
+	| REMOVE TOKEN resource_type
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &remove_func;
+		$$->cmd_res_type = $3;
+		$$->cmd_argc = 1;
+		$$->cmd_argv[0] = $2;
+		$$->cmd_argv[1] = NULL;
+	}
 	| REMOVE property_name property_value
 	{
 		if (($$ = alloc_cmd()) == NULL)
@@ -594,6 +682,22 @@ select_command: SELECT
 		usage(FALSE, HELP_RES_PROPS);
 		YYERROR;
 	}
+	| SELECT PSET
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &select_func;
+		$$->cmd_res_type = RT_DCPU;
+	}
+	| SELECT MCAP
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &select_func;
+		$$->cmd_res_type = RT_MCAP;
+	}
 	| SELECT resource_type
 	{
 		short_usage(CMD_SELECT);
@@ -682,6 +786,22 @@ set_command: SET
 		$$->cmd_property_ptr[0] = &property[0];
 	}
 
+clear_command: CLEAR
+	{
+		short_usage(CMD_CLEAR);
+		(void) fputs("\n", stderr);
+		usage(FALSE, HELP_PROPS);
+		YYERROR;
+	}
+	| CLEAR property_name
+	{
+		if (($$ = alloc_cmd()) == NULL)
+			YYERROR;
+		cmd = $$;
+		$$->cmd_handler = &clear_func;
+		$$->cmd_res_type = $2;
+	}
+
 verify_command: VERIFY
 	{
 		if (($$ = alloc_cmd()) == NULL)
@@ -709,6 +829,8 @@ resource_type: NET	{ $$ = RT_NET; }
 	| RCTL		{ $$ = RT_RCTL; }
 	| ATTR		{ $$ = RT_ATTR; }
 	| DATASET	{ $$ = RT_DATASET; }
+	| PSET		{ $$ = RT_DCPU; }
+	| MCAP		{ $$ = RT_MCAP; }
 
 property_name: SPECIAL	{ $$ = PT_SPECIAL; }
 	| RAW		{ $$ = PT_RAW; }
@@ -730,6 +852,17 @@ property_name: SPECIAL	{ $$ = PT_SPECIAL; }
 	| LIMIT		{ $$ = PT_LIMIT; }
 	| ACTION	{ $$ = PT_ACTION; }
 	| BRAND		{ $$ = PT_BRAND; }
+	| NCPUS		{ $$ = PT_NCPUS; }
+	| LOCKED	{ $$ = PT_LOCKED; }
+	| SWAP		{ $$ = PT_SWAP; }
+	| IMPORTANCE	{ $$ = PT_IMPORTANCE; }
+	| SHARES	{ $$ = PT_SHARES; }
+	| MAXLWPS	{ $$ = PT_MAXLWPS; }
+	| MAXSHMMEM	{ $$ = PT_MAXSHMMEM; }
+	| MAXSHMIDS	{ $$ = PT_MAXSHMIDS; }
+	| MAXMSGIDS	{ $$ = PT_MAXMSGIDS; }
+	| MAXSEMIDS	{ $$ = PT_MAXSEMIDS; }
+	| SCHED		{ $$ = PT_SCHED; }
 
 /*
  * The grammar builds data structures from the bottom up.  Thus various
diff --git a/usr/src/cmd/zonecfg/zonecfg_lex.l b/usr/src/cmd/zonecfg/zonecfg_lex.l
index aef16edbcb..53f726ca2e 100644
--- a/usr/src/cmd/zonecfg/zonecfg_lex.l
+++ b/usr/src/cmd/zonecfg/zonecfg_lex.l
@@ -40,7 +40,10 @@ extern void yyerror(char *s);
 char *safe_strdup(char *s);
 %}
 
-%a 4000
+%a 6000
+%p 4000
+%e 2000
+%n 1000
 
 %{
 /*
@@ -139,6 +142,12 @@ char *safe_strdup(char *s);
 			return SET;
 		}
 
+<INITIAL>clear {
+			BEGIN TSTATE;
+			state = TSTATE;
+			return CLEAR;
+		}
+
 <INITIAL>verify	{
 			BEGIN TSTATE;
 			state = TSTATE;
@@ -162,6 +171,10 @@ char *safe_strdup(char *s);
 
 <TSTATE>dataset	{ return DATASET; }
 
+<TSTATE>dedicated-cpu	{ return PSET; }
+
+<TSTATE>capped-memory	{ return MCAP; }
+
 <TSTATE>zonepath	{ return ZONEPATH; }
 <CSTATE>zonepath	{ return ZONEPATH; }
 
@@ -219,6 +232,39 @@ char *safe_strdup(char *s);
 <TSTATE>action	{ return ACTION; }
 <CSTATE>action	{ return ACTION; }
 
+<TSTATE>ncpus	{ return NCPUS; }
+<CSTATE>ncpus	{ return NCPUS; }
+
+<TSTATE>locked	{ return LOCKED; }
+<CSTATE>locked	{ return LOCKED; }
+
+<TSTATE>swap	{ return SWAP; }
+<CSTATE>swap	{ return SWAP; }
+
+<TSTATE>importance	{ return IMPORTANCE; }
+<CSTATE>importance	{ return IMPORTANCE; }
+
+<TSTATE>cpu-shares	{ return SHARES; }
+<CSTATE>cpu-shares	{ return SHARES; }
+
+<TSTATE>max-lwps	{ return MAXLWPS; }
+<CSTATE>max-lwps	{ return MAXLWPS; }
+
+<TSTATE>max-shm-memory	{ return MAXSHMMEM; }
+<CSTATE>max-shm-memory	{ return MAXSHMMEM; }
+
+<TSTATE>max-shm-ids	{ return MAXSHMIDS; }
+<CSTATE>max-shm-ids	{ return MAXSHMIDS; }
+
+<TSTATE>max-msg-ids	{ return MAXMSGIDS; }
+<CSTATE>max-msg-ids	{ return MAXMSGIDS; }
+
+<TSTATE>max-sem-ids	{ return MAXSEMIDS; }
+<CSTATE>max-sem-ids	{ return MAXSEMIDS; }
+
+<TSTATE>scheduling-class	{ return SCHED; }
+<CSTATE>scheduling-class	{ return SCHED; }
+
 <TSTATE>=	{ return EQUAL; }
 <LSTATE>=	{ return EQUAL; }
 <CSTATE>=	{ return EQUAL; }
diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h
index 3af98c1a6b..10ee4a2bb4 100644
--- a/usr/src/head/libzonecfg.h
+++ b/usr/src/head/libzonecfg.h
@@ -90,6 +90,15 @@ extern "C" {
 #define	Z_PRIV_REQUIRED		38	/* required privilege is missing */
 #define	Z_PRIV_UNKNOWN		39	/* specified privilege is unknown */
 #define	Z_BRAND_ERROR		40	/* brand-specific error */
+#define	Z_INCOMPATIBLE		41	/* incompatible settings */
+#define	Z_ALIAS_DISALLOW	42	/* rctl alias disallowed */
+#define	Z_CLEAR_DISALLOW	43	/* clear property disallowed */
+#define	Z_POOL			44	/* generic libpool error */
+#define	Z_POOLS_NOT_ACTIVE	45	/* pool service not enabled */
+#define	Z_POOL_ENABLE		46	/* pools enable failed */
+#define	Z_NO_POOL		47	/* no such pool configured */
+#define	Z_POOL_CREATE		48	/* pool create failed */
+#define	Z_POOL_BIND		49	/* pool bind failed */
 
 /*
  * Warning: these are shared with the admin/install consolidation.
@@ -126,6 +135,18 @@ extern "C" {
 #define	ZONE_PKG_VERSMAX	256
 
 /*
+ * Shortened alias names for the zones rctls.
+ */
+#define	ALIAS_MAXLWPS		"max-lwps"
+#define	ALIAS_MAXSHMMEM		"max-shm-memory"
+#define	ALIAS_MAXSHMIDS		"max-shm-ids"
+#define	ALIAS_MAXMSGIDS		"max-msg-ids"
+#define	ALIAS_MAXSEMIDS		"max-sem-ids"
+#define	ALIAS_MAXLOCKEDMEM	"locked"
+#define	ALIAS_MAXSWAP		"swap"
+#define	ALIAS_SHARES		"cpu-shares"
+
+/*
  * Bit flag definitions for passing into libzonecfg functions.
  */
 #define	ZONE_DRY_RUN		0x01
@@ -190,6 +211,16 @@ struct zone_dstab {
 	char	zone_dataset_name[MAXNAMELEN];
 };
 
+struct zone_psettab {
+	char	zone_ncpu_min[MAXNAMELEN];
+	char	zone_ncpu_max[MAXNAMELEN];
+	char	zone_importance[MAXNAMELEN];
+};
+
+struct zone_mcaptab {
+	char	zone_physmem_cap[MAXNAMELEN];
+};
+
 struct zone_pkgtab {
 	char	zone_pkg_name[MAXNAMELEN];
 	char	zone_pkg_version[ZONE_PKG_VERSMAX];
@@ -227,10 +258,17 @@ extern	int	zonecfg_access(const char *, int);
 extern	void	zonecfg_set_root(const char *);
 extern	const char *zonecfg_get_root(void);
 extern	boolean_t zonecfg_in_alt_root(void);
+extern	int	zonecfg_num_resources(zone_dochandle_t, char *);
+extern	int	zonecfg_del_all_resources(zone_dochandle_t, char *);
+extern	boolean_t zonecfg_valid_ncpus(char *, char *);
+extern	boolean_t zonecfg_valid_importance(char *);
+extern	int	zonecfg_str_to_bytes(char *, uint64_t *);
+extern	boolean_t zonecfg_valid_memlimit(char *, uint64_t *);
+extern	boolean_t zonecfg_valid_alias_limit(char *, char *, uint64_t *);
 
 /*
- * Zone name, path to zone directory, autoboot setting, pool and boot
- * arguments.
+ * Zone name, path to zone directory, autoboot setting, pool, boot
+ * arguments, and scheduling-class.
  */
 extern	int	zonecfg_validate_zonename(const char *);
 extern	int	zonecfg_get_name(zone_dochandle_t, char *, size_t);
@@ -243,6 +281,9 @@ extern	int	zonecfg_get_pool(zone_dochandle_t, char *, size_t);
 extern	int	zonecfg_set_pool(zone_dochandle_t, char *);
 extern	int	zonecfg_get_bootargs(zone_dochandle_t, char *, size_t);
 extern	int	zonecfg_set_bootargs(zone_dochandle_t, char *);
+extern	int	zonecfg_get_sched_class(zone_dochandle_t, char *, size_t);
+extern	int	zonecfg_set_sched(zone_dochandle_t, char *);
+extern	int	zonecfg_get_dflt_sched_class(zone_dochandle_t, char *, int);
 
 /*
  * Set/retrieve the brand for the zone
@@ -302,6 +343,11 @@ extern	int	zonecfg_add_rctl_value(struct zone_rctltab *,
 extern	int	zonecfg_remove_rctl_value(struct zone_rctltab *,
     struct zone_rctlvaltab *);
 extern	void	zonecfg_free_rctl_value_list(struct zone_rctlvaltab *);
+extern	boolean_t zonecfg_aliased_rctl_ok(zone_dochandle_t, char *);
+extern	int	zonecfg_set_aliased_rctl(zone_dochandle_t, char *, uint64_t);
+extern	int	zonecfg_get_aliased_rctl(zone_dochandle_t, char *, uint64_t *);
+extern	int	zonecfg_rm_aliased_rctl(zone_dochandle_t, char *);
+extern	int	zonecfg_apply_rctls(char *, zone_dochandle_t);
 
 /*
  * Generic attribute configuration and type/value extraction.
@@ -328,6 +374,34 @@ extern	int	zonecfg_modify_ds(zone_dochandle_t, struct zone_dstab *,
 extern	int	zonecfg_lookup_ds(zone_dochandle_t, struct zone_dstab *);
 
 /*
+ * cpu-set configuration.
+ */
+extern	int	zonecfg_add_pset(zone_dochandle_t, struct zone_psettab *);
+extern	int	zonecfg_delete_pset(zone_dochandle_t);
+extern	int	zonecfg_modify_pset(zone_dochandle_t, struct zone_psettab *);
+extern	int	zonecfg_lookup_pset(zone_dochandle_t, struct zone_psettab *);
+
+/*
+ * mem-cap configuration.
+ */
+extern	int	zonecfg_delete_mcap(zone_dochandle_t);
+extern	int	zonecfg_modify_mcap(zone_dochandle_t, struct zone_mcaptab *);
+extern	int	zonecfg_lookup_mcap(zone_dochandle_t, struct zone_mcaptab *);
+
+/*
+ * Temporary pool support functions.
+ */
+extern	int	zonecfg_destroy_tmp_pool(char *, char *, int);
+extern	int	zonecfg_bind_tmp_pool(zone_dochandle_t, zoneid_t, char *, int);
+extern	int	zonecfg_bind_pool(zone_dochandle_t, zoneid_t, char *, int);
+extern	boolean_t zonecfg_warn_poold(zone_dochandle_t);
+
+/*
+ * Miscellaneous utility functions.
+ */
+extern	int	zonecfg_enable_rcapd(char *, int);
+
+/*
  * attach/detach support.
  */
 extern	int	zonecfg_get_attach_handle(const char *, const char *,
@@ -373,6 +447,8 @@ extern	int	zonecfg_endrctlent(zone_dochandle_t);
 extern	int	zonecfg_setdsent(zone_dochandle_t);
 extern	int	zonecfg_getdsent(zone_dochandle_t, struct zone_dstab *);
 extern	int	zonecfg_enddsent(zone_dochandle_t);
+extern	int	zonecfg_getpsetent(zone_dochandle_t, struct zone_psettab *);
+extern	int	zonecfg_getmcapent(zone_dochandle_t, struct zone_mcaptab *);
 extern	int	zonecfg_setpkgent(zone_dochandle_t);
 extern	int	zonecfg_getpkgent(zone_dochandle_t, struct zone_pkgtab *);
 extern	int	zonecfg_endpkgent(zone_dochandle_t);
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index da3bdb3844..c541fcb01c 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -489,7 +489,7 @@ libldap5:	libsasl libsocket libnsl libmd
 libsldap:	libldap5 libtsol
 libpool:	libnvpair libexacct
 libzonecfg:	libc libsocket libnsl libuuid libnvpair libsysevent libsec \
-		libbrand
+		libbrand libpool libscf
 libproc:	../cmd/sgs/librtld_db ../cmd/sgs/libelf libctf
 libproject:	libpool libproc libsecdb
 libtsnet:	libnsl libtsol libsecdb
diff --git a/usr/src/lib/libc/port/gen/getrusage.c b/usr/src/lib/libc/port/gen/getrusage.c
index c1f1b92188..efeaf0be24 100644
--- a/usr/src/lib/libc/port/gen/getrusage.c
+++ b/usr/src/lib/libc/port/gen/getrusage.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -49,6 +48,7 @@
 #include <sys/param.h>
 #include <errno.h>
 #include <sys/resource.h>
+#include <sys/vm_usage.h>
 #include <fcntl.h>
 #include <sys/fcntl.h>
 #include <procfs.h>
@@ -76,3 +76,10 @@ getrusage(int who, struct rusage *rusage)
 		return (-1);
 	}
 }
+
+int
+getvmusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres)
+{
+	return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE, flags, age,
+	    buf, nres));
+}
diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers
index 22227a6413..8e1b399567 100644
--- a/usr/src/lib/libc/port/mapfile-vers
+++ b/usr/src/lib/libc/port/mapfile-vers
@@ -59,6 +59,7 @@ SUNW_1.23 {		# SunOS 5.11 (Solaris 11)
 	fdatasync;
 	forkallx;
 	forkx;
+	getvmusage;
 	lio_listio;
 	mkdtemp;
 	_mkdtemp;
diff --git a/usr/src/lib/libpool/common/pool.c b/usr/src/lib/libpool/common/pool.c
index 167cd8be5b..6fbd7b34d3 100644
--- a/usr/src/lib/libpool/common/pool.c
+++ b/usr/src/lib/libpool/common/pool.c
@@ -914,10 +914,34 @@ pool_put_property(pool_conf_t *conf, pool_elem_t *pe, const char *name,
 		return (NULL);
 	}
 
-	if (!is_valid_prop_name(name)) {
+	/* Don't allow (re)setting of the "temporary" property */
+	if (!is_valid_prop_name(name) || strstr(name, ".temporary") != NULL) {
 		pool_seterror(POE_BADPARAM);
 		return (PO_FAIL);
 	}
+
+	/* Don't allow rename of temporary pools/resources */
+	if (strstr(name, ".name") != NULL && elem_is_tmp(pe)) {
+		boolean_t rename = B_TRUE;
+		pool_value_t *pv = pool_value_alloc();
+
+		if (pe->pe_get_prop(pe, name, pv) != POC_INVAL) {
+			const char *s1 = NULL;
+			const char *s2 = NULL;
+
+			(void) pool_value_get_string(pv, &s1);
+			(void) pool_value_get_string(val, &s2);
+			if (s1 != NULL && s2 != NULL && strcmp(s1, s2) == 0)
+				rename = B_FALSE;
+		}
+		pool_value_free(pv);
+
+		if (rename) {
+			pool_seterror(POE_BADPARAM);
+			return (PO_FAIL);
+		}
+	}
+
 	/*
 	 * Check to see if this is a property we are managing. If it is,
 	 * ensure that we are happy with what the user is doing.
@@ -936,6 +960,46 @@ pool_put_property(pool_conf_t *conf, pool_elem_t *pe, const char *name,
 }
 
 /*
+ * Flag pe as temporary by setting its boolean "<class>.temporary" property
+ * via the element's own pe_put_prop handler; returns that handler's result.
+ * PO_FAIL is returned if conf fails its check, pe belongs to another conf,
+ * the property name does not fit, or no pool_value_t could be allocated.
+ */
+int
+pool_set_temporary(pool_conf_t *conf, pool_elem_t *pe)
+{
+	int res;
+	char name[128];
+	pool_value_t *val;
+
+	if (pool_conf_check(conf) != PO_SUCCESS)
+		return (PO_FAIL);
+
+	if (TO_CONF(pe) != conf) {
+		pool_seterror(POE_BADPARAM);
+		return (PO_FAIL);
+	}
+
+	/* build "<class>.temporary"; a length >= sizeof (name) was truncated */
+	if (snprintf(name, sizeof (name), "%s.temporary",
+	    pool_elem_class_string(pe)) >= sizeof (name)) {
+		pool_seterror(POE_SYSTEM);
+		return (PO_FAIL);
+	}
+
+	if ((val = pool_value_alloc()) == NULL)
+		return (PO_FAIL);
+
+	pool_value_set_bool(val, (uchar_t)1);
+
+	res = pe->pe_put_prop(pe, name, val);
+
+	pool_value_free(val);
+
+	return (res);
+}
+
+/*
  * Update the specified property value with the namespace prepended.
  * e.g. If this function is used to update the property "name" on a pool, it
  * will attempt to update "pool.name".
@@ -1030,6 +1094,12 @@ pool_rm_property(pool_conf_t *conf, pool_elem_t *pe, const char *name)
 		return (NULL);
 	}
 
+	/* Don't allow removal of the "temporary" property */
+	if (strstr(name, ".temporary") != NULL) {
+		pool_seterror(POE_BADPARAM);
+		return (PO_FAIL);
+	}
+
 	/*
 	 * Check to see if this is a property we are managing. If it is,
 	 * ensure that we are happy with what the user is doing.
@@ -1122,6 +1192,17 @@ pool_create(pool_conf_t *conf, const char *name)
 		pool_seterror(POE_PUTPROP);
 		return (NULL);
 	}
+
+	/*
+	 * If we are creating a temporary pool configuration, flag the pool.
+	 */
+	if (conf->pc_prov->pc_oflags & PO_TEMP) {
+		if (pool_set_temporary(conf, pe) == PO_FAIL) {
+			(void) pool_destroy(conf, pool_elem_pool(pe));
+			return (NULL);
+		}
+	}
+
 	return (pool_elem_pool(pe));
 }
 
@@ -1227,6 +1308,17 @@ pool_resource_create(pool_conf_t *conf, const char *sz_type, const char *name)
 			return (NULL);
 		}
 	}
+
+	/*
+	 * If we are creating a temporary pool configuration, flag the resource.
+	 */
+	if (conf->pc_prov->pc_oflags & PO_TEMP) {
+		if (pool_set_temporary(conf, pe) != PO_SUCCESS) {
+			(void) pool_resource_destroy(conf, pool_elem_res(pe));
+			return (NULL);
+		}
+	}
+
 	return (pool_elem_res(pe));
 }
 
@@ -1396,7 +1488,8 @@ pool_conf_open(pool_conf_t *conf, const char *location, int oflags)
 		pool_seterror(POE_BADPARAM);
 		return (PO_FAIL);
 	}
-	if (oflags & ~(PO_RDONLY | PO_RDWR | PO_CREAT | PO_DISCO | PO_UPDATE)) {
+	if (oflags & ~(PO_RDONLY | PO_RDWR | PO_CREAT | PO_DISCO | PO_UPDATE |
+	    PO_TEMP)) {
 		pool_seterror(POE_BADPARAM);
 		return (PO_FAIL);
 	}
@@ -1408,6 +1501,10 @@ pool_conf_open(pool_conf_t *conf, const char *location, int oflags)
 	if (oflags & PO_CREAT)
 		oflags |= PO_RDWR;
 
+	/* location is ignored when creating a temporary configuration */
+	if (oflags & PO_TEMP)
+		location = "";
+
 	if ((conf->pc_location = strdup(location)) == NULL) {
 		pool_seterror(POE_SYSTEM);
 		return (PO_FAIL);
@@ -1415,14 +1512,25 @@ pool_conf_open(pool_conf_t *conf, const char *location, int oflags)
 	/*
 	 * This is the crossover point into the actual data provider
 	 * implementation, allocate a data provider of the appropriate
-	 * type for your data storage medium. In this case it's a kernel
-	 * data provider. To use a different data provider, write some
-	 * code to implement all the required interfaces and then
-	 * change the next line to allocate a data provider which uses your
-	 * new code. All data provider routines can be static, apart from
-	 * the allocation routine.
+	 * type for your data storage medium. In this case it's either a kernel
+	 * or xml data provider. To use a different data provider, write some
+	 * code to implement all the required interfaces and then change the
+	 * following code to allocate a data provider which uses your new code.
+	 * All data provider routines can be static, apart from the allocation
+	 * routine.
+	 *
+	 * For temporary pools (PO_TEMP) we start with a copy of the current
+	 * dynamic configuration and do all of the updates in-memory.
 	 */
-	if (strcmp(location, pool_dynamic_location()) == 0) {
+	if (oflags & PO_TEMP) {
+		if (pool_knl_connection_alloc(conf, PO_TEMP) != PO_SUCCESS) {
+			conf->pc_state = POF_INVALID;
+			return (PO_FAIL);
+		}
+		/* set rdwr flag so we can update the in-memory config. */
+		conf->pc_prov->pc_oflags |= PO_RDWR;
+
+	} else if (strcmp(location, pool_dynamic_location()) == 0) {
 		if (pool_knl_connection_alloc(conf, oflags) != PO_SUCCESS) {
 			conf->pc_state = POF_INVALID;
 			return (PO_FAIL);
diff --git a/usr/src/lib/libpool/common/pool.h b/usr/src/lib/libpool/common/pool.h
index d38e9902e6..ee11aadb7b 100644
--- a/usr/src/lib/libpool/common/pool.h
+++ b/usr/src/lib/libpool/common/pool.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -96,6 +95,7 @@ extern uint_t pool_version(uint_t ver);
 #define	PO_CREAT		0x2
 #define	PO_DISCO		0x4
 #define	PO_UPDATE		0x8
+#define	PO_TEMP			0x10
 
 /* Allocation policy */
 #define	POA_IMPORTANCE		"importance based"
@@ -218,6 +218,7 @@ extern pool_value_class_t pool_get_property(const pool_conf_t *,
 extern int pool_put_property(pool_conf_t *, pool_elem_t *, const char *,
     const pool_value_t *);
 extern int pool_rm_property(pool_conf_t *, pool_elem_t *, const char *);
+
 /*
  * Walk the associated properties of the supplied element calling the supplied
  * function for each property in turn. There is no implied order in the walk.
diff --git a/usr/src/lib/libpool/common/pool_commit.c b/usr/src/lib/libpool/common/pool_commit.c
index 1ea4808377..b996524b98 100644
--- a/usr/src/lib/libpool/common/pool_commit.c
+++ b/usr/src/lib/libpool/common/pool_commit.c
@@ -245,6 +245,9 @@ commit_delete(pool_elem_t *pe)
 	pool_t *pool;
 	int ret = 0;
 
+	if (elem_is_tmp(pe))
+		return (PO_SUCCESS);
+
 	switch (pool_elem_class(pe)) {
 	case PEC_SYSTEM:	/* NO-OP */
 		break;
@@ -1306,7 +1309,14 @@ clone_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
 	if ((prop = provider_get_prop(pe, name)) != NULL &&
 	    prop_is_readonly(prop) == PO_TRUE)
 		return (PO_SUCCESS);
-	return (pool_put_property(TO_CONF(tgt), tgt, name, pv) == PO_FAIL);
+
+	/* The temporary property needs special handling */
+	if (strstr(name, ".temporary") != NULL)
+		return (pool_set_temporary(TO_CONF(tgt), tgt) ==
+		    PO_FAIL ?  PO_FAIL : PO_SUCCESS);
+	else
+		return (pool_put_property(TO_CONF(tgt), tgt, name, pv) ==
+		    PO_FAIL ? PO_FAIL : PO_SUCCESS);
 }
 
 /*
@@ -1322,8 +1332,9 @@ clean_element(pool_conf_t *conf, pool_elem_t *pe, const char *name,
 	/*
 	 * Some properties should be ignored
 	 */
-	if ((prop = provider_get_prop(pe, name)) != NULL &&
-	    prop_is_optional(prop) == PO_FALSE)
+	if (strstr(name, ".temporary") != NULL ||
+	    ((prop = provider_get_prop(pe, name)) != NULL &&
+	    prop_is_optional(prop) == PO_FALSE))
 		return (PO_SUCCESS);
 	return (pool_rm_property(conf, (pool_elem_t *)pe, name) == PO_FAIL);
 }
diff --git a/usr/src/lib/libpool/common/pool_internal.c b/usr/src/lib/libpool/common/pool_internal.c
index 210e63d620..5e572f6eaf 100644
--- a/usr/src/lib/libpool/common/pool_internal.c
+++ b/usr/src/lib/libpool/common/pool_internal.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1143,6 +1142,23 @@ elem_is_default(const pool_elem_t *res)
 }
 
 /*
+ * Return B_TRUE if the element has the 'temporary' property set.
+ */
+boolean_t
+elem_is_tmp(const pool_elem_t *elem)
+{
+	pool_value_t tmp_prop = POOL_VALUE_INITIALIZER;
+	uchar_t is_set;
+
+	/* Anything but a boolean-valued property means "not temporary". */
+	if (pool_get_ns_property(elem, "temporary", &tmp_prop) != POC_BOOL)
+		return (B_FALSE);
+	(void) pool_value_get_bool(&tmp_prop, &is_set);
+
+	return (is_set == 0 ? B_FALSE : B_TRUE);
+}
+
+/*
  * get_default_elem() returns the default elem for type of the supplied
  * elem.
  *
diff --git a/usr/src/lib/libpool/common/pool_internal.h b/usr/src/lib/libpool/common/pool_internal.h
index 592c98d11d..e172d23af4 100644
--- a/usr/src/lib/libpool/common/pool_internal.h
+++ b/usr/src/lib/libpool/common/pool_internal.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -256,6 +255,7 @@ extern int		resource_get_pinned(const pool_resource_t *,
 extern char		*elem_get_name(const pool_elem_t *);
 extern id_t		elem_get_sysid(const pool_elem_t *);
 extern int    		elem_is_default(const pool_elem_t *);
+extern boolean_t	elem_is_tmp(const pool_elem_t *);
 extern const pool_elem_t *get_default_elem(const pool_elem_t *);
 extern int		qsort_elem_compare(const void *, const void *);
 
@@ -371,6 +371,7 @@ extern pool_value_class_t pool_get_ns_property(const pool_elem_t *,
 extern int		pool_walk_any_properties(pool_conf_t *, pool_elem_t *,
     void *, int (*)(pool_conf_t *, pool_elem_t *, const char *,
     pool_value_t *, void *), int);
+extern int		pool_set_temporary(pool_conf_t *, pool_elem_t *);
 
 /*
  * Namespace aware utility functions.
diff --git a/usr/src/lib/libpool/common/pool_kernel.c b/usr/src/lib/libpool/common/pool_kernel.c
index f84d6f2ba5..3da4f0263c 100644
--- a/usr/src/lib/libpool/common/pool_kernel.c
+++ b/usr/src/lib/libpool/common/pool_kernel.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -646,10 +645,14 @@ pool_knl_close(pool_conf_t *conf)
 	}
 	/*
 	 * Rollback any pending changes before freeing the prov. This
-	 * ensures there are no memory leaks from pending
-	 * transactions.
+	 * ensures there are no memory leaks from pending transactions.
+	 * However, don't rollback when we've done a temporary pool since the
+	 * pool/resources haven't really been committed in this case.
+	 * They will all be freed in pool_knl_connection_free and we don't
+	 * want to double free them.
 	 */
-	(void) pool_knl_rollback(conf);
+	if (!(conf->pc_prov->pc_oflags & PO_TEMP))
+		(void) pool_knl_rollback(conf);
 	pool_knl_connection_free(prov);
 	return (PO_SUCCESS);
 }
@@ -997,6 +1000,9 @@ pool_knl_export(const pool_conf_t *conf, const char *location,
 				const char *sep = "";
 				int j;
 
+				if (elem_is_tmp(elem))
+					continue;
+
 				if ((info.ktx_node = node_create(system,
 				    BAD_CAST element_class_tags
 				    [pool_elem_class(elem)])) == NULL) {
@@ -1072,6 +1078,9 @@ pool_knl_export(const pool_conf_t *conf, const char *location,
 				uint_t ncompelem;
 				int j;
 
+				if (elem_is_tmp(elem))
+					continue;
+
 				if ((info.ktx_node = node_create(system,
 				    BAD_CAST element_class_tags
 				    [pool_elem_class(elem)])) == NULL) {
diff --git a/usr/src/lib/libproject/common/setproject.c b/usr/src/lib/libproject/common/setproject.c
index 2303576d32..d22878a36f 100644
--- a/usr/src/lib/libproject/common/setproject.c
+++ b/usr/src/lib/libproject/common/setproject.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -536,7 +535,12 @@ setproject_proc(const char *project_name, const char *user_name, int flags,
 		}
 
 		old_pool_name = pool_get_binding(pid);
-		if (bind_to_pool(pool_name, pid, 0) != 0) {
+
+		/*
+		 * If parent is not bound to the default pool, then we want
+		 * to preserve same binding as parent.
+		 */
+		if (pool_name != NULL && bind_to_pool(pool_name, pid, 0) != 0) {
 			if (old_pool_name)
 				free(old_pool_name);
 			_kva_free(kv_array);
diff --git a/usr/src/lib/libzonecfg/Makefile.com b/usr/src/lib/libzonecfg/Makefile.com
index b89a44fce3..b64df94527 100644
--- a/usr/src/lib/libzonecfg/Makefile.com
+++ b/usr/src/lib/libzonecfg/Makefile.com
@@ -32,7 +32,8 @@ OBJECTS=	libzonecfg.o getzoneent.o scratchops.o
 include ../../Makefile.lib
 
 LIBS =		$(DYNLIB) $(LINTLIB)
-LDLIBS +=	-lc -lsocket -lnsl -luuid -lnvpair -lsysevent -lsec -lbrand
+LDLIBS +=	-lc -lsocket -lnsl -luuid -lnvpair -lsysevent -lsec -lbrand \
+		-lpool -lscf -lproc
 # DYNLIB libraries do not have lint libs and are not linted
 $(DYNLIB) :=	LDLIBS += -lxml2
 
diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c
index f4fbcde368..1a3fb37c8c 100644
--- a/usr/src/lib/libzonecfg/common/libzonecfg.c
+++ b/usr/src/lib/libzonecfg/common/libzonecfg.c
@@ -46,6 +46,10 @@
 #include <sys/nvpair.h>
 #include <sys/types.h>
 #include <ftw.h>
+#include <pool.h>
+#include <libscf.h>
+#include <libproc.h>
+#include <sys/priocntl.h>
 
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -79,6 +83,9 @@
 #define	DTD_ELEM_RCTLVALUE	(const xmlChar *) "rctl-value"
 #define	DTD_ELEM_ZONE		(const xmlChar *) "zone"
 #define	DTD_ELEM_DATASET	(const xmlChar *) "dataset"
+#define	DTD_ELEM_TMPPOOL	(const xmlChar *) "tmp_pool"
+#define	DTD_ELEM_PSET		(const xmlChar *) "pset"
+#define	DTD_ELEM_MCAP		(const xmlChar *) "mcap"
 #define	DTD_ELEM_PACKAGE	(const xmlChar *) "package"
 #define	DTD_ELEM_PATCH		(const xmlChar *) "patch"
 #define	DTD_ELEM_OBSOLETES	(const xmlChar *) "obsoletes"
@@ -92,6 +99,7 @@
 #define	DTD_ATTR_LIMIT		(const xmlChar *) "limit"
 #define	DTD_ATTR_LIMITPRIV	(const xmlChar *) "limitpriv"
 #define	DTD_ATTR_BOOTARGS	(const xmlChar *) "bootargs"
+#define	DTD_ATTR_SCHED		(const xmlChar *) "scheduling-class"
 #define	DTD_ATTR_MATCH		(const xmlChar *) "match"
 #define	DTD_ATTR_NAME		(const xmlChar *) "name"
 #define	DTD_ATTR_PHYSICAL	(const xmlChar *) "physical"
@@ -102,6 +110,10 @@
 #define	DTD_ATTR_TYPE		(const xmlChar *) "type"
 #define	DTD_ATTR_VALUE		(const xmlChar *) "value"
 #define	DTD_ATTR_ZONEPATH	(const xmlChar *) "zonepath"
+#define	DTD_ATTR_NCPU_MIN	(const xmlChar *) "ncpu_min"
+#define	DTD_ATTR_NCPU_MAX	(const xmlChar *) "ncpu_max"
+#define	DTD_ATTR_IMPORTANCE	(const xmlChar *) "importance"
+#define	DTD_ATTR_PHYSCAP	(const xmlChar *) "physcap"
 #define	DTD_ATTR_VERSION	(const xmlChar *) "version"
 #define	DTD_ATTR_ID		(const xmlChar *) "id"
 #define	DTD_ATTR_UID		(const xmlChar *) "uid"
@@ -133,6 +145,46 @@
 #define	PATCHINFO	"PATCH_INFO_"
 #define	PKGINFO_RD_LEN	128
 
+#define	TMP_POOL_NAME	"SUNWtmp_%s"
+#define	MAX_TMP_POOL_NAME	(ZONENAME_MAX + 9)
+#define	RCAP_SERVICE	"system/rcap:default"
+#define	POOLD_SERVICE	"system/pools/dynamic:default"
+
+/*
+ * rctl alias definitions
+ *
+ * This holds the alias, the full rctl name, the default priv value, action
+ * and lower limit.  The functions that handle rctl aliases step through
+ * this table, matching on the alias, and using the full values for setting
+ * the rctl entry as well the limit for validation.
+ */
+static struct alias {
+	char *shortname;	/* alias that lookups match against */
+	char *realname;		/* full rctl name behind the alias */
+	char *priv;		/* default priv value for the rctl */
+	char *action;		/* default action for the rctl */
+	uint64_t low_limit;	/* lowest limit validation will accept */
+} aliases[] = {
+	{ALIAS_MAXLWPS, "zone.max-lwps", "privileged", "deny", 100},
+	{ALIAS_MAXSHMMEM, "zone.max-shm-memory", "privileged", "deny", 0},
+	{ALIAS_MAXSHMIDS, "zone.max-shm-ids", "privileged", "deny", 0},
+	{ALIAS_MAXMSGIDS, "zone.max-msg-ids", "privileged", "deny", 0},
+	{ALIAS_MAXSEMIDS, "zone.max-sem-ids", "privileged", "deny", 0},
+	{ALIAS_MAXLOCKEDMEM, "zone.max-locked-memory", "privileged", "deny", 0},
+	{ALIAS_MAXSWAP, "zone.max-swap", "privileged", "deny", 0},
+	{ALIAS_SHARES, "zone.cpu-shares", "privileged", "none", 0},
+	{NULL, NULL, NULL, NULL, 0}	/* terminator: NULL shortname */
+};
+
+/*
+ * Structure for applying rctls to a running zone.  It allows important
+ * process values to be passed together easily.
+ */
+typedef struct pr_info_handle {
+	struct ps_prochandle *pr;	/* set by grab_process() via Pgrab() */
+	pid_t pid;			/* process that grab_process() grabs */
+} pr_info_handle_t;
+
 struct zone_dochandle {
 	char		*zone_dh_rootdir;
 	xmlDocPtr	zone_dh_doc;
@@ -446,14 +498,20 @@ setrootattr(zone_dochandle_t handle, const xmlChar *propname,
 	int err;
 	xmlNodePtr root;
 
-	if (propval == NULL)
-		return (Z_INVAL);
-
 	if ((err = getroot(handle, &root)) != Z_OK)
 		return (err);
 
-	if (xmlSetProp(root, propname, (const xmlChar *) propval) == NULL)
-		return (Z_INVAL);
+	/*
+	 * If we get a null propval remove the property (ignore return since it
+	 * may not be set to begin with).
+	 */
+	if (propval == NULL) {
+		(void) xmlUnsetProp(root, propname);
+	} else {
+		if (xmlSetProp(root, propname, (const xmlChar *) propval)
+		    == NULL)
+			return (Z_INVAL);
+	}
 	return (Z_OK);
 }
 
@@ -947,6 +1005,18 @@ zonecfg_set_bootargs(zone_dochandle_t handle, char *bargs)
 	return (setrootattr(handle, DTD_ATTR_BOOTARGS, bargs));
 }
 
+int	/* whatever getrootattr() returns for the scheduling-class attr */
+zonecfg_get_sched_class(zone_dochandle_t handle, char *sched, size_t schedsize)
+{
+	return (getrootattr(handle, DTD_ATTR_SCHED, sched, schedsize));
+}
+
+int	/* whatever setrootattr() returns; a NULL sched unsets the attr */
+zonecfg_set_sched(zone_dochandle_t handle, char *sched)
+{
+	return (setrootattr(handle, DTD_ATTR_SCHED, sched));
+}
+
 /*
  * /etc/zones/index caches a vital piece of information which is also
  * in the <zonename>.xml file: the path to the zone.  This is for performance,
@@ -3047,6 +3117,30 @@ zonecfg_strerror(int errnum)
 	case Z_BRAND_ERROR:
 		return (dgettext(TEXT_DOMAIN,
 		    "Brand-specific error"));
+	case Z_INCOMPATIBLE:
+		return (dgettext(TEXT_DOMAIN, "Incompatible settings"));
+	case Z_ALIAS_DISALLOW:
+		return (dgettext(TEXT_DOMAIN,
+		    "An incompatible rctl already exists for this property"));
+	case Z_CLEAR_DISALLOW:
+		return (dgettext(TEXT_DOMAIN,
+		    "Clearing this property is not allowed"));
+	case Z_POOL:
+		return (dgettext(TEXT_DOMAIN, "libpool(3LIB) error"));
+	case Z_POOLS_NOT_ACTIVE:
+		return (dgettext(TEXT_DOMAIN, "Pools facility not active; "
+		    "zone will not be bound to pool"));
+	case Z_POOL_ENABLE:
+		return (dgettext(TEXT_DOMAIN,
+		    "Could not enable pools facility"));
+	case Z_NO_POOL:
+		return (dgettext(TEXT_DOMAIN,
+		    "Pool not found; using default pool"));
+	case Z_POOL_CREATE:
+		return (dgettext(TEXT_DOMAIN,
+		    "Could not create a temporary pool"));
+	case Z_POOL_BIND:
+		return (dgettext(TEXT_DOMAIN, "Could not bind zone to pool"));
 	default:
 		return (dgettext(TEXT_DOMAIN, "Unknown error"));
 	}
@@ -3086,6 +3180,951 @@ zonecfg_endent(zone_dochandle_t handle)
 	return (Z_OK);
 }
 
+/*
+ * Do the work required to manipulate a process through libproc.
+ * If grab_process() returns no errors (0), then release_process()
+ * must eventually be called.
+ *
+ * Return values:
+ *      0 Successful creation of agent thread
+ *      1 Error grabbing
+ *      2 Error creating agent
+ */
+static int
+grab_process(pr_info_handle_t *p)
+{
+	int perr;
+
+	/* Nothing to undo if the process cannot be grabbed at all. */
+	if ((p->pr = Pgrab(p->pid, 0, &perr)) == NULL)
+		return (1);
+
+	/* A failure to set PR_RLC is reported as a grab error too. */
+	if (Psetflags(p->pr, PR_RLC) != 0) {
+		Prelease(p->pr, 0);
+		return (1);
+	}
+
+	/* Without an agent the handle is of no use, so give it back. */
+	if (Pcreate_agent(p->pr) != 0) {
+		Prelease(p->pr, 0);
+		return (2);
+	}
+	return (0);
+}
+
+/*
+ * Release the specified process. This destroys the agent
+ * and releases the process. If the process is NULL, nothing
+ * is done. This function should only be called if grab_process()
+ * has previously been called and returned success.
+ *
+ * This function is Pgrab-safe.
+ */
+static void
+release_process(struct ps_prochandle *Pr)
+{
+	/* Tear down in the reverse order of grab_process(). */
+	if (Pr != NULL) {
+		Pdestroy_agent(Pr);
+		Prelease(Pr, 0);
+	}
+}
+
+static boolean_t
+grab_zone_proc(char *zonename, pr_info_handle_t *p)
+{
+	DIR *procdir;
+	struct dirent *ent;
+	zoneid_t target;
+	int self;
+	psinfo_t info;
+	boolean_t grabbed = B_FALSE;
+
+	if (zone_get_id(zonename, &target) != 0)
+		return (B_FALSE);
+
+	self = getpid();
+
+	if ((procdir = opendir("/proc")) == NULL)
+		return (B_FALSE);
+
+	/*
+	 * Walk /proc until some process of the target zone, other than
+	 * this one, has been grabbed.  On success the grabbed handle is
+	 * left in p->pr and the caller is expected to release it.
+	 */
+	while (!grabbed && (ent = readdir(procdir)) != NULL) {
+		p->pid = atoi(ent->d_name);
+
+		/* Skip self, unreadable entries and other zones' processes */
+		if (p->pid == self ||
+		    proc_get_psinfo(p->pid, &info) != 0 ||
+		    info.pr_zoneid != target)
+			continue;
+
+		/* attempt to grab process */
+		if (grab_process(p) != 0)
+			continue;
+
+		/* check the zone again, this time on the grabbed process */
+		if (pr_getzoneid(p->pr) == target)
+			grabbed = B_TRUE;
+		else
+			release_process(p->pr);
+	}
+
+	(void) closedir(procdir);
+	return (grabbed);
+}
+
+static boolean_t
+get_priv_rctl(struct ps_prochandle *pr, char *name, rctlblk_t *rblk)
+{
+	int rv;
+
+	/*
+	 * Step through the value sequence of the named rctl, leaving the
+	 * first privileged entry (if any) in rblk for the caller.
+	 */
+	for (rv = pr_getrctl(pr, name, NULL, rblk, RCTL_FIRST); rv == 0;
+	    rv = pr_getrctl(pr, name, rblk, rblk, RCTL_NEXT)) {
+		if (rctlblk_get_privilege(rblk) == RCPRIV_PRIVILEGED)
+			return (B_TRUE);
+	}
+	return (B_FALSE);
+}
+
+/*
+ * Apply the current rctl settings to the specified, running zone.
+ */
+int
+zonecfg_apply_rctls(char *zone_name, zone_dochandle_t handle)
+{
+	int err;
+	int res = Z_OK;		/* result handed back to the caller */
+	rctlblk_t *rblk;	/* scratch block shared by get and set calls */
+	pr_info_handle_t p;	/* process grabbed inside the target zone */
+	struct zone_rctltab rctl;
+
+	if ((err = zonecfg_setrctlent(handle)) != Z_OK)
+		return (err);
+
+	if ((rblk = (rctlblk_t *)malloc(rctlblk_size())) == NULL) {
+		(void) zonecfg_endrctlent(handle);
+		return (Z_NOMEM);
+	}
+
+	if (!grab_zone_proc(zone_name, &p)) {
+		(void) zonecfg_endrctlent(handle);
+		free(rblk);
+		return (Z_SYSTEM);
+	}
+
+	while (zonecfg_getrctlent(handle, &rctl) == Z_OK) {
+		char *rname;
+		struct zone_rctlvaltab *valptr;
+
+		rname = rctl.zone_rctl_name;
+		/* NOTE(review): valptr list is never freed here - confirm */
+		/* first delete all current privileged settings for this rctl */
+		while (get_priv_rctl(p.pr, rname, rblk)) {
+			if (pr_setrctl(p.pr, rname, NULL, rblk, RCTL_DELETE) !=
+			    0) {
+				res = Z_SYSTEM;
+				goto done;
+			}
+		}
+
+		/* now set each new value for the rctl */
+		for (valptr = rctl.zone_rctl_valptr; valptr != NULL;
+		    valptr = valptr->zone_rctlval_next) {
+			if ((err = zonecfg_construct_rctlblk(valptr, rblk))
+			    != Z_OK) {
+				res = errno = err;
+				goto done;	/* err also lands in errno */
+			}
+
+			if (pr_setrctl(p.pr, rname, NULL, rblk, RCTL_INSERT)) {
+				res = Z_SYSTEM;
+				goto done;
+			}
+		}
+	}
+
+done:	/* shared exit: undo the grab, the malloc and the setrctlent */
+	release_process(p.pr);
+	free(rblk);
+	(void) zonecfg_endrctlent(handle);
+
+	return (res);
+}
+
+static const xmlChar *
+nm_to_dtd(char *nm)
+{
+	/* resource type names understood here and their DTD elements */
+	static const struct {
+		const char *rsrc;
+		const xmlChar *elem;
+	} map[] = {
+		{ "device", DTD_ELEM_DEVICE }, { "fs", DTD_ELEM_FS },
+		{ "inherit-pkg-dir", DTD_ELEM_IPD }, { "net", DTD_ELEM_NET },
+		{ "attr", DTD_ELEM_ATTR }, { "rctl", DTD_ELEM_RCTL },
+		{ "dataset", DTD_ELEM_DATASET }
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof (map) / sizeof (map[0]); i++)
+		if (strcmp(nm, map[i].rsrc) == 0)
+			return (map[i].elem);
+	return (NULL);
+}
+
+int
+zonecfg_num_resources(zone_dochandle_t handle, char *rsrc)
+{
+	const xmlChar *elem_name = nm_to_dtd(rsrc);
+	xmlNodePtr node;
+	int count = 0;
+
+	/* An unknown type or a failed zonecfg_setent() both report 0. */
+	if (elem_name == NULL || zonecfg_setent(handle) != Z_OK)
+		return (0);
+
+	/* Tally the nodes from the handle's cursor whose name matches. */
+	node = handle->zone_dh_cur;
+	while (node != NULL) {
+		if (xmlStrcmp(node->name, elem_name) == 0)
+			count++;
+		node = node->next;
+	}
+	(void) zonecfg_endent(handle);
+	return (count);
+}
+
+int
+zonecfg_del_all_resources(zone_dochandle_t handle, char *rsrc)
+{
+	const xmlChar *elem_name;
+	xmlNodePtr node, following;
+	int status;
+
+	if ((elem_name = nm_to_dtd(rsrc)) == NULL)
+		return (Z_NO_RESOURCE_TYPE);
+
+	if ((status = zonecfg_setent(handle)) != Z_OK)
+		return (status);
+
+	/*
+	 * The successor is saved before each node is examined because a
+	 * matching node is unlinked and freed, after which its own next
+	 * pointer can no longer be followed.
+	 */
+	for (node = handle->zone_dh_cur; node != NULL; node = following) {
+		following = node->next;
+
+		if (xmlStrcmp(node->name, elem_name) != 0)
+			continue;
+
+		xmlUnlinkNode(node);
+		xmlFreeNode(node);
+	}
+
+	(void) zonecfg_endent(handle);
+	return (Z_OK);
+}
+
+static boolean_t
+valid_uint(char *s, uint64_t *n)
+{
+	char *endp;
+
+	/*
+	 * Parse s as an unsigned decimal into *n; B_TRUE only if all of s
+	 * was consumed.  '-' is refused because strtoull() would negate;
+	 * endp == s catches "no digits", for which EINVAL is optional.
+	 */
+	if (strchr(s, '-') != NULL)
+		return (B_FALSE);
+	errno = 0;
+	*n = strtoull(s, &endp, 10);
+	return (errno == 0 && endp != s && *endp == '\0' ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Convert a string holding a non-negative number (possibly a fraction) into
+ * a byte count.  An optional K, M, G or T modifier is treated as a power of
+ * two (not 10).  Returns 0 with the result in *bytes, or -1 if the string
+ * has no number, is negative, has an unknown modifier or is too large.
+ */
+int
+zonecfg_str_to_bytes(char *str, uint64_t *bytes)
+{
+	long double val;
+	char *unitp;
+	uint64_t scale = 1;
+
+	/* unitp == str means strtold() found no number to convert */
+	val = strtold(str, &unitp);
+	if (unitp == str || val < 0)
+		return (-1);
+
+	/* remove any leading white space from units string */
+	while (isspace((unsigned char)*unitp) != 0)
+		++unitp;
+
+	/* if no units explicitly set, the value is a plain byte count */
+	if (*unitp != '\0') {
+		int i;
+		char *units[] = {"K", "M", "G", "T", NULL};
+
+		scale = 1024;
+		for (i = 0; units[i] != NULL; i++) {
+			if (strcasecmp(unitp, units[i]) == 0)
+				break;
+			scale <<= 10;
+		}
+		if (units[i] == NULL)
+			return (-1);
+	}
+	/* out-of-range (or NaN) products cannot be cast to uint64_t */
+	if (!(val * scale < 18446744073709551616.0L))
+		return (-1);
+	*bytes = (uint64_t)(val * scale);
+	return (0);
+}
+
+boolean_t
+zonecfg_valid_ncpus(char *lowstr, char *highstr)
+{
+	uint64_t ncpu_lo, ncpu_hi;
+
+	/* Both bounds must parse, with 1 <= low <= high. */
+	if (valid_uint(lowstr, &ncpu_lo) && valid_uint(highstr, &ncpu_hi) &&
+	    ncpu_lo >= 1 && ncpu_lo <= ncpu_hi)
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+boolean_t
+zonecfg_valid_importance(char *impstr)
+{
+	uint64_t parsed;
+
+	/*
+	 * Any string that valid_uint() accepts is a valid importance.
+	 */
+	return (valid_uint(impstr, &parsed) ? B_TRUE : B_FALSE);
+}
+
+boolean_t
+zonecfg_valid_alias_limit(char *name, char *limitstr, uint64_t *limit)
+{
+	struct alias *ap;
+
+	/* Find the alias; the table ends at the NULL shortname entry. */
+	for (ap = aliases; ap->shortname != NULL; ap++) {
+		if (strcmp(name, ap->shortname) != 0)
+			continue;
+
+		/* The limit must parse and respect the alias's floor. */
+		if (valid_uint(limitstr, limit) && *limit >= ap->low_limit)
+			return (B_TRUE);
+		return (B_FALSE);
+	}
+	return (B_FALSE);
+}
+
+boolean_t
+zonecfg_valid_memlimit(char *memstr, uint64_t *mem_val)
+{
+	/*
+	 * Valid exactly when zonecfg_str_to_bytes() can convert the string.
+	 */
+	return (zonecfg_str_to_bytes(memstr, mem_val) == 0 ? B_TRUE : B_FALSE);
+}
+
+static int	/* always res, so callers can return the call directly */
+zerr_pool(char *pool_err, int err_size, int res)
+{
+	(void) strlcpy(pool_err, pool_strerror(pool_error()), err_size);
+	return (res);
+}
+
+static int
+create_tmp_pset(char *pool_err, int err_size, pool_conf_t *pconf, pool_t *pool,
+    char *name, int min, int max)
+{
+	pool_resource_t *pset;
+	pool_elem_t *pset_elem;
+	pool_value_t *limit;
+	boolean_t stored;
+
+	/*
+	 * Create the pset, associate it with the pool, obtain its element
+	 * view and a scratch value; the first step to fail ends the attempt.
+	 */
+	if ((pset = pool_resource_create(pconf, "pset", name)) == NULL ||
+	    pool_associate(pconf, pool, pset) != PO_SUCCESS ||
+	    (pset_elem = pool_resource_to_elem(pconf, pset)) == NULL ||
+	    (limit = pool_value_alloc()) == NULL)
+		return (zerr_pool(pool_err, err_size, Z_POOL));
+
+	/* set the maximum number of cpus for the pset */
+	pool_value_set_uint64(limit, (uint64_t)max);
+	stored = (pool_put_property(pconf, pset_elem, "pset.max", limit) ==
+	    PO_SUCCESS) ? B_TRUE : B_FALSE;
+
+	/* set the minimum, but only once the maximum has been stored */
+	if (stored) {
+		pool_value_set_uint64(limit, (uint64_t)min);
+		if (pool_put_property(pconf, pset_elem, "pset.min", limit) !=
+		    PO_SUCCESS)
+			stored = B_FALSE;
+	}
+
+	/* the value is released before any error text is captured */
+	pool_value_free(limit);
+
+	if (!stored)
+		return (zerr_pool(pool_err, err_size, Z_POOL));
+
+	return (Z_OK);
+}
+
+static int
+create_tmp_pool(char *pool_err, int err_size, pool_conf_t *pconf, char *name,
+    struct zone_psettab *pset_tab)
+{
+	pool_t *pool;
+	int res = Z_OK;	/* becomes the code of the first failing step */
+
+	/* open pconf as a temporary (PO_TEMP) pool configuration */
+	if (pool_conf_open(pconf, NULL, PO_TEMP) != PO_SUCCESS) {
+		res = zerr_pool(pool_err, err_size, Z_POOL);
+		return (res);	/* open failed, so there is nothing to close */
+	}
+
+	if ((pool = pool_create(pconf, name)) == NULL) {
+		res = zerr_pool(pool_err, err_size, Z_POOL_CREATE);
+		goto done;
+	}
+
+	/* set pool importance, when the configuration supplies one */
+	if (pset_tab->zone_importance[0] != '\0') {
+		pool_elem_t *elem;
+		pool_value_t *val;
+
+		if ((elem = pool_to_elem(pconf, pool)) == NULL) {
+			res = zerr_pool(pool_err, err_size, Z_POOL);
+			goto done;
+		}
+
+		if ((val = pool_value_alloc()) == NULL) {
+			res = zerr_pool(pool_err, err_size, Z_POOL);
+			goto done;
+		}
+
+		pool_value_set_int64(val,
+		    (int64_t)atoi(pset_tab->zone_importance));
+
+		if (pool_put_property(pconf, elem, "pool.importance", val)
+		    != PO_SUCCESS) {
+			res = zerr_pool(pool_err, err_size, Z_POOL);
+			pool_value_free(val);
+			goto done;
+		}
+
+		pool_value_free(val);
+	}
+
+	if ((res = create_tmp_pset(pool_err, err_size, pconf, pool, name,
+	    atoi(pset_tab->zone_ncpu_min),
+	    atoi(pset_tab->zone_ncpu_max))) != Z_OK)
+		goto done;
+
+	/* stop if libpool already reports the configuration invalid */
+	if (pool_conf_status(pconf) == POF_INVALID) {
+		res = zerr_pool(pool_err, err_size, Z_POOL);
+		goto done;
+	}
+
+	/*
+	 * This validation is the one we expect to fail if the user specified
+	 * an invalid configuration (too many cpus) for this system.
+	 */
+	if (pool_conf_validate(pconf, POV_RUNTIME) != PO_SUCCESS) {
+		res = zerr_pool(pool_err, err_size, Z_POOL_CREATE);
+		goto done;
+	}
+
+	/*
+	 * Commit the dynamic configuration but not the pool configuration
+	 * file.
+	 */
+	if (pool_conf_commit(pconf, 1) != PO_SUCCESS)
+		res = zerr_pool(pool_err, err_size, Z_POOL);
+
+done:	/* reached from every path that follows a successful open */
+	(void) pool_conf_close(pconf);
+	return (res);
+}
+
+static int
+get_running_tmp_pset(pool_conf_t *pconf, pool_t *pool, pool_resource_t *pset,
+    struct zone_psettab *pset_tab)
+{
+	int nfound = 0;
+	pool_elem_t *pe;
+	pool_value_t *pv;
+	int64_t val_int;
+	uint64_t val_uint;
+
+	/* read the running pool's settings back as strings in pset_tab */
+	if ((pv = pool_value_alloc()) == NULL)
+		return (PO_FAIL);
+	if (pool != NULL) {
+		pe = pool_to_elem(pconf, pool);
+		if (pool_get_property(pconf, pe, "pool.importance", pv)
+		    != POC_INVAL) {
+			(void) pool_value_get_int64(pv, &val_int);
+			(void) snprintf(pset_tab->zone_importance,
+			    sizeof (pset_tab->zone_importance), "%lld",
+			    (long long)val_int);
+			nfound++;
+		}
+	}
+
+	if (pset != NULL) {
+		pe = pool_resource_to_elem(pconf, pset);
+		if (pool_get_property(pconf, pe, "pset.min", pv) != POC_INVAL) {
+			(void) pool_value_get_uint64(pv, &val_uint);
+			(void) snprintf(pset_tab->zone_ncpu_min,
+			    sizeof (pset_tab->zone_ncpu_min), "%llu",
+			    (unsigned long long)val_uint);
+			nfound++;
+		}
+		if (pool_get_property(pconf, pe, "pset.max", pv) != POC_INVAL) {
+			(void) pool_value_get_uint64(pv, &val_uint);
+			(void) snprintf(pset_tab->zone_ncpu_max,
+			    sizeof (pset_tab->zone_ncpu_max), "%llu",
+			    (unsigned long long)val_uint);
+			nfound++;
+		}
+	}
+	pool_value_free(pv);
+	/* importance, pset.min and pset.max must all have been found */
+	return (nfound == 3 ? PO_SUCCESS : PO_FAIL);
+}
+
+/*
+ * Determine if a tmp pool is configured and if so, if the configuration is
+ * still valid or if it has been changed since the tmp pool was created.
+ * If the tmp pool configuration is no longer valid, delete the tmp pool.
+ *
+ * Set *exists=B_TRUE if there is an existing, valid tmp pool configuration.
+ */
+static int
+verify_del_tmp_pool(pool_conf_t *pconf, char *tmp_name, char *pool_err,
+    int err_size, struct zone_psettab *pset_tab, boolean_t *exists)
+{
+	int res = Z_OK;
+	pool_t *pool;
+	pool_resource_t *pset;
+	struct zone_psettab pset_current;	/* running pool's values */
+
+	*exists = B_FALSE;
+
+	if (pool_conf_open(pconf, pool_dynamic_location(), PO_RDWR)
+	    != PO_SUCCESS) {
+		res = zerr_pool(pool_err, err_size, Z_POOL);
+		return (res);
+	}
+
+	pool = pool_get_pool(pconf, tmp_name);
+	pset = pool_get_resource(pconf, "pset", tmp_name);
+
+	if (pool == NULL && pset == NULL) {
+		/* no tmp pool configured */
+		goto done;
+	}
+
+	/*
+	 * If an existing tmp pool for this zone is configured with the proper
+	 * settings, then the tmp pool is valid.
+	 */
+	if (get_running_tmp_pset(pconf, pool, pset, &pset_current)
+	    == PO_SUCCESS &&
+	    strcmp(pset_tab->zone_ncpu_min,
+	    pset_current.zone_ncpu_min) == 0 &&
+	    strcmp(pset_tab->zone_ncpu_max,
+	    pset_current.zone_ncpu_max) == 0 &&
+	    strcmp(pset_tab->zone_importance,
+	    pset_current.zone_importance) == 0) {
+		*exists = B_TRUE;
+
+	} else {
+		/*
+		 * An out-of-date tmp pool configuration exists.  Delete it
+		 * so that we can create the correct tmp pool config.
+		 */
+		if (pset != NULL &&
+		    pool_resource_destroy(pconf, pset) != PO_SUCCESS) {
+			res = zerr_pool(pool_err, err_size, Z_POOL);
+			goto done;
+		}
+
+		if (pool != NULL &&
+		    pool_destroy(pconf, pool) != PO_SUCCESS) {
+			res = zerr_pool(pool_err, err_size, Z_POOL);
+			goto done;
+		}
+
+		/* commit dynamic config */
+		if (pool_conf_commit(pconf, 0) != PO_SUCCESS)
+			res = zerr_pool(pool_err, err_size, Z_POOL);
+	}
+
+done:	/* every path after a successful open closes pconf here */
+	(void) pool_conf_close(pconf);
+
+	return (res);
+}
+
+/*
+ * Destroy the tmp pool and tmp pset named after the given zone, if either
+ * exists in the dynamic pool configuration.
+ *
+ * Returns Z_OK when the pools facility is not enabled or when there was
+ * nothing to destroy.  A libpool failure returns Z_POOL with the libpool
+ * error text copied into pool_err.
+ */
+int
+zonecfg_destroy_tmp_pool(char *zone_name, char *pool_err, int err_size)
+{
+	char tmp_name[MAX_TMP_POOL_NAME];
+	pool_resource_t *tmp_pset;
+	pool_t *tmp_pool;
+	pool_conf_t *dyn;
+	int pools_state;
+	int rv = Z_OK;
+
+	/* if pools not enabled then nothing to do */
+	if (pool_get_status(&pools_state) != PO_SUCCESS ||
+	    pools_state != POOL_ENABLED)
+		return (Z_OK);
+
+	if ((dyn = pool_conf_alloc()) == NULL)
+		return (zerr_pool(pool_err, err_size, Z_POOL));
+
+	(void) snprintf(tmp_name, sizeof (tmp_name), TMP_POOL_NAME, zone_name);
+
+	if (pool_conf_open(dyn, pool_dynamic_location(), PO_RDWR) !=
+	    PO_SUCCESS) {
+		rv = zerr_pool(pool_err, err_size, Z_POOL);
+		pool_conf_free(dyn);
+		return (rv);
+	}
+
+	tmp_pool = pool_get_pool(dyn, tmp_name);
+	tmp_pset = pool_get_resource(dyn, "pset", tmp_name);
+
+	/*
+	 * When something was found: destroy the pset, then the pool, then
+	 * commit the dynamic config.  The chain stops at the first step
+	 * that fails and that failure is what gets reported.
+	 */
+	if (tmp_pool != NULL || tmp_pset != NULL) {
+		if ((tmp_pset != NULL &&
+		    pool_resource_destroy(dyn, tmp_pset) != PO_SUCCESS) ||
+		    (tmp_pool != NULL &&
+		    pool_destroy(dyn, tmp_pool) != PO_SUCCESS) ||
+		    pool_conf_commit(dyn, 0) != PO_SUCCESS)
+			rv = zerr_pool(pool_err, err_size, Z_POOL);
+	}
+
+	(void) pool_conf_close(dyn);
+	pool_conf_free(dyn);
+
+	return (rv);
+}
+
+/*
+ * Attempt to bind to a tmp pool for this zone.  If there is no tmp pool
+ * configured, we just return Z_OK.
+ *
+ * We either attempt to create the tmp pool for this zone or rebind to an
+ * existing tmp pool for this zone.
+ *
+ * Rebinding is used when a zone with a tmp pool reboots so that we don't have
+ * to recreate the tmp pool.  To do this we need to be sure we work correctly
+ * for the following cases:
+ *
+ *	- there is an existing, properly configured tmp pool.
+ *	- zonecfg added tmp pool after zone was booted, must now create.
+ *	- zonecfg updated tmp pool config after zone was booted, in this case
+ *	  we destroy the old tmp pool and create a new one.
+ */
+int
+zonecfg_bind_tmp_pool(zone_dochandle_t handle, zoneid_t zoneid, char *pool_err,
+    int err_size)
+{
+	struct zone_psettab pset_tab;
+	int err;
+	int status;
+	pool_conf_t *pconf;
+	boolean_t exists;
+	char zone_name[ZONENAME_MAX];
+	char tmp_name[MAX_TMP_POOL_NAME];
+
+	/*
+	 * No entry: no temporary pool is configured and we're done.  Other
+	 * failures are returned rather than going on to trust pset_tab.
+	 */
+	if ((err = zonecfg_lookup_pset(handle, &pset_tab)) != Z_OK)
+		return (err == Z_NO_ENTRY ? Z_OK : err);
+
+	/* the tmp pool is named after the zone; no name, no pool */
+	if (getzonenamebyid(zoneid, zone_name, sizeof (zone_name)) < 0)
+		return (Z_SYSTEM);
+
+	/*
+	 * importance might not have a value but we need to validate it here,
+	 * so set the default.
+	 */
+	if (pset_tab.zone_importance[0] == '\0')
+		(void) strlcpy(pset_tab.zone_importance, "1",
+		    sizeof (pset_tab.zone_importance));
+
+	/* if pools not enabled, enable them now */
+	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
+		if (pool_set_status(POOL_ENABLED) != PO_SUCCESS)
+			return (Z_POOL_ENABLE);
+	}
+
+	if ((pconf = pool_conf_alloc()) == NULL)
+		return (zerr_pool(pool_err, err_size, Z_POOL));
+
+	(void) snprintf(tmp_name, sizeof (tmp_name), TMP_POOL_NAME, zone_name);
+
+	/*
+	 * Check if a valid tmp pool/pset already exists.  If so, we just
+	 * reuse it.
+	 */
+	if ((err = verify_del_tmp_pool(pconf, tmp_name, pool_err, err_size,
+	    &pset_tab, &exists)) != Z_OK) {
+		pool_conf_free(pconf);
+		return (err);
+	}
+
+	if (!exists)
+		err = create_tmp_pool(pool_err, err_size, pconf, tmp_name,
+		    &pset_tab);
+
+	pool_conf_free(pconf);
+	if (err != Z_OK)
+		return (err);
+
+	/* Bind the zone to the pool. */
+	if (pool_set_binding(tmp_name, P_ZONEID, zoneid) != PO_SUCCESS)
+		return (zerr_pool(pool_err, err_size, Z_POOL_BIND));
+
+	return (Z_OK);
+}
+
+/*
+ * Attempt to bind to a permanent pool for this zone.  If there is no
+ * permanent pool configured, we just return Z_OK.
+ */
+int
+zonecfg_bind_pool(zone_dochandle_t handle, zoneid_t zoneid, char *pool_err,
+    int err_size)
+{
+	pool_conf_t *poolconf;
+	pool_t *pool;
+	char poolname[MAXPATHLEN];
+	int status;
+	int error;
+
+	/*
+	 * Find the pool mentioned in the zone configuration, and bind to it.
+	 */
+	error = zonecfg_get_pool(handle, poolname, sizeof (poolname));
+	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
+		/*
+		 * The property is not set on the zone, so the zone
+		 * should be bound to the default pool.  But that's
+		 * already done by the kernel, so we can just return.
+		 */
+		return (Z_OK);
+	}
+	if (error != Z_OK) {
+		/*
+		 * Not an error, even though it shouldn't be happening.
+		 */
+		return (Z_OK);
+	}
+	/*
+	 * Don't do anything if pools aren't enabled.
+	 */
+	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED)
+		return (Z_POOLS_NOT_ACTIVE);
+
+	/*
+	 * Try to provide a sane error message if the requested pool doesn't
+	 * exist.
+	 */
+	if ((poolconf = pool_conf_alloc()) == NULL)
+		return (zerr_pool(pool_err, err_size, Z_POOL));
+
+	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
+	    PO_SUCCESS) {
+		pool_conf_free(poolconf);
+		return (zerr_pool(pool_err, err_size, Z_POOL));
+	}
+	pool = pool_get_pool(poolconf, poolname);
+	(void) pool_conf_close(poolconf);
+	pool_conf_free(poolconf);
+	if (pool == NULL)
+		return (Z_NO_POOL);
+
+	/*
+	 * Bind the zone to the pool.
+	 */
+	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
+		/* if bind fails, return poolname for the error msg */
+		(void) strlcpy(pool_err, poolname, err_size);
+		return (Z_POOL_BIND);
+	}
+
+	return (Z_OK);
+}
+
+
+static boolean_t
+svc_enabled(char *svc_name)
+{
+	scf_simple_prop_t	*prop;
+	boolean_t		found = B_FALSE;
+
+	prop = scf_simple_prop_get(NULL, svc_name, SCF_PG_GENERAL,
+	    SCF_PROPERTY_ENABLED);
+
+	if (scf_simple_prop_numvalues(prop) == 1 &&
+	    *scf_simple_prop_next_boolean(prop) != 0)
+		found = B_TRUE;
+
+	scf_simple_prop_free(prop);
+
+	return (found);
+}
+
+/*
+ * If the zone has capped-memory, make sure the rcap service is enabled.
+ */
+int
+zonecfg_enable_rcapd(char *err, int size)
+{
+	if (!svc_enabled(RCAP_SERVICE) &&
+	    smf_enable_instance(RCAP_SERVICE, 0) == -1) {
+		(void) strlcpy(err, scf_strerror(scf_error()), size);
+		return (Z_SYSTEM);
+	}
+
+	return (Z_OK);
+}
+
+/*
+ * Return true if pset has cpu range specified and poold is not enabled.
+ */
+boolean_t
+zonecfg_warn_poold(zone_dochandle_t handle)
+{
+	struct zone_psettab pset_tab;
+	int min, max;
+	int err;
+
+	err = zonecfg_lookup_pset(handle, &pset_tab);
+
+	/* no pset, or a failed lookup (pset_tab unusable): nothing to warn */
+	if (err != Z_OK)
+		return (B_FALSE);
+
+	min = atoi(pset_tab.zone_ncpu_min);
+	max = atoi(pset_tab.zone_ncpu_max);
+
+	/* range not specified, no need for poold */
+	if (min == max)
+		return (B_FALSE);
+
+	/* we have a range, check if poold service is enabled */
+	if (svc_enabled(POOLD_SERVICE))
+		return (B_FALSE);
+
+	return (B_TRUE);
+}
+
+/* Copy the "pool.scheduler" property of pool poolname into class. */
+static int
+get_pool_sched_class(char *poolname, char *class, int clsize)
+{
+	int status;
+	int res = Z_NO_POOL;
+	pool_conf_t *poolconf;
+	pool_t *pool;
+	pool_value_t *pv;
+	const char *sched_str;
+
+	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED)
+		return (Z_NO_POOL);
+
+	if ((poolconf = pool_conf_alloc()) == NULL)
+		return (Z_NO_POOL);
+
+	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
+	    PO_SUCCESS) {
+		pool_conf_free(poolconf);
+		return (Z_NO_POOL);
+	}
+
+	/* pv is allocated only here so one path releases it on every exit */
+	if ((pool = pool_get_pool(poolconf, poolname)) != NULL &&
+	    (pv = pool_value_alloc()) != NULL) {
+		/* Z_OK only once a class name has actually been copied out */
+		res = Z_NO_ENTRY;
+		if (pool_get_property(poolconf, pool_to_elem(poolconf, pool),
+		    "pool.scheduler", pv) != POC_INVAL &&
+		    pool_value_get_string(pv, &sched_str) == PO_SUCCESS)
+			res = (strlcpy(class, sched_str, clsize) >= clsize) ?
+			    Z_TOO_BIG : Z_OK;
+		pool_value_free(pv);
+	}
+
+	(void) pool_conf_close(poolconf);
+	pool_conf_free(poolconf);
+	return (res);
+}
+
+/*
+ * Get the default scheduling class for the zone.  This will either be the
+ * class set on the zone's pool or the system default scheduling class.
+ */
+int
+zonecfg_get_dflt_sched_class(zone_dochandle_t handle, char *class, int clsize)
+{
+	char poolname[MAXPATHLEN];
+
+	if (zonecfg_get_pool(handle, poolname, sizeof (poolname)) == Z_OK) {
+		/* check if the zone's pool specified a sched class */
+		if (get_pool_sched_class(poolname, class, clsize) == Z_OK)
+			return (Z_OK);
+	}
+
+	if (priocntl(0, 0, PC_GETDFLCL, class, (uint64_t)clsize) == -1)
+		return (Z_TOO_BIG);
+
+	return (Z_OK);
+}
+
 int
 zonecfg_setfsent(zone_dochandle_t handle)
 {
@@ -4825,6 +5864,509 @@ zonecfg_enddsent(zone_dochandle_t handle)
 	return (zonecfg_endent(handle));
 }
 
+/*
+ * Support for aliased rctls; that is, rctls that have simplified names in
+ * zonecfg.  For example, max-lwps is an alias for a well defined zone.max-lwps
+ * rctl.  If there are multiple existing values for one of these rctls or if
+ * there is a single value that does not match the well defined template (i.e.
+ * it has a different action) then we cannot treat the rctl as having an alias
+ * so we return Z_ALIAS_DISALLOW.  That means that the rctl cannot be
+ * managed in zonecfg via an alias and that the standard rctl syntax must be
+ * used.
+ *
+ * The possible return values are:
+ *	Z_NO_PROPERTY_ID - invalid alias name
+ *	Z_ALIAS_DISALLOW - pre-existing, incompatible rctl definition
+ *	Z_NO_ENTRY - no rctl is configured for this alias
+ *	Z_OK - we got a valid rctl for the specified alias
+ */
+int
+zonecfg_get_aliased_rctl(zone_dochandle_t handle, char *name, uint64_t *rval)
+{
+	boolean_t found = B_FALSE;
+	boolean_t found_val = B_FALSE;
+	xmlNodePtr cur, val;
+	char savedname[MAXNAMELEN];
+	struct zone_rctlvaltab rctl;
+	int i;
+	int err;
+
+	for (i = 0; aliases[i].shortname != NULL; i++)
+		if (strcmp(name, aliases[i].shortname) == 0)
+			break;
+
+	if (aliases[i].shortname == NULL)
+		return (Z_NO_PROPERTY_ID);
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+
+	cur = handle->zone_dh_cur;
+	for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
+		if (xmlStrcmp(cur->name, DTD_ELEM_RCTL) != 0)
+			continue;
+		if ((fetchprop(cur, DTD_ATTR_NAME, savedname,
+		    sizeof (savedname)) == Z_OK) &&
+		    (strcmp(savedname, aliases[i].realname) == 0)) {
+
+			/*
+			 * If we already saw one of these, we can't have an
+			 * alias since we just found another.
+			 */
+			if (found)
+				return (Z_ALIAS_DISALLOW);
+			found = B_TRUE;
+
+			for (val = cur->xmlChildrenNode; val != NULL;
+			    val = val->next) {
+				/*
+				 * If we already have one value, we can't have
+				 * an alias since we just found another.
+				 */
+				if (found_val)
+					return (Z_ALIAS_DISALLOW);
+				found_val = B_TRUE;
+
+				if ((fetchprop(val, DTD_ATTR_PRIV,
+				    rctl.zone_rctlval_priv,
+				    sizeof (rctl.zone_rctlval_priv)) != Z_OK))
+					return (Z_ALIAS_DISALLOW);
+				if ((fetchprop(val, DTD_ATTR_LIMIT,
+				    rctl.zone_rctlval_limit,
+				    sizeof (rctl.zone_rctlval_limit)) != Z_OK))
+					return (Z_ALIAS_DISALLOW);
+				if ((fetchprop(val, DTD_ATTR_ACTION,
+				    rctl.zone_rctlval_action,
+				    sizeof (rctl.zone_rctlval_action)) != Z_OK))
+					return (Z_ALIAS_DISALLOW);
+			}
+
+			/* need one fully read value matching the template */
+			if (!found_val || strcmp(rctl.zone_rctlval_priv,
+			    aliases[i].priv) != 0 ||
+			    strcmp(rctl.zone_rctlval_action,
+			    aliases[i].action) != 0)
+				return (Z_ALIAS_DISALLOW);
+		}
+	}
+
+	if (found) {
+		*rval = strtoull(rctl.zone_rctlval_limit, NULL, 10);
+		return (Z_OK);
+	}
+
+	return (Z_NO_ENTRY);
+}
+
+int
+zonecfg_rm_aliased_rctl(zone_dochandle_t handle, char *name)
+{
+	int i;
+	uint64_t val;
+	struct zone_rctltab rctltab;
+
+	/*
+	 * First check that we have a valid aliased rctl to remove.
+	 * This will catch an rctl entry with non-standard values or
+	 * multiple rctl values for this name.  We need to ignore those
+	 * rctl entries.
+	 */
+	if (zonecfg_get_aliased_rctl(handle, name, &val) != Z_OK)
+		return (Z_OK);
+
+	for (i = 0; aliases[i].shortname != NULL; i++)
+		if (strcmp(name, aliases[i].shortname) == 0)
+			break;
+
+	if (aliases[i].shortname == NULL)
+		return (Z_NO_RESOURCE_ID);
+
+	(void) strlcpy(rctltab.zone_rctl_name, aliases[i].realname,
+	    sizeof (rctltab.zone_rctl_name));
+
+	return (zonecfg_delete_rctl(handle, &rctltab));
+}
+
+boolean_t
+zonecfg_aliased_rctl_ok(zone_dochandle_t handle, char *name)
+{
+	uint64_t tmp_val;
+
+	switch (zonecfg_get_aliased_rctl(handle, name, &tmp_val)) {
+	case Z_OK:
+		/*FALLTHRU*/
+	case Z_NO_ENTRY:
+		return (B_TRUE);
+	default:
+		return (B_FALSE);
+	}
+}
+
+int
+zonecfg_set_aliased_rctl(zone_dochandle_t handle, char *name, uint64_t val)
+{
+	int i;
+	int err;
+	struct zone_rctltab rctltab;
+	struct zone_rctlvaltab *rctlvaltab;
+	char buf[128];
+
+	if (!zonecfg_aliased_rctl_ok(handle, name))
+		return (Z_ALIAS_DISALLOW);
+
+	for (i = 0; aliases[i].shortname != NULL; i++)
+		if (strcmp(name, aliases[i].shortname) == 0)
+			break;
+
+	if (aliases[i].shortname == NULL)
+		return (Z_NO_RESOURCE_ID);
+
+	/* remove any pre-existing definition for this rctl */
+	(void) zonecfg_rm_aliased_rctl(handle, name);
+
+	(void) strlcpy(rctltab.zone_rctl_name, aliases[i].realname,
+	    sizeof (rctltab.zone_rctl_name));
+
+	rctltab.zone_rctl_valptr = NULL;
+
+	if ((rctlvaltab = calloc(1, sizeof (struct zone_rctlvaltab))) == NULL)
+		return (Z_NOMEM);
+
+	(void) snprintf(buf, sizeof (buf), "%llu", (unsigned long long)val);
+
+	(void) strlcpy(rctlvaltab->zone_rctlval_priv, aliases[i].priv,
+	    sizeof (rctlvaltab->zone_rctlval_priv));
+	(void) strlcpy(rctlvaltab->zone_rctlval_limit, buf,
+	    sizeof (rctlvaltab->zone_rctlval_limit));
+	(void) strlcpy(rctlvaltab->zone_rctlval_action, aliases[i].action,
+	    sizeof (rctlvaltab->zone_rctlval_action));
+	rctlvaltab->zone_rctlval_next = NULL;
+
+	/* rctlvaltab is staging only (not kept past the add -- confirm) */
+	if ((err = zonecfg_add_rctl_value(&rctltab, rctlvaltab)) == Z_OK)
+		err = zonecfg_add_rctl(handle, &rctltab);
+	free(rctlvaltab);
+	return (err);
+}
+
+static int
+delete_tmp_pool(zone_dochandle_t handle)
+{
+	int err;
+	xmlNodePtr cur;
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+	cur = handle->zone_dh_cur;	/* read after prep, as lookups do */
+	for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
+		if (xmlStrcmp(cur->name, DTD_ELEM_TMPPOOL) == 0) {
+			xmlUnlinkNode(cur);
+			xmlFreeNode(cur);
+			return (Z_OK);
+		}
+	}
+
+	return (Z_NO_RESOURCE_ID);
+}
+
+static int
+modify_tmp_pool(zone_dochandle_t handle, char *pool_importance)
+{
+	int err;
+	xmlNodePtr newnode;
+
+	err = delete_tmp_pool(handle);
+	if (err != Z_OK && err != Z_NO_RESOURCE_ID)
+		return (err);
+
+	if (*pool_importance != '\0') {
+		if ((err = operation_prep(handle)) != Z_OK)
+			return (err);
+		/* use the live cursor, not one saved before the calls above */
+		newnode = xmlNewTextChild(handle->zone_dh_cur, NULL,
+		    DTD_ELEM_TMPPOOL, NULL);
+		if ((err = newprop(newnode, DTD_ATTR_IMPORTANCE,
+		    pool_importance)) != Z_OK)
+			return (err);
+	}
+
+	return (Z_OK);
+}
+
+static int
+add_pset_core(zone_dochandle_t handle, struct zone_psettab *tabptr)
+{
+	xmlNodePtr newnode, cur = handle->zone_dh_cur;
+	int err;
+
+	newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_PSET, NULL);
+	if ((err = newprop(newnode, DTD_ATTR_NCPU_MIN,
+	    tabptr->zone_ncpu_min)) != Z_OK)
+		return (err);
+	if ((err = newprop(newnode, DTD_ATTR_NCPU_MAX,
+	    tabptr->zone_ncpu_max)) != Z_OK)
+		return (err);
+
+	if ((err = modify_tmp_pool(handle, tabptr->zone_importance)) != Z_OK)
+		return (err);
+
+	return (Z_OK);
+}
+
+int
+zonecfg_add_pset(zone_dochandle_t handle, struct zone_psettab *tabptr)
+{
+	int err;
+
+	if (tabptr == NULL)
+		return (Z_INVAL);
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+
+	if ((err = add_pset_core(handle, tabptr)) != Z_OK)
+		return (err);
+
+	return (Z_OK);
+}
+
+int
+zonecfg_delete_pset(zone_dochandle_t handle)
+{
+	int err;
+	int res = Z_NO_RESOURCE_ID;
+	xmlNodePtr cur;
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+	cur = handle->zone_dh_cur;	/* read after prep, as lookups do */
+	for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
+		if (xmlStrcmp(cur->name, DTD_ELEM_PSET) == 0) {
+			xmlUnlinkNode(cur);
+			xmlFreeNode(cur);
+			res = Z_OK;
+			break;
+		}
+	}
+
+	/*
+	 * Once we have msets, we should check that an mset
+	 * does not exist before we delete the tmp_pool data.
+	 */
+	err = delete_tmp_pool(handle);
+	if (err != Z_OK && err != Z_NO_RESOURCE_ID)
+		return (err);
+
+	return (res);
+}
+
+int
+zonecfg_modify_pset(zone_dochandle_t handle, struct zone_psettab *tabptr)
+{
+	int err;
+
+	if (tabptr == NULL)
+		return (Z_INVAL);
+
+	if ((err = zonecfg_delete_pset(handle)) != Z_OK)
+		return (err);
+
+	if ((err = add_pset_core(handle, tabptr)) != Z_OK)
+		return (err);
+
+	return (Z_OK);
+}
+
+int
+zonecfg_lookup_pset(zone_dochandle_t handle, struct zone_psettab *tabptr)
+{
+	xmlNodePtr cur;
+	int err;
+	int res = Z_NO_ENTRY;
+
+	if (tabptr == NULL)
+		return (Z_INVAL);
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+
+	/* this is an optional component */
+	tabptr->zone_importance[0] = '\0';
+
+	cur = handle->zone_dh_cur;
+	for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
+		if (xmlStrcmp(cur->name, DTD_ELEM_PSET) == 0) {
+			if ((err = fetchprop(cur, DTD_ATTR_NCPU_MIN,
+			    tabptr->zone_ncpu_min,
+			    sizeof (tabptr->zone_ncpu_min))) != Z_OK) {
+				handle->zone_dh_cur = handle->zone_dh_top;
+				return (err);
+			}
+
+			if ((err = fetchprop(cur, DTD_ATTR_NCPU_MAX,
+			    tabptr->zone_ncpu_max,
+			    sizeof (tabptr->zone_ncpu_max))) != Z_OK) {
+				handle->zone_dh_cur = handle->zone_dh_top;
+				return (err);
+			}
+
+			res = Z_OK;
+
+		} else if (xmlStrcmp(cur->name, DTD_ELEM_TMPPOOL) == 0) {
+			if ((err = fetchprop(cur, DTD_ATTR_IMPORTANCE,
+			    tabptr->zone_importance,
+			    sizeof (tabptr->zone_importance))) != Z_OK) {
+				handle->zone_dh_cur = handle->zone_dh_top;
+				return (err);
+			}
+		}
+	}
+
+	return (res);
+}
+
+int
+zonecfg_getpsetent(zone_dochandle_t handle, struct zone_psettab *tabptr)
+{
+	int err;
+
+	if ((err = zonecfg_setent(handle)) != Z_OK)
+		return (err);
+
+	err = zonecfg_lookup_pset(handle, tabptr);
+
+	(void) zonecfg_endent(handle);
+
+	return (err);
+}
+
+static int
+add_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+{
+	xmlNodePtr newnode, cur = handle->zone_dh_cur;
+	int err;
+
+	newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_MCAP, NULL);
+	if ((err = newprop(newnode, DTD_ATTR_PHYSCAP, tabptr->zone_physmem_cap))
+	    != Z_OK)
+		return (err);
+
+	return (Z_OK);
+}
+
+int
+zonecfg_delete_mcap(zone_dochandle_t handle)
+{
+	int err;
+	xmlNodePtr cur;
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+	cur = handle->zone_dh_cur;	/* read after prep, as lookups do */
+	for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
+		if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) != 0)
+			continue;
+
+		xmlUnlinkNode(cur);
+		xmlFreeNode(cur);
+		return (Z_OK);
+	}
+	return (Z_NO_RESOURCE_ID);
+}
+
+int
+zonecfg_modify_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+{
+	int err;
+
+	if (tabptr == NULL)
+		return (Z_INVAL);
+
+	err = zonecfg_delete_mcap(handle);
+	/* it is ok if there is no mcap entry */
+	if (err != Z_OK && err != Z_NO_RESOURCE_ID)
+		return (err);
+
+	if ((err = add_mcap(handle, tabptr)) != Z_OK)
+		return (err);
+
+	return (Z_OK);
+}
+
+int
+zonecfg_lookup_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+{
+	xmlNodePtr cur;
+	int err;
+
+	if (tabptr == NULL)
+		return (Z_INVAL);
+
+	if ((err = operation_prep(handle)) != Z_OK)
+		return (err);
+
+	cur = handle->zone_dh_cur;
+	for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
+		if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) != 0)
+			continue;
+		if ((err = fetchprop(cur, DTD_ATTR_PHYSCAP,
+		    tabptr->zone_physmem_cap,
+		    sizeof (tabptr->zone_physmem_cap))) != Z_OK) {
+			handle->zone_dh_cur = handle->zone_dh_top;
+			return (err);
+		}
+
+		return (Z_OK);
+	}
+
+	return (Z_NO_ENTRY);
+}
+
+static int
+getmcapent_core(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+{
+	xmlNodePtr cur;
+	int err;
+
+	if (handle == NULL)
+		return (Z_INVAL);
+
+	if ((cur = handle->zone_dh_cur) == NULL)
+		return (Z_NO_ENTRY);
+
+	for (; cur != NULL; cur = cur->next)
+		if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) == 0)
+			break;
+	if (cur == NULL) {
+		handle->zone_dh_cur = handle->zone_dh_top;
+		return (Z_NO_ENTRY);
+	}
+
+	if ((err = fetchprop(cur, DTD_ATTR_PHYSCAP, tabptr->zone_physmem_cap,
+	    sizeof (tabptr->zone_physmem_cap))) != Z_OK) {
+		handle->zone_dh_cur = handle->zone_dh_top;
+		return (err);
+	}
+
+	handle->zone_dh_cur = cur->next;
+	return (Z_OK);
+}
+
+int
+zonecfg_getmcapent(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+{
+	int err;
+
+	if ((err = zonecfg_setent(handle)) != Z_OK)
+		return (err);
+
+	err = getmcapent_core(handle, tabptr);
+
+	(void) zonecfg_endent(handle);
+
+	return (err);
+}
+
 int
 zonecfg_setpkgent(zone_dochandle_t handle)
 {
diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers
index a9d59548d3..e2bb782688 100644
--- a/usr/src/lib/libzonecfg/common/mapfile-vers
+++ b/usr/src/lib/libzonecfg/common/mapfile-vers
@@ -40,10 +40,15 @@ SUNWprivate_1.1 {
 	zonecfg_add_fs_option;
 	zonecfg_add_ipd;
 	zonecfg_add_nwif;
+	zonecfg_add_pset;
 	zonecfg_add_rctl;
 	zonecfg_add_rctl_value;
 	zonecfg_add_scratch;
+	zonecfg_aliased_rctl_ok;
+	zonecfg_apply_rctls;
 	zonecfg_attach_manifest;
+	zonecfg_bind_pool;
+	zonecfg_bind_tmp_pool;
 	zonecfg_check_handle;
 	zonecfg_close_scratch;
 	zonecfg_construct_rctlblk;
@@ -54,15 +59,20 @@ SUNWprivate_1.1 {
 	zonecfg_delete_ds;
 	zonecfg_delete_filesystem;
 	zonecfg_delete_ipd;
+	zonecfg_delete_mcap;
 	zonecfg_delete_nwif;
+	zonecfg_delete_pset;
 	zonecfg_delete_rctl;
 	zonecfg_delete_scratch;
+	zonecfg_del_all_resources;
 	zonecfg_destroy;
 	zonecfg_destroy_snapshot;
+	zonecfg_destroy_tmp_pool;
 	zonecfg_detached;
 	zonecfg_detach_save;
 	zonecfg_devperms_apply;
 	zonecfg_devwalk;
+	zonecfg_enable_rcapd;
 	zonecfg_endattrent;
 	zonecfg_enddevent;
 	zonecfg_enddevperment;
@@ -78,6 +88,7 @@ SUNWprivate_1.1 {
 	zonecfg_fini_handle;
 	zonecfg_free_fs_option_list;
 	zonecfg_free_rctl_value_list;
+	zonecfg_get_aliased_rctl;
 	zonecfg_get_attach_handle;
 	zonecfg_get_attr_boolean;
 	zonecfg_getattrent;
@@ -88,6 +99,7 @@ SUNWprivate_1.1 {
 	zonecfg_get_bootargs;
 	zonecfg_get_brand;
 	zonecfg_get_detach_info;
+	zonecfg_get_dflt_sched_class;
 	zonecfg_getdevent;
 	zonecfg_getdevperment;
 	zonecfg_getdsent;
@@ -95,6 +107,7 @@ SUNWprivate_1.1 {
 	zonecfg_get_handle;
 	zonecfg_getipdent;
 	zonecfg_get_limitpriv;
+	zonecfg_getmcapent;
 	zonecfg_get_name;
 	zonecfg_get_name_by_uuid;
 	zonecfg_getnwifent;
@@ -102,8 +115,10 @@ SUNWprivate_1.1 {
 	zonecfg_getpkgent;
 	zonecfg_get_pool;
 	zonecfg_get_privset;
+	zonecfg_getpsetent;
 	zonecfg_getrctlent;
 	zonecfg_get_root;
+	zonecfg_get_sched_class;
 	zonecfg_get_scratch;
 	zonecfg_get_snapshot_handle;
 	zonecfg_get_template_handle;
@@ -120,28 +135,35 @@ SUNWprivate_1.1 {
 	zonecfg_lookup_ds;
 	zonecfg_lookup_filesystem;
 	zonecfg_lookup_ipd;
+	zonecfg_lookup_mcap;
 	zonecfg_lookup_nwif;
+	zonecfg_lookup_pset;
 	zonecfg_lookup_rctl;
 	zonecfg_modify_attr;
 	zonecfg_modify_dev;
 	zonecfg_modify_ds;
 	zonecfg_modify_filesystem;
 	zonecfg_modify_ipd;
+	zonecfg_modify_mcap;
 	zonecfg_modify_nwif;
+	zonecfg_modify_pset;
 	zonecfg_modify_rctl;
 	zonecfg_notify_bind;
 	zonecfg_notify_critical_abort;
 	zonecfg_notify_critical_enter;
 	zonecfg_notify_critical_exit;
 	zonecfg_notify_unbind;
+	zonecfg_num_resources;
 	zonecfg_open_scratch;
 	zonecfg_remove_fs_option;
 	zonecfg_remove_rctl_value;
 	zonecfg_reverse_scratch;
+	zonecfg_rm_aliased_rctl;
 	zonecfg_rm_detached;
 	zonecfg_same_net_address;
 	zonecfg_save;
 	zonecfg_setattrent;
+	zonecfg_set_aliased_rctl;
 	zonecfg_set_autoboot;
 	zonecfg_set_bootargs;
 	zonecfg_set_brand;
@@ -158,15 +180,22 @@ SUNWprivate_1.1 {
 	zonecfg_set_pool;
 	zonecfg_setrctlent;
 	zonecfg_set_root;
+	zonecfg_set_sched;
 	zonecfg_set_zonepath;
 	zonecfg_strerror;
+	zonecfg_str_to_bytes;
 	zonecfg_validate_zonename;
+	zonecfg_valid_alias_limit;
 	zonecfg_valid_fs_type;
+	zonecfg_valid_importance;
+	zonecfg_valid_memlimit;
+	zonecfg_valid_ncpus;
 	zonecfg_valid_net_address;
 	zonecfg_valid_rctl;
 	zonecfg_valid_rctlblk;
 	zonecfg_valid_rctlname;
 	zonecfg_verify_save;
+	zonecfg_warn_poold;
 	zone_get_brand;
 	zone_get_devroot;
 	zone_get_id;
diff --git a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
index 3208af7a79..c51e89add3 100644
--- a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
+++ b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
@@ -111,7 +111,27 @@
 			mode		CDATA #REQUIRED
 			acl		CDATA #REQUIRED>
 
-<!ELEMENT zone		(filesystem | inherited-pkg-dir | network | device | deleted-device | rctl | attr | dataset | package | patch | dev-perm)*>
+<!--
+	The tmp_pool element is separate from the pset element so that
+	we can track the importance value at the pool level, where it
+	belongs, instead of at the pset level.  Once we have msets this
+	will be important since tmp psets and tmp msets will share a common
+	pool-level importance.
+-->
+<!ELEMENT tmp_pool	EMPTY>
+
+<!ATTLIST tmp_pool	importance	CDATA #REQUIRED>
+
+<!ELEMENT pset		EMPTY>
+
+<!ATTLIST pset		ncpu_min	CDATA #REQUIRED
+			ncpu_max	CDATA #REQUIRED>
+
+<!ELEMENT mcap		EMPTY>
+
+<!ATTLIST mcap		physcap		CDATA #REQUIRED>
+
+<!ELEMENT zone		(filesystem | inherited-pkg-dir | network | device | deleted-device | rctl | attr | dataset | package | patch | dev-perm | tmp_pool | pset | mcap)*>
 
 <!ATTLIST zone		name		CDATA #REQUIRED
 			zonepath	CDATA #REQUIRED
@@ -120,4 +140,5 @@
 			limitpriv	CDATA ""
 			bootargs	CDATA ""
 			brand		CDATA ""
+			scheduling-class	CDATA ""
 			version		NMTOKEN #FIXED '1'>
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index c52316079d..6ac2e461ab 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -1178,6 +1178,7 @@ f none usr/include/sys/varargs.h 644 root bin
 f none usr/include/sys/vfs.h 644 root bin
 f none usr/include/sys/vfstab.h 644 root bin
 f none usr/include/sys/vm.h 644 root bin
+f none usr/include/sys/vm_usage.h 644 root bin
 f none usr/include/sys/vmem.h 644 root bin
 f none usr/include/sys/vmem_impl.h 644 root bin
 f none usr/include/sys/vmem_impl_user.h 644 root bin
diff --git a/usr/src/pkgdefs/SUNWrcapu/depend b/usr/src/pkgdefs/SUNWrcapu/depend
index 9aaa446bca..a7375758b0 100644
--- a/usr/src/pkgdefs/SUNWrcapu/depend
+++ b/usr/src/pkgdefs/SUNWrcapu/depend
@@ -1,13 +1,12 @@
 #
-# Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 # Use is subject to license terms.
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License").  You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or http://www.opensolaris.org/os/licensing.
@@ -43,3 +42,4 @@
 P SUNWrcapr	Solaris Resource Capping Daemon (Root)
 P SUNWcsu	Core Solaris, (Usr)
 P SUNWcsl	Core Solaris, (Shared Libs)
+P SUNWzoneu	Solaris Zones (Usr)
diff --git a/usr/src/pkgdefs/SUNWzoner/prototype_com b/usr/src/pkgdefs/SUNWzoner/prototype_com
index 009de7fb9f..15661840ea 100644
--- a/usr/src/pkgdefs/SUNWzoner/prototype_com
+++ b/usr/src/pkgdefs/SUNWzoner/prototype_com
@@ -56,9 +56,11 @@ f none etc/zones/SUNWblank.xml 444 root bin
 d none lib 755 root bin
 d none lib/svc 0755 root bin
 d none lib/svc/method 0755 root bin
+f none lib/svc/method/svc-resource-mgmt 0555 root bin
 f none lib/svc/method/svc-zones 0555 root bin
 d none var 755 root sys
 d none var/svc 755 root sys
 d none var/svc/manifest 755 root sys
 d none var/svc/manifest/system 755 root sys
+f manifest var/svc/manifest/system/resource-mgmt.xml 0444 root sys
 f manifest var/svc/manifest/system/zones.xml 0444 root sys
diff --git a/usr/src/tools/scripts/bfu.sh b/usr/src/tools/scripts/bfu.sh
index 21d5a7eb67..b10d453c7b 100644
--- a/usr/src/tools/scripts/bfu.sh
+++ b/usr/src/tools/scripts/bfu.sh
@@ -332,6 +332,7 @@ superfluous_local_zone_files="
 	lib/svc/method/svc-poold
 	lib/svc/method/svc-pools
 	lib/svc/method/svc-power
+	lib/svc/method/svc-resource-mgmt
 	lib/svc/method/svc-rmvolmgr
 	lib/svc/method/svc-scheduler
 	lib/svc/method/svc-sckmd
@@ -401,6 +402,7 @@ superfluous_local_zone_files="
 	var/svc/manifest/system/poold.xml
 	var/svc/manifest/system/pools.xml
 	var/svc/manifest/system/power.xml
+	var/svc/manifest/system/resource-mgmt.xml
 	var/svc/manifest/system/scheduler.xml
 	var/svc/manifest/system/sysevent.xml
 	var/svc/manifest/system/zones.xml
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index 32a63d6c22..b2bbcbc8c3 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -334,6 +334,7 @@ GENUNIX_OBJS +=	\
 		vm_seg.o	\
 		vm_subr.o	\
 		vm_swap.o	\
+		vm_usage.o	\
 		vnode.o		\
 		vuid_queue.o	\
 		vuid_store.o	\
diff --git a/usr/src/uts/common/disp/priocntl.c b/usr/src/uts/common/disp/priocntl.c
index 3bb90cf1fa..9197dc815b 100644
--- a/usr/src/uts/common/disp/priocntl.c
+++ b/usr/src/uts/common/disp/priocntl.c
@@ -136,6 +136,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 	struct pcmpargs		pcmpargs;
 	pc_vaparms_t		vaparms;
 	char			clname[PC_CLNMSZ];
+	char			*outstr;
 	int			count;
 	kthread_id_t		retthreadp;
 	proc_t			*initpp;
@@ -145,6 +146,7 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 	int			rv = 0;
 	pid_t			saved_pid;
 	id_t			classid;
+	int			size;
 	int (*copyinfn)(const void *, void *, size_t);
 	int (*copyoutfn)(const void *, void *, size_t);
 
@@ -692,6 +694,21 @@ priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
 		ASSERT(defaultcid > 0 && defaultcid < loaded_classes);
 		break;
 
+	case PC_GETDFLCL:
+		mutex_enter(&class_lock);
+
+		if (defaultcid >= loaded_classes)
+			outstr = "";
+		else
+			outstr = sclass[defaultcid].cl_name;
+		size = strlen(outstr) + 1;
+		if (arg != NULL)
+			if ((*copyoutfn)(outstr, arg, size) != 0)
+				error = EFAULT;
+
+		mutex_exit(&class_lock);
+		break;
+
 	default:
 		error = EINVAL;
 		break;
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_tnode.c b/usr/src/uts/common/fs/tmpfs/tmp_tnode.c
index 5a7000c242..c5145cccf0 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_tnode.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_tnode.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -67,6 +66,7 @@ tmp_resv(
 	int pagecreate)		/* call anon_resv if set */
 {
 	pgcnt_t pages = btopr(delta);
+	zone_t *zone;
 
 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
 	ASSERT(tp->tn_type == VREG);
@@ -79,9 +79,10 @@ tmp_resv(
 	 *
 	 * Deny if trying to reserve more than tmpfs can allocate
 	 */
+	zone = tm->tm_vfsp->vfs_zone;
 	if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) ||
-	    (!anon_checkspace(ptob(pages + tmpfs_minfree))) ||
-	    (anon_resv(delta) == 0))) {
+	    (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) ||
+	    (anon_resv_zone(delta, zone) == 0))) {
 		return (1);
 	}
 
@@ -114,7 +115,7 @@ tmp_unresv(
 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
 	ASSERT(tp->tn_type == VREG);
 
-	anon_unresv(delta);
+	anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone);
 
 	mutex_enter(&tm->tm_contents);
 	tm->tm_anonmem -= btopr(delta);
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index d623dce3f7..aa870b124a 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -215,9 +215,26 @@ wrtmp(
 		if (delta > 0) {
 			pagecreate = 1;
 			if (tmp_resv(tm, tp, delta, pagecreate)) {
-				cmn_err(CE_WARN,
-	"%s: File system full, swap space limit exceeded",
+				/*
+				 * Log file system full in the zone that owns
+				 * the tmpfs mount, as well as in the global
+				 * zone if necessary.
+				 */
+				zcmn_err(tm->tm_vfsp->vfs_zone->zone_id,
+				    CE_WARN, "%s: File system full, "
+				    "swap space limit exceeded",
 				    tm->tm_mntpath);
+
+				if (tm->tm_vfsp->vfs_zone->zone_id !=
+				    GLOBAL_ZONEID) {
+
+					vfs_t *vfs = tm->tm_vfsp;
+
+					zcmn_err(GLOBAL_ZONEID,
+					    CE_WARN, "%s: File system full, "
+					    "swap space limit exceeded",
+					    vfs->vfs_vnodecovered->v_path);
+				}
 				error = ENOSPC;
 				break;
 			}
diff --git a/usr/src/uts/common/os/modhash.c b/usr/src/uts/common/os/modhash.c
index 19700ce685..3c63231253 100644
--- a/usr/src/uts/common/os/modhash.c
+++ b/usr/src/uts/common/os/modhash.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -165,15 +164,6 @@
  */
 #define	MH_KEYCMP(hash, key1, key2) ((hash->mh_keycmp)(key1, key2))
 
-static void i_mod_hash_clear_nosync(mod_hash_t *);
-static int i_mod_hash_find_nosync(mod_hash_t *, mod_hash_key_t,
-    mod_hash_val_t *);
-static int i_mod_hash_insert_nosync(mod_hash_t *, mod_hash_key_t,
-    mod_hash_val_t, mod_hash_hndl_t);
-static int i_mod_hash_remove_nosync(mod_hash_t *, mod_hash_key_t,
-    mod_hash_val_t *);
-static uint_t i_mod_hash(mod_hash_t *, mod_hash_key_t);
-
 /*
  * Cache for struct mod_hash_entry
  */
@@ -522,7 +512,7 @@ mod_hash_destroy_hash(mod_hash_t *hash)
  * i_mod_hash()
  * 	Call the hashing algorithm for this hash table, with the given key.
  */
-static uint_t
+uint_t
 i_mod_hash(mod_hash_t *hash, mod_hash_key_t key)
 {
 	uint_t h;
@@ -778,7 +768,7 @@ mod_hash_destroy(mod_hash_t *hash, mod_hash_key_t key)
  * mod_hash_find()
  * 	Find a value in the hash table corresponding to the given key.
  */
-static int
+int
 i_mod_hash_find_nosync(mod_hash_t *hash, mod_hash_key_t key,
     mod_hash_val_t *val)
 {
@@ -826,7 +816,7 @@ mod_hash_find_cb(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val,
 	return (res);
 }
 
-static void
+void
 i_mod_hash_walk_nosync(mod_hash_t *hash,
     uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg)
 {
@@ -870,7 +860,7 @@ mod_hash_walk(mod_hash_t *hash,
  *	Clears the given hash table by calling the destructor of every hash
  *	element and freeing up all mod_hash_entry's.
  */
-static void
+void
 i_mod_hash_clear_nosync(mod_hash_t *hash)
 {
 	int i;
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c
index 88b0258afe..fecc4a6c45 100644
--- a/usr/src/uts/common/os/pid.c
+++ b/usr/src/uts/common/os/pid.c
@@ -385,6 +385,56 @@ pgfind(pid_t pgid)
 }
 
 /*
+ * Sets P_PR_LOCK on a non-system process.  Process must be fully created
+ * and not exiting to succeed.
+ *
+ * Returns 0 on success.
+ * Returns 1 if P_PR_LOCK is set.
+ * Returns -1 if proc is in invalid state.
+ */
+int
+sprtrylock_proc(proc_t *p)
+{
+	ASSERT(MUTEX_HELD(&p->p_lock));
+
+	/* reject system, half-created, zombie and exiting processes */
+	if ((p->p_flag & (SSYS | SEXITING | SEXITLWPS)) != 0)
+		return (-1);
+	if (p->p_stat == SIDL || p->p_stat == SZOMB)
+		return (-1);
+
+	/* P_PR_LOCK is already set; report it so the caller can wait */
+	if ((p->p_proc_flag & P_PR_LOCK) != 0)
+		return (1);
+
+	p->p_proc_flag |= P_PR_LOCK;
+	THREAD_KPRI_REQUEST();
+	return (0);
+}
+
+/*
+ * Sleep until P_PR_LOCK may have been cleared.  p_lock is dropped on
+ * return and p must not be touched afterwards: the proc may have exited.
+ */
+void
+sprwaitlock_proc(proc_t *p)
+{
+	kmutex_t *plock;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	ASSERT((p->p_proc_flag & P_PR_LOCK) != 0);
+
+	/*
+	 * The proc structure can disappear while we sleep in cv_wait(),
+	 * whereas its p_lock stays valid.  Remember the lock's address
+	 * up front so it can be released without going through p again.
+	 */
+	plock = &p->p_lock;
+	cv_wait(&pr_pid_cv[p->p_slot], plock);
+	mutex_exit(plock);
+}
+
+/*
  * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
  * Returns the proc pointer on success, NULL on failure.  sprlock() is
  * really just a stripped-down version of pr_p_lock() to allow practive
@@ -394,7 +444,7 @@ proc_t *
 sprlock_zone(pid_t pid, zoneid_t zoneid)
 {
 	proc_t *p;
-	kmutex_t *mp;
+	int ret;
 
 	for (;;) {
 		mutex_enter(&pidlock);
@@ -402,31 +452,21 @@ sprlock_zone(pid_t pid, zoneid_t zoneid)
 			mutex_exit(&pidlock);
 			return (NULL);
 		}
-		/*
-		 * p_lock is persistent, but p itself is not -- it could
-		 * vanish during cv_wait().  Load p->p_lock now so we can
-		 * drop it after cv_wait() without referencing p.
-		 */
-		mp = &p->p_lock;
-		mutex_enter(mp);
+		mutex_enter(&p->p_lock);
 		mutex_exit(&pidlock);
-		/*
-		 * If the process is in some half-baked state, fail.
-		 */
-		if (p->p_stat == SZOMB || p->p_stat == SIDL ||
-		    (p->p_flag & (SEXITING | SEXITLWPS))) {
-			mutex_exit(mp);
-			return (NULL);
-		}
+
 		if (panicstr)
 			return (p);
-		if (!(p->p_proc_flag & P_PR_LOCK))
+
+		ret = sprtrylock_proc(p);
+		if (ret == -1) {
+			mutex_exit(&p->p_lock);
+			return (NULL);
+		} else if (ret == 0) {
 			break;
-		cv_wait(&pr_pid_cv[p->p_slot], mp);
-		mutex_exit(mp);
+		}
+		sprwaitlock_proc(p);
 	}
-	p->p_proc_flag |= P_PR_LOCK;
-	THREAD_KPRI_REQUEST();
 	return (p);
 }
 
diff --git a/usr/src/uts/common/os/pool.c b/usr/src/uts/common/os/pool.c
index ceb90850fa..818bb54701 100644
--- a/usr/src/uts/common/os/pool.c
+++ b/usr/src/uts/common/os/pool.c
@@ -293,6 +293,8 @@ pool_enable(void)
 	(void) nvlist_add_string(pool_sys_prop, "system.comment", "");
 	(void) nvlist_add_int64(pool_sys_prop, "system.version", 1);
 	(void) nvlist_add_byte(pool_sys_prop, "system.bind-default", 1);
+	(void) nvlist_add_string(pool_sys_prop, "system.poold.objectives",
+	    "wt-load");
 
 	(void) nvlist_alloc(&pool_default->pool_props,
 	    NV_UNIQUE_NAME, KM_SLEEP);
@@ -1309,7 +1311,7 @@ pool_do_bind(pool_t *pool, idtype_t idtype, id_t id, int flags)
 	}
 
 	if (idtype == P_PROJID) {
-		kpj = project_hold_by_id(id, GLOBAL_ZONEID, PROJECT_HOLD_FIND);
+		kpj = project_hold_by_id(id, global_zone, PROJECT_HOLD_FIND);
 		if (kpj == NULL)
 			return (ESRCH);
 		mutex_enter(&kpj->kpj_poolbind);
diff --git a/usr/src/uts/common/os/project.c b/usr/src/uts/common/os/project.c
index 6c266c0ca3..d75b60f6e9 100644
--- a/usr/src/uts/common/os/project.c
+++ b/usr/src/uts/common/os/project.c
@@ -29,6 +29,7 @@
 #include <sys/modhash.h>
 #include <sys/modctl.h>
 #include <sys/kmem.h>
+#include <sys/kstat.h>
 #include <sys/atomic.h>
 #include <sys/cmn_err.h>
 #include <sys/proc.h>
@@ -103,6 +104,8 @@ struct project_zone {
  *   acquired, the hash lock is to be acquired first.
  */
 
+static kstat_t *project_kstat_create(kproject_t *pj, zone_t *zone);
+static void project_kstat_delete(kproject_t *pj);
 
 static void
 project_data_init(kproject_data_t *data)
@@ -118,6 +121,7 @@ project_data_init(kproject_data_t *data)
 	data->kpd_locked_mem_ctl = UINT64_MAX;
 	data->kpd_contract = 0;
 	data->kpd_crypto_mem = 0;
+	data->kpd_lockedmem_kstat = NULL;
 }
 
 /*ARGSUSED*/
@@ -179,11 +183,11 @@ project_hold(kproject_t *p)
 }
 
 /*
- * kproject_t *project_hold_by_id(projid_t, zoneid_t, int)
+ * kproject_t *project_hold_by_id(projid_t, zone_t *, int)
  *
  * Overview
  *   project_hold_by_id() performs a look-up in the dictionary of projects
- *   active on the system by specified project ID + zone ID and puts a hold on
+ *   active on the system by specified project ID + zone and puts a hold on
  *   it.  The third argument defines the desired behavior in the case when
  *   project with given project ID cannot be found:
  *
@@ -202,7 +206,7 @@ project_hold(kproject_t *p)
  *   Caller must be in a context suitable for KM_SLEEP allocations.
  */
 kproject_t *
-project_hold_by_id(projid_t id, zoneid_t zoneid, int flag)
+project_hold_by_id(projid_t id, zone_t *zone, int flag)
 {
 	kproject_t *spare_p;
 	kproject_t *p;
@@ -211,9 +215,11 @@ project_hold_by_id(projid_t id, zoneid_t zoneid, int flag)
 	rctl_alloc_gp_t *gp;
 	rctl_entity_p_t e;
 	struct project_zone pz;
+	boolean_t create = B_FALSE;
+	kstat_t *ksp;
 
 	pz.kpj_id = id;
-	pz.kpj_zoneid = zoneid;
+	pz.kpj_zoneid = zone->zone_id;
 
 	if (flag == PROJECT_HOLD_FIND) {
 		mutex_enter(&project_hash_lock);
@@ -241,9 +247,10 @@ project_hold_by_id(projid_t id, zoneid_t zoneid, int flag)
 	mutex_enter(&project_hash_lock);
 	if (mod_hash_find(projects_hash, (mod_hash_key_t)&pz,
 	    (mod_hash_val_t *)&p) == MH_ERR_NOTFOUND) {
+
 		p = spare_p;
 		p->kpj_id = id;
-		p->kpj_zoneid = zoneid;
+		p->kpj_zoneid = zone->zone_id;
 		p->kpj_count = 0;
 		p->kpj_shares = 1;
 		p->kpj_nlwps = 0;
@@ -265,7 +272,7 @@ project_hold_by_id(projid_t id, zoneid_t zoneid, int flag)
 		 * Insert project into global project list.
 		 */
 		mutex_enter(&projects_list_lock);
-		if (id != 0 || zoneid != GLOBAL_ZONEID) {
+		if (id != 0 || zone != &zone0) {
 			p->kpj_next = projects_list;
 			p->kpj_prev = projects_list->kpj_prev;
 			p->kpj_prev->kpj_next = p;
@@ -279,6 +286,7 @@ project_hold_by_id(projid_t id, zoneid_t zoneid, int flag)
 			projects_list = p;
 		}
 		mutex_exit(&projects_list_lock);
+		create = B_TRUE;
 	} else {
 		mutex_exit(&curproc->p_lock);
 		mod_hash_cancel(projects_hash, &hndl);
@@ -290,10 +298,20 @@ project_hold_by_id(projid_t id, zoneid_t zoneid, int flag)
 	p->kpj_count++;
 	mutex_exit(&project_hash_lock);
 
+	/*
+	 * The kstat stores the project's zone name, as zoneid's may change
+	 * across reboots.
+	 */
+	if (create == B_TRUE) {
+		ksp = project_kstat_create(p, zone);
+		mutex_enter(&project_hash_lock);
+		ASSERT(p->kpj_data.kpd_lockedmem_kstat == NULL);
+		p->kpj_data.kpd_lockedmem_kstat = ksp;
+		mutex_exit(&project_hash_lock);
+	}
 	return (p);
 }
 
-
 /*
  * void project_rele(kproject_t *)
  *
@@ -325,6 +343,7 @@ project_rele(kproject_t *p)
 		mutex_exit(&projects_list_lock);
 
 		rctl_set_free(p->kpj_rctls);
+		project_kstat_delete(p);
 
 		if (mod_hash_destroy(projects_hash, (mod_hash_key_t)p))
 			panic("unable to delete project %d zone %d", p->kpj_id,
@@ -636,9 +655,9 @@ project_locked_mem_usage(rctl_t *rctl, struct proc *p)
 {
 	rctl_qty_t q;
 	ASSERT(MUTEX_HELD(&p->p_lock));
-	mutex_enter(&p->p_zone->zone_rctl_lock);
+	mutex_enter(&p->p_zone->zone_mem_lock);
 	q = p->p_task->tk_proj->kpj_data.kpd_locked_mem;
-	mutex_exit(&p->p_zone->zone_rctl_lock);
+	mutex_exit(&p->p_zone->zone_mem_lock);
 	return (q);
 }
 
@@ -649,7 +668,7 @@ project_locked_mem_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
 {
 	rctl_qty_t q;
 	ASSERT(MUTEX_HELD(&p->p_lock));
-	ASSERT(MUTEX_HELD(&p->p_zone->zone_rctl_lock));
+	ASSERT(MUTEX_HELD(&p->p_zone->zone_mem_lock));
 	q = p->p_task->tk_proj->kpj_data.kpd_locked_mem;
 	if (q + inc > rval->rcv_value)
 		return (1);
@@ -868,7 +887,7 @@ project_init(void)
 	rctl_add_default_limit("project.max-contracts", 10000,
 	    RCPRIV_PRIVILEGED, RCTL_LOCAL_DENY);
 
-	t0.t_proj = proj0p = project_hold_by_id(0, GLOBAL_ZONEID,
+	t0.t_proj = proj0p = project_hold_by_id(0, &zone0,
 	    PROJECT_HOLD_INSERT);
 
 	mutex_enter(&p0.p_lock);
@@ -876,3 +895,57 @@ project_init(void)
 	mutex_exit(&p0.p_lock);
 	proj0p->kpj_ntasks = 1;
 }
+
+static int
+project_lockedmem_kstat_update(kstat_t *ksp, int rw)
+{
+	kproject_kstat_t *stats = ksp->ks_data;
+	kproject_t *proj = ksp->ks_private;
+
+	/* publish the project's locked-memory usage and limit; no writes */
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+	stats->kpk_usage.value.ui64 = proj->kpj_data.kpd_locked_mem;
+	stats->kpk_value.value.ui64 = proj->kpj_data.kpd_locked_mem_ctl;
+	return (0);
+}
+
+static kstat_t *
+project_kstat_create(kproject_t *pj, zone_t *zone)
+{
+	kproject_kstat_t *stats;
+	kstat_t *ks;
+
+	ks = rctl_kstat_create_project(pj, "lockedmem", KSTAT_TYPE_NAMED,
+	    sizeof (kproject_kstat_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (ks == NULL)
+		return (NULL);
+
+	/* KSTAT_FLAG_VIRTUAL: the data buffer is allocated here, by us */
+	stats = kmem_alloc(sizeof (kproject_kstat_t), KM_SLEEP);
+	ks->ks_data = stats;
+	ks->ks_data_size += strlen(zone->zone_name) + 1;
+
+	kstat_named_init(&stats->kpk_zonename, "zonename", KSTAT_DATA_STRING);
+	kstat_named_setstr(&stats->kpk_zonename, zone->zone_name);
+	kstat_named_init(&stats->kpk_usage, "usage", KSTAT_DATA_UINT64);
+	kstat_named_init(&stats->kpk_value, "value", KSTAT_DATA_UINT64);
+	ks->ks_private = pj;
+	ks->ks_update = project_lockedmem_kstat_update;
+	kstat_install(ks);
+	return (ks);
+}
+
+static void
+project_kstat_delete(kproject_t *pj)
+{
+	void *data;	/* the kproject_kstat_t hanging off ks_data */
+
+	if (pj->kpj_data.kpd_lockedmem_kstat != NULL) {
+		data = pj->kpj_data.kpd_lockedmem_kstat->ks_data;
+		kstat_delete(pj->kpj_data.kpd_lockedmem_kstat);
+		kmem_free(data, sizeof (kproject_kstat_t));
+	}
+	pj->kpj_data.kpd_lockedmem_kstat = NULL;
+}
diff --git a/usr/src/uts/common/os/rctl.c b/usr/src/uts/common/os/rctl.c
index 4de4c74fe8..c0479005ea 100644
--- a/usr/src/uts/common/os/rctl.c
+++ b/usr/src/uts/common/os/rctl.c
@@ -29,6 +29,7 @@
 #include <sys/cmn_err.h>
 #include <sys/id_space.h>
 #include <sys/kmem.h>
+#include <sys/kstat.h>
 #include <sys/log.h>
 #include <sys/modctl.h>
 #include <sys/modhash.h>
@@ -2599,7 +2600,7 @@ rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
 		zonep = p->p_zone;
 	}
 
-	mutex_enter(&zonep->zone_rctl_lock);
+	mutex_enter(&zonep->zone_mem_lock);
 
 	e.rcep_p.proj = projp;
 	e.rcep_t = RCENTITY_PROJECT;
@@ -2627,7 +2628,7 @@ rctl_incr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
 		p->p_locked_mem += inc;
 	}
 out:
-	mutex_exit(&zonep->zone_rctl_lock);
+	mutex_exit(&zonep->zone_mem_lock);
 	if (proj != NULL)
 		zone_rele(zonep);
 	return (ret);
@@ -2661,7 +2662,7 @@ rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
 		zonep = p->p_zone;
 	}
 
-	mutex_enter(&zonep->zone_rctl_lock);
+	mutex_enter(&zonep->zone_mem_lock);
 	zonep->zone_locked_mem -= inc;
 	projp->kpj_data.kpd_locked_mem -= inc;
 	if (creditproc != 0) {
@@ -2669,7 +2670,120 @@ rctl_decr_locked_mem(proc_t *p, kproject_t *proj, rctl_qty_t inc,
 		ASSERT(MUTEX_HELD(&p->p_lock));
 		p->p_locked_mem -= inc;
 	}
-	mutex_exit(&zonep->zone_rctl_lock);
+	mutex_exit(&zonep->zone_mem_lock);
 	if (proj != NULL)
 		zone_rele(zonep);
 }
+
+/*
+ * rctl_incr_swap(proc_t *, zone_t *, size_t)
+ *
+ * Overview
+ *   Charges an additional swap reservation to the specified zone.
+ *
+ * Return values
+ *   0 if the charge was applied.  EAGAIN if the zone's zone.max-swap
+ *   rctl denies the increment; the zone's charge is then unchanged.
+ *
+ * Callers context
+ *   p_lock held on the specified proc.
+ *   swap must be a multiple of PAGESIZE.
+ */
+int
+rctl_incr_swap(proc_t *proc, zone_t *zone, size_t swap)
+{
+	rctl_entity_p_t ent;
+	int ret = 0;
+
+	ASSERT(MUTEX_HELD(&proc->p_lock));
+	ASSERT((swap & PAGEOFFSET) == 0);
+
+	ent.rcep_t = RCENTITY_ZONE;
+	ent.rcep_p.zone = zone;
+
+	mutex_enter(&zone->zone_mem_lock);
+	/* run the rctl test only if the total would pass zone_max_swap_ctl */
+	if ((zone->zone_max_swap + swap) > zone->zone_max_swap_ctl &&
+	    (rctl_test_entity(rc_zone_max_swap, zone->zone_rctls,
+	    proc, &ent, swap, 0) & RCT_DENY)) {
+		ret = EAGAIN;
+	} else {
+		zone->zone_max_swap += swap;
+	}
+
+	mutex_exit(&zone->zone_mem_lock);
+	return (ret);
+}
+
+/*
+ * rctl_decr_swap(zone_t *, size_t)
+ *
+ * Overview
+ *   Removes swap bytes from the zone's swap charge (zone_max_swap).
+ *
+ * Return values
+ *   None
+ *
+ * Callers context
+ *   swap must be a multiple of PAGESIZE and no larger than the charge.
+ */
+void
+rctl_decr_swap(zone_t *zone, size_t swap)
+{
+	ASSERT((swap & PAGEOFFSET) == 0);
+	mutex_enter(&zone->zone_mem_lock);
+	ASSERT(zone->zone_max_swap >= swap);
+	zone->zone_max_swap -= swap;
+	mutex_exit(&zone->zone_mem_lock);
+}
+
+/*
+ * Create a "caps" kstat named <ks_name>_<ks_instance> for zone ks_zoneid.
+ */
+static kstat_t *
+rctl_kstat_create_common(char *ks_name, int ks_instance, char *ks_class,
+    uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags, int ks_zoneid)
+{
+	char kname[KSTAT_STRLEN];
+	kstat_t *ks;
+
+	(void) snprintf(kname, sizeof (kname), "%s_%d", ks_name, ks_instance);
+	ks = kstat_create_zone("caps", ks_zoneid, kname, ks_class,
+	    ks_type, ks_ndata, ks_flags, ks_zoneid);
+	if (ks == NULL)
+		return (NULL);
+	/* non-global zone: also add the kstat to the global zone */
+	if (ks_zoneid != GLOBAL_ZONEID)
+		kstat_zone_add(ks, GLOBAL_ZONEID);
+	return (ks);
+}
+
+/*
+ * Create the per-zone "<ks_name>_zone" caps kstat for the given zone.
+ */
+kstat_t *
+rctl_kstat_create_zone(zone_t *zone, char *ks_name, uchar_t ks_type,
+    uint_t ks_ndata, uchar_t ks_flags)
+{
+	zoneid_t zid = zone->zone_id;
+	char base[KSTAT_STRLEN];
+
+	(void) snprintf(base, sizeof (base), "%s_zone", ks_name);
+	return (rctl_kstat_create_common(base, zid, "zone_caps", ks_type,
+	    ks_ndata, ks_flags, zid));
+}
+
+/*
+ * Create the per-project "<ks_name>_project" caps kstat for kpj.
+ */
+kstat_t *
+rctl_kstat_create_project(kproject_t *kpj, char *ks_name, uchar_t ks_type,
+    uint_t ks_ndata, uchar_t ks_flags)
+{
+	char base[KSTAT_STRLEN];
+
+	/* instance is the project id; zone is the project's kpj_zoneid */
+	(void) snprintf(base, sizeof (base), "%s_project", ks_name);
+	return (rctl_kstat_create_common(base, kpj->kpj_id, "project_caps",
+	    ks_type, ks_ndata, ks_flags, kpj->kpj_zoneid));
+}
diff --git a/usr/src/uts/common/os/schedctl.c b/usr/src/uts/common/os/schedctl.c
index 66aae7d2bc..62279e0777 100644
--- a/usr/src/uts/common/os/schedctl.c
+++ b/usr/src/uts/common/os/schedctl.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -542,13 +541,13 @@ schedctl_getpage(struct anon_map **newamp, caddr_t *newaddr)
 	 * Set up anonymous memory struct.  No swap reservation is
 	 * needed since the page will be locked into memory.
 	 */
-	amp = anonmap_alloc(PAGESIZE, PAGESIZE);
+	amp = anonmap_alloc(PAGESIZE, 0);
 
 	/*
 	 * Allocate the page.
 	 */
-	kaddr = segkp_get_withanonmap(segkp, PAGESIZE, KPD_LOCKED | KPD_ZERO,
-	    amp);
+	kaddr = segkp_get_withanonmap(segkp, PAGESIZE,
+	    KPD_NO_ANON | KPD_LOCKED | KPD_ZERO, amp);
 	if (kaddr == NULL) {
 		amp->refcnt--;
 		anonmap_free(amp);
diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c
index 9ada0aac18..a7ef99fddb 100644
--- a/usr/src/uts/common/os/sysent.c
+++ b/usr/src/uts/common/os/sysent.c
@@ -666,7 +666,7 @@ struct sysent sysent[NSYSCALL] =
 	/* 178 */ SYSENT_LOADABLE(),		/* kaio */
 	/* 179 */ SYSENT_LOADABLE(),		/* cpc */
 	/* 180 */ SYSENT_CI("lgrpsys",		lgrpsys,	3),
-	/* 181 */ SYSENT_CI("rusagesys",	rusagesys, 	2),
+	/* 181 */ SYSENT_CI("rusagesys",	rusagesys, 	5),
 	/* 182 */ SYSENT_LOADABLE(),		/* portfs */
 	/* 183 */ SYSENT_CI("pollsys",		pollsys,	4),
 	/* 184 */ SYSENT_CI("labelsys",		labelsys,	5),
@@ -1044,7 +1044,7 @@ struct sysent sysent32[NSYSCALL] =
 	/* 178 */ SYSENT_LOADABLE32(),		/* kaio */
 	/* 179 */ SYSENT_LOADABLE32(),		/* cpc */
 	/* 180 */ SYSENT_CI("lgrpsys",		lgrpsys,	3),
-	/* 181 */ SYSENT_CI("rusagesys",	rusagesys,	2),
+	/* 181 */ SYSENT_CI("rusagesys",	rusagesys,	5),
 	/* 182 */ SYSENT_LOADABLE32(),		/* portfs */
 	/* 183 */ SYSENT_CI("pollsys",		pollsys,	4),
 	/* 184 */ SYSENT_CI("labelsys",		labelsys,	5),
diff --git a/usr/src/uts/common/os/task.c b/usr/src/uts/common/os/task.c
index 562e3596b5..785f74c145 100644
--- a/usr/src/uts/common/os/task.c
+++ b/usr/src/uts/common/os/task.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -389,7 +388,7 @@ task_create(projid_t projid, zone_t *zone)
 	tk->tk_nlwps = 0;
 	tk->tk_nlwps_ctl = INT_MAX;
 	tk->tk_usage = tu;
-	tk->tk_proj = project_hold_by_id(projid, zone->zone_id,
+	tk->tk_proj = project_hold_by_id(projid, zone,
 	    PROJECT_HOLD_INSERT);
 	tk->tk_flags = TASK_NORMAL;
 
@@ -848,7 +847,7 @@ task_init(void)
 
 	task0p->tk_tkid = id_alloc(taskid_space);
 	task0p->tk_usage = kmem_zalloc(sizeof (task_usage_t), KM_SLEEP);
-	task0p->tk_proj = project_hold_by_id(0, GLOBAL_ZONEID,
+	task0p->tk_proj = project_hold_by_id(0, &zone0,
 	    PROJECT_HOLD_INSERT);
 	task0p->tk_flags = TASK_NORMAL;
 	task0p->tk_nlwps = p->p_lwpcnt;
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 0fb2c2be55..19ea8b31f1 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -154,6 +154,10 @@
  *   zone_lock: This is a per-zone lock used to protect several fields of
  *       the zone_t (see <sys/zone.h> for details).  In addition, holding
  *       this lock means that the zone cannot go away.
+ *   zone_nlwps_lock: This is a per-zone lock used to protect the fields
+ *	 related to the zone.max-lwps rctl.
+ *   zone_mem_lock: This is a per-zone lock used to protect the fields
+ *	 related to the zone.max-locked-memory and zone.max-swap rctls.
  *   zsd_key_lock: This is a global lock protecting the key state for ZSD.
  *   zone_deathrow_lock: This is a global lock protecting the "deathrow"
  *       list (a list of zones in the ZONE_IS_DEAD state).
@@ -162,6 +166,10 @@
  *       pool_lock --> cpu_lock --> zonehash_lock --> zone_status_lock -->
  *       	zone_lock --> zsd_key_lock --> pidlock --> p_lock
  *
+ *   When taking zone_mem_lock or zone_nlwps_lock, the lock ordering is:
+ *	zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_mem_lock
+ *	zonehash_lock --> a_lock --> pidlock --> p_lock --> zone_nlwps_lock
+ *
  *   Blocking memory allocations are permitted while holding any of the
  *   zone locks.
  *
@@ -190,6 +198,7 @@
 #include <sys/debug.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
+#include <sys/kstat.h>
 #include <sys/mutex.h>
 #include <sys/note.h>
 #include <sys/pathname.h>
@@ -232,6 +241,8 @@
 #include <sys/zone.h>
 #include <sys/tsol/label.h>
 
+#include <vm/seg.h>
+
 /*
  * cv used to signal that all references to the zone have been released.  This
  * needs to be global since there may be multiple waiters, and the first to
@@ -317,6 +328,7 @@ const char  *zone_status_table[] = {
  */
 rctl_hndl_t rc_zone_cpu_shares;
 rctl_hndl_t rc_zone_locked_mem;
+rctl_hndl_t rc_zone_max_swap;
 rctl_hndl_t rc_zone_nlwps;
 rctl_hndl_t rc_zone_shmmax;
 rctl_hndl_t rc_zone_shmmni;
@@ -1011,9 +1023,9 @@ zone_locked_mem_usage(rctl_t *rctl, struct proc *p)
 {
 	rctl_qty_t q;
 	ASSERT(MUTEX_HELD(&p->p_lock));
-	mutex_enter(&p->p_zone->zone_rctl_lock);
+	mutex_enter(&p->p_zone->zone_mem_lock);
 	q = p->p_zone->zone_locked_mem;
-	mutex_exit(&p->p_zone->zone_rctl_lock);
+	mutex_exit(&p->p_zone->zone_mem_lock);
 	return (q);
 }
 
@@ -1023,9 +1035,12 @@ zone_locked_mem_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
     rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
 {
 	rctl_qty_t q;
+	zone_t *z;
+
+	z = e->rcep_p.zone;
 	ASSERT(MUTEX_HELD(&p->p_lock));
-	ASSERT(MUTEX_HELD(&p->p_zone->zone_rctl_lock));
-	q = p->p_zone->zone_locked_mem;
+	ASSERT(MUTEX_HELD(&z->zone_mem_lock));
+	q = z->zone_locked_mem;
 	if (q + incr > rcntl->rcv_value)
 		return (1);
 	return (0);
@@ -1051,6 +1066,57 @@ static rctl_ops_t zone_locked_mem_ops = {
 	zone_locked_mem_test
 };
 
+/*ARGSUSED*/
+static rctl_qty_t
+zone_max_swap_usage(rctl_t *rctl, struct proc *p)
+{
+	zone_t *zp = p->p_zone;
+	rctl_qty_t charged;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	mutex_enter(&zp->zone_mem_lock);
+	charged = zp->zone_max_swap;
+	mutex_exit(&zp->zone_mem_lock);
+	return (charged);
+}
+
+/*ARGSUSED*/
+static int
+zone_max_swap_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e,
+    rctl_val_t *rcntl, rctl_qty_t incr, uint_t flags)
+{
+	zone_t *zp = e->rcep_p.zone;
+
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	ASSERT(MUTEX_HELD(&zp->zone_mem_lock));
+
+	/*
+	 * Nonzero means that charging incr more bytes would push the
+	 * zone's swap charge beyond this rctl value.
+	 */
+	return ((zp->zone_max_swap + incr > rcntl->rcv_value) ? 1 : 0);
+}
+
+/*ARGSUSED*/
+static int
+zone_max_swap_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+    rctl_qty_t nv)
+{
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	ASSERT(e->rcep_t == RCENTITY_ZONE);
+	/* record the new limit on the zone, if there is one to update */
+	if (e->rcep_p.zone != NULL)
+		e->rcep_p.zone->zone_max_swap_ctl = nv;
+	return (0);
+}
+
+static rctl_ops_t zone_max_swap_ops = {
+	rcop_no_action,
+	zone_max_swap_usage,	/* reports zone_max_swap */
+	zone_max_swap_set,	/* stores the limit in zone_max_swap_ctl */
+	zone_max_swap_test	/* checks zone_max_swap + incr vs. value */
+};
+
 /*
  * Helper function to brand the zone with a unique ID.
  */
@@ -1080,6 +1146,96 @@ zone_get_kcred(zoneid_t zoneid)
 	return (cr);
 }
 
+static int
+zone_lockedmem_kstat_update(kstat_t *ksp, int rw)
+{
+	zone_kstat_t *stats = ksp->ks_data;
+	zone_t *zp = ksp->ks_private;
+
+	/* publish the zone's locked-memory usage and limit; no writes */
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+	stats->zk_usage.value.ui64 = zp->zone_locked_mem;
+	stats->zk_value.value.ui64 = zp->zone_locked_mem_ctl;
+	return (0);
+}
+
+static int
+zone_swapresv_kstat_update(kstat_t *ksp, int rw)
+{
+	zone_kstat_t *stats = ksp->ks_data;
+	zone_t *zp = ksp->ks_private;
+
+	/* publish the zone's swap reservation and limit; no writes */
+	if (rw == KSTAT_WRITE)
+		return (EACCES);
+	stats->zk_usage.value.ui64 = zp->zone_max_swap;
+	stats->zk_value.value.ui64 = zp->zone_max_swap_ctl;
+	return (0);
+}
+
+/*
+ * Create and install one named, virtual "caps" kstat for the zone holding
+ * the zone name plus "usage" and "value" counters refreshed by updatefunc.
+ * Returns the kstat, or NULL if it could not be created.
+ */
+static kstat_t *
+zone_kstat_create_common(zone_t *zone, char *name,
+    int (*updatefunc)(kstat_t *, int))
+{
+	kstat_t *ksp;
+	zone_kstat_t *zk;
+
+	ksp = rctl_kstat_create_zone(zone, name, KSTAT_TYPE_NAMED,
+	    sizeof (zone_kstat_t) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+
+	if (ksp == NULL)
+		return (NULL);
+
+	zk = ksp->ks_data = kmem_alloc(sizeof (zone_kstat_t), KM_SLEEP);
+	ksp->ks_data_size += strlen(zone->zone_name) + 1;
+	kstat_named_init(&zk->zk_zonename, "zonename", KSTAT_DATA_STRING);
+	kstat_named_setstr(&zk->zk_zonename, zone->zone_name);
+	kstat_named_init(&zk->zk_usage, "usage", KSTAT_DATA_UINT64);
+	kstat_named_init(&zk->zk_value, "value", KSTAT_DATA_UINT64);
+	ksp->ks_update = updatefunc;
+	ksp->ks_private = zone;
+	kstat_install(ksp);
+	return (ksp);
+}
+
+/*
+ * Create the zone's lockedmem and swapresv kstats.  Each pointer is always
+ * assigned (NULL on failure); the two kstats are created independently of
+ * each other.
+ */
+static void
+zone_kstat_create(zone_t *zone)
+{
+	zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
+	    "lockedmem", zone_lockedmem_kstat_update);
+	zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
+	    "swapresv", zone_swapresv_kstat_update);
+}
+
+static void
+zone_kstat_delete(zone_t *zone)
+{
+	void *data;	/* ks_data buffer, fetched before kstat_delete() */
+
+	if (zone->zone_lockedmem_kstat != NULL) {
+		data = zone->zone_lockedmem_kstat->ks_data;
+		kstat_delete(zone->zone_lockedmem_kstat);
+		kmem_free(data, sizeof (zone_kstat_t));
+	}
+	if (zone->zone_swapresv_kstat != NULL) {
+		data = zone->zone_swapresv_kstat->ks_data;
+		kstat_delete(zone->zone_swapresv_kstat);
+		kmem_free(data, sizeof (zone_kstat_t));
+	}
+}
+
 /*
  * Called very early on in boot to initialize the ZSD list so that
  * zone_key_create() can be called before zone_init().  It also initializes
@@ -1101,8 +1257,14 @@ zone_zsd_init(void)
 
 	mutex_init(&zone0.zone_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&zone0.zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zone0.zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
 	zone0.zone_shares = 1;
+	zone0.zone_nlwps = 0;
 	zone0.zone_nlwps_ctl = INT_MAX;
+	zone0.zone_locked_mem = 0;
+	zone0.zone_locked_mem_ctl = UINT64_MAX;
+	ASSERT(zone0.zone_max_swap == 0);
+	zone0.zone_max_swap_ctl = UINT64_MAX;
 	zone0.zone_shmmax = 0;
 	zone0.zone_ipc.ipcq_shmmni = 0;
 	zone0.zone_ipc.ipcq_semmni = 0;
@@ -1120,6 +1282,8 @@ zone_zsd_init(void)
 	zone0.zone_ncpus_online = 0;
 	zone0.zone_proc_initpid = 1;
 	zone0.zone_initname = initname;
+	zone0.zone_lockedmem_kstat = NULL;
+	zone0.zone_swapresv_kstat = NULL;
 	list_create(&zone0.zone_zsd, sizeof (struct zsd_entry),
 	    offsetof(struct zsd_entry, zsd_linkage));
 	list_insert_head(&zone_active, &zone0);
@@ -1259,6 +1423,12 @@ zone_init(void)
 	    RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
 	    RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
 	    &zone_locked_mem_ops);
+
+	rc_zone_max_swap = rctl_register("zone.max-swap",
+	    RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
+	    RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
+	    &zone_max_swap_ops);
+
 	/*
 	 * Initialize the ``global zone''.
 	 */
@@ -1277,9 +1447,14 @@ zone_init(void)
 	zone0.zone_brand = &native_brand;
 	rctl_prealloc_destroy(gp);
 	/*
-	 * pool_default hasn't been initialized yet, so we let pool_init() take
-	 * care of making the global zone is in the default pool.
+	 * pool_default hasn't been initialized yet, so we let pool_init()
+	 * take care of making sure the global zone is in the default pool.
+	 */
+
+	/*
+	 * Initialize global zone kstats
 	 */
+	zone_kstat_create(&zone0);
 
 	/*
 	 * Initialize zone label.
@@ -1337,6 +1512,7 @@ zone_init(void)
 
 	if (res)
 		panic("Sysevent_evc_bind failed during zone setup.\n");
+
 }
 
 static void
@@ -1476,6 +1652,38 @@ zone_set_initname(zone_t *zone, const char *zone_initname)
 	return (0);
 }
 
+static int
+zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap)
+{
+	uint64_t mcap;
+
+	/* copyin() returns -1, not an errno, on failure; report EFAULT */
+	if (copyin(zone_mcap, &mcap, sizeof (mcap)) != 0)
+		return (EFAULT);
+	zone->zone_phys_mcap = mcap;
+	return (0);
+}
+
+static int
+zone_set_sched_class(zone_t *zone, const char *new_class)
+{
+	char sched_class[PC_CLNMSZ];
+	id_t classid;
+	int err;
+
+	ASSERT(zone != global_zone);
+	if ((err = copyinstr(new_class, sched_class, PC_CLNMSZ, NULL)) != 0)
+		return (err);	/* EFAULT or ENAMETOOLONG */
+
+	/* unknown class or syscid: return the errno value itself, as above */
+	if (getcid(sched_class, &classid) != 0 || classid == syscid)
+		return (EINVAL);
+	zone->zone_defaultcid = classid;
+	ASSERT(zone->zone_defaultcid > 0 &&
+	    zone->zone_defaultcid < loaded_classes);
+	return (0);
+}
+
 /*
  * Block indefinitely waiting for (zone_status >= status)
  */
@@ -2510,10 +2718,10 @@ zsched(void *arg)
 	/*
 	 * Decrement locked memory counts on old zone and project.
 	 */
-	mutex_enter(&global_zone->zone_rctl_lock);
+	mutex_enter(&global_zone->zone_mem_lock);
 	global_zone->zone_locked_mem -= pp->p_locked_mem;
 	pj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
-	mutex_exit(&global_zone->zone_rctl_lock);
+	mutex_exit(&global_zone->zone_mem_lock);
 
 	/*
 	 * Create and join a new task in project '0' of this zone.
@@ -2529,10 +2737,10 @@ zsched(void *arg)
 
 	pj = pp->p_task->tk_proj;
 
-	mutex_enter(&zone->zone_rctl_lock);
+	mutex_enter(&zone->zone_mem_lock);
 	zone->zone_locked_mem += pp->p_locked_mem;
 	pj->kpj_data.kpd_locked_mem += pp->p_locked_mem;
-	mutex_exit(&zone->zone_rctl_lock);
+	mutex_exit(&zone->zone_mem_lock);
 
 	/*
 	 * add lwp counts to zsched's zone, and increment project's task count
@@ -2689,7 +2897,10 @@ zsched(void *arg)
 		 * classid 'cid'.
 		 */
 		pool_lock();
-		cid = pool_get_class(zone->zone_pool);
+		if (zone->zone_defaultcid > 0)
+			cid = zone->zone_defaultcid;
+		else
+			cid = pool_get_class(zone->zone_pool);
 		if (cid == -1)
 			cid = defaultcid;
 
@@ -3019,7 +3230,7 @@ zone_create(const char *zone_name, const char *zone_root,
 	zone->zone_initname = NULL;
 	mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
-	mutex_init(&zone->zone_rctl_lock, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&zone->zone_mem_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
 	list_create(&zone->zone_zsd, sizeof (struct zsd_entry),
 	    offsetof(struct zsd_entry, zsd_linkage));
@@ -3057,8 +3268,14 @@ zone_create(const char *zone_name, const char *zone_root,
 	zone->zone_initname =
 	    kmem_alloc(strlen(zone_default_initname) + 1, KM_SLEEP);
 	(void) strcpy(zone->zone_initname, zone_default_initname);
+	zone->zone_nlwps = 0;
+	zone->zone_nlwps_ctl = INT_MAX;
 	zone->zone_locked_mem = 0;
 	zone->zone_locked_mem_ctl = UINT64_MAX;
+	zone->zone_max_swap = 0;
+	zone->zone_max_swap_ctl = UINT64_MAX;
+	zone->zone_lockedmem_kstat = NULL;
+	zone->zone_swapresv_kstat = NULL;
 
 	/*
 	 * Zsched initializes the rctls.
@@ -3233,6 +3450,11 @@ zone_create(const char *zone_name, const char *zone_root,
 	 */
 
 	/*
+	 * Create zone kstats
+	 */
+	zone_kstat_create(zone);
+
+	/*
 	 * Let the other lwps continue.
 	 */
 	mutex_enter(&pp->p_lock);
@@ -3643,6 +3865,9 @@ zone_destroy(zoneid_t zoneid)
 
 	}
 
+	/* Get rid of the zone's kstats */
+	zone_kstat_delete(zone);
+
 	/*
 	 * It is now safe to let the zone be recreated; remove it from the
 	 * lists.  The memory will not be freed until the last cred
@@ -3892,6 +4117,32 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 				error = EFAULT;
 		}
 		break;
+	case ZONE_ATTR_PHYS_MCAP:
+		size = sizeof (zone->zone_phys_mcap);
+		if (bufsize > size)
+			bufsize = size;
+		if (buf != NULL &&
+		    copyout(&zone->zone_phys_mcap, buf, bufsize) != 0)
+			error = EFAULT;
+		break;
+	case ZONE_ATTR_SCHED_CLASS:
+		mutex_enter(&class_lock);
+
+		if (zone->zone_defaultcid >= loaded_classes)
+			outstr = "";
+		else
+			outstr = sclass[zone->zone_defaultcid].cl_name;
+		size = strlen(outstr) + 1;
+		if (bufsize > size)
+			bufsize = size;
+		if (buf != NULL) {
+			err = copyoutstr(outstr, buf, bufsize, NULL);
+			if (err != 0 && err != ENAMETOOLONG)
+				error = EFAULT;
+		}
+
+		mutex_exit(&class_lock);
+		break;
 	default:
 		if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
 			size = bufsize;
@@ -3923,10 +4174,10 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 		return (set_errno(EPERM));
 
 	/*
-	 * At present, attributes can only be set on non-running,
-	 * non-global zones.
+	 * Only the ZONE_ATTR_PHYS_MCAP attribute can be set on the
+	 * global zone.
 	 */
-	if (zoneid == GLOBAL_ZONEID) {
+	if (zoneid == GLOBAL_ZONEID && attr != ZONE_ATTR_PHYS_MCAP) {
 		return (set_errno(EINVAL));
 	}
 
@@ -3938,8 +4189,12 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 	zone_hold(zone);
 	mutex_exit(&zonehash_lock);
 
+	/*
+	 * At present most attributes can only be set on non-running,
+	 * non-global zones.
+	 */
 	zone_status = zone_status_get(zone);
-	if (zone_status > ZONE_IS_READY)
+	if (attr != ZONE_ATTR_PHYS_MCAP && zone_status > ZONE_IS_READY)
 		goto done;
 
 	switch (attr) {
@@ -3971,6 +4226,12 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 		if (zone->zone_brand == NULL)
 			err = EINVAL;
 		break;
+	case ZONE_ATTR_PHYS_MCAP:
+		err = zone_set_phys_mcap(zone, (const uint64_t *)buf);
+		break;
+	case ZONE_ATTR_SCHED_CLASS:
+		err = zone_set_sched_class(zone, (const char *)buf);
+		break;
 	default:
 		if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone))
 			err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize);
@@ -3986,6 +4247,11 @@ done:
 /*
  * Return zero if the process has at least one vnode mapped in to its
  * address space which shouldn't be allowed to change zones.
+ *
+ * Also return zero if the process has any shared mappings which reserve
+ * swap.  This is because the counting for zone.max-swap does not allow swap
+ * reservation to be shared between zones.  Zone swap reservation is counted
+ * on zone->zone_max_swap.
  */
 static int
 as_can_change_zones(void)
@@ -3997,8 +4263,17 @@ as_can_change_zones(void)
 	int allow = 1;
 
 	ASSERT(pp->p_as != &kas);
-	AS_LOCK_ENTER(&as, &as->a_lock, RW_READER);
+	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+
+		/*
+		 * Cannot enter zone with shared anon memory which
+		 * reserves swap.  See comment above.
+		 */
+		if (seg_can_change_zones(seg) == B_FALSE) {
+			allow = 0;
+			break;
+		}
 		/*
 		 * if we can't get a backing vnode for this segment then skip
 		 * it.
@@ -4011,11 +4286,30 @@ as_can_change_zones(void)
 			break;
 		}
 	}
-	AS_LOCK_EXIT(&as, &as->a_lock);
+	AS_LOCK_EXIT(as, &as->a_lock);
 	return (allow);
 }
 
 /*
+ * Sum the swap reserved by curproc's segments; a_lock must be write-held.
+ */
+static size_t
+as_swresv(void)
+{
+	proc_t *pp = curproc;
+	struct seg *seg;
+	struct as *as = pp->p_as;
+	size_t swap = 0;
+
+	ASSERT(pp->p_as != &kas);
+	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
+	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg))
+		swap += seg_swresv(seg);
+
+	return (swap);
+}
+
+/*
  * Systemcall entry point for zone_enter().
  *
  * The current process is injected into said zone.  In the process
@@ -4043,6 +4337,7 @@ zone_enter(zoneid_t zoneid)
 	zone_status_t status;
 	int err = 0;
 	rctl_entity_p_t e;
+	size_t swap;
 
 	if (secpolicy_zone_config(CRED()) != 0)
 		return (set_errno(EPERM));
@@ -4205,6 +4500,15 @@ zone_enter(zoneid_t zoneid)
 		goto out;
 	}
 
+	/*
+	 * a_lock must be held while transferring locked memory and swap
+	 * reservation from the global zone to the non global zone because
+	 * asynchronous faults on the process's address space can lock
+	 * memory and reserve swap via MCL_FUTURE and MAP_NORESERVE
+	 * segments respectively.
+	 */
+	AS_LOCK_ENTER(pp->p_as, &pp->p_as->a_lock, RW_WRITER);
+	swap = as_swresv();
 	mutex_enter(&pp->p_lock);
 	zone_proj0 = zone->zone_zsched->p_task->tk_proj;
 	/* verify that we do not exceed and task or lwp limits */
@@ -4216,10 +4520,11 @@ zone_enter(zoneid_t zoneid)
 	zone_proj0->kpj_ntasks += 1;
 	mutex_exit(&zone->zone_nlwps_lock);
 
-	mutex_enter(&zone->zone_rctl_lock);
+	mutex_enter(&zone->zone_mem_lock);
 	zone->zone_locked_mem += pp->p_locked_mem;
 	zone_proj0->kpj_data.kpd_locked_mem += pp->p_locked_mem;
-	mutex_exit(&zone->zone_rctl_lock);
+	zone->zone_max_swap += swap;
+	mutex_exit(&zone->zone_mem_lock);
 
 	/* remove lwps from proc's old zone and old project */
 	mutex_enter(&pp->p_zone->zone_nlwps_lock);
@@ -4227,12 +4532,14 @@ zone_enter(zoneid_t zoneid)
 	pp->p_task->tk_proj->kpj_nlwps -= pp->p_lwpcnt;
 	mutex_exit(&pp->p_zone->zone_nlwps_lock);
 
-	mutex_enter(&pp->p_zone->zone_rctl_lock);
+	mutex_enter(&pp->p_zone->zone_mem_lock);
 	pp->p_zone->zone_locked_mem -= pp->p_locked_mem;
 	pp->p_task->tk_proj->kpj_data.kpd_locked_mem -= pp->p_locked_mem;
-	mutex_exit(&pp->p_zone->zone_rctl_lock);
+	pp->p_zone->zone_max_swap -= swap;
+	mutex_exit(&pp->p_zone->zone_mem_lock);
 
 	mutex_exit(&pp->p_lock);
+	AS_LOCK_EXIT(pp->p_as, &pp->p_as->a_lock);
 
 	/*
 	 * Joining the zone cannot fail from now on.
@@ -4289,6 +4596,31 @@ zone_enter(zoneid_t zoneid)
 	sess_rele(pp->p_sessp, B_TRUE);
 	pp->p_sessp = sp;
 	pgjoin(pp, zone->zone_zsched->p_pidp);
+
+	/*
+	 * If there is a default scheduling class for the zone and it is not
+	 * the class we are currently in, change all of the threads in the
+	 * process to the new class.  We need to be holding pidlock & p_lock
+	 * when we call parmsset so this is a good place to do it.
+	 */
+	if (zone->zone_defaultcid > 0 &&
+	    zone->zone_defaultcid != curthread->t_cid) {
+		pcparms_t pcparms;
+		kthread_id_t t;
+
+		pcparms.pc_cid = zone->zone_defaultcid;
+		pcparms.pc_clparms[0] = 0;
+
+		/*
+		 * If setting the class fails, we still want to enter the zone.
+		 */
+		if ((t = pp->p_tlist) != NULL) {
+			do {
+				(void) parmsset(&pcparms, t);
+			} while ((t = t->t_forw) != pp->p_tlist);
+		}
+	}
+
 	mutex_exit(&pp->p_lock);
 	mutex_exit(&pidlock);
 
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index ab103ef4c7..4493f99454 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -544,6 +544,7 @@ CHKHDRS=			\
 	visual_io.h		\
 	vlan.h			\
 	vm.h			\
+	vm_usage.h		\
 	vmem.h			\
 	vmem_impl.h		\
 	vmmeter.h		\
diff --git a/usr/src/uts/common/sys/modhash_impl.h b/usr/src/uts/common/sys/modhash_impl.h
index 25e45cec23..a187eb68ee 100644
--- a/usr/src/uts/common/sys/modhash_impl.h
+++ b/usr/src/uts/common/sys/modhash_impl.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -93,6 +92,18 @@ struct mod_hash {
  */
 void mod_hash_init(void);
 
+/*
+ * Internal routines.  Use directly with care.
+ */
+uint_t i_mod_hash(mod_hash_t *, mod_hash_key_t);
+int i_mod_hash_insert_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t,
+    mod_hash_hndl_t);
+int i_mod_hash_remove_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
+int i_mod_hash_find_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
+void i_mod_hash_walk_nosync(mod_hash_t *, uint_t (*)(mod_hash_key_t,
+    mod_hash_val_t *, void *), void *);
+void i_mod_hash_clear_nosync(mod_hash_t *hash);
+
 #endif /* _KERNEL */
 
 #ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/priocntl.h b/usr/src/uts/common/sys/priocntl.h
index ca1a92400a..6475ed0a4c 100644
--- a/usr/src/uts/common/sys/priocntl.h
+++ b/usr/src/uts/common/sys/priocntl.h
@@ -65,6 +65,7 @@ extern long	priocntl(), priocntlset();
 #define	PC_SETXPARMS	7	/* Set extended scheduling parameters */
 #define	PC_GETXPARMS	8	/* Get extended scheduling parameters */
 #define	PC_SETDFLCL	9	/* Set default class, not for general use */
+#define	PC_GETDFLCL	10	/* Get default class, not for general use */
 
 #define	PC_CLNULL	-1
 
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index fcf953262c..9a0ba2cc37 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -613,6 +613,8 @@ extern proc_t *pgfind(pid_t);
 extern proc_t *pgfind_zone(pid_t, zoneid_t);
 extern proc_t *sprlock(pid_t);
 extern proc_t *sprlock_zone(pid_t, zoneid_t);
+extern int sprtrylock_proc(proc_t *);
+extern void sprwaitlock_proc(proc_t *);
 extern void sprlock_proc(proc_t *);
 extern void sprunlock(proc_t *);
 extern void pid_init(void);
diff --git a/usr/src/uts/common/sys/project.h b/usr/src/uts/common/sys/project.h
index 679c1eddc2..5018df8499 100644
--- a/usr/src/uts/common/sys/project.h
+++ b/usr/src/uts/common/sys/project.h
@@ -28,15 +28,24 @@
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
+
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
+
+#include <sys/kstat.h>
 #include <sys/types.h>
 #include <sys/mutex.h>
 #include <sys/rctl.h>
 #include <sys/ipc_rctl.h>
 
+typedef struct kproject_kstat {
+	kstat_named_t kpk_zonename;
+	kstat_named_t kpk_usage;
+	kstat_named_t kpk_value;
+} kproject_kstat_t;
+
 typedef struct kproject_data {		/* Datum protected by: */
 	rctl_qty_t	kpd_shmmax;	/* shm's ipcs_lock */
 	ipc_rqty_t	kpd_ipc;	/* shm|sem|msg's ipcs lock */
@@ -44,6 +53,7 @@ typedef struct kproject_data {		/* Datum protected by: */
 	rctl_qty_t	kpd_locked_mem_ctl; /* kpj_rctls->rcs_lock */
 	rctl_qty_t	kpd_contract;	/* contract_lock */
 	rctl_qty_t	kpd_crypto_mem;	/* crypto_rctl_lock */
+	kstat_t		*kpd_lockedmem_kstat; /* locked memory kstat */
 
 } kproject_data_t;
 
@@ -76,9 +86,11 @@ typedef struct kproject {
 #define	PROJECT_HOLD_FIND	1
 #define	PROJECT_HOLD_INSERT	2
 
+struct zone;
+
 void project_init(void);
 kproject_t *project_hold(kproject_t *);
-kproject_t *project_hold_by_id(projid_t, zoneid_t, int);
+kproject_t *project_hold_by_id(projid_t, struct zone *, int);
 void project_rele(kproject_t *);
 int project_walk_all(zoneid_t, int (*)(kproject_t *, void *), void *);
 projid_t curprojid(void);
diff --git a/usr/src/uts/common/sys/rctl.h b/usr/src/uts/common/sys/rctl.h
index eb56fff9e5..a8480c2768 100644
--- a/usr/src/uts/common/sys/rctl.h
+++ b/usr/src/uts/common/sys/rctl.h
@@ -168,6 +168,7 @@ struct proc;
 struct task;
 struct kproject;
 struct zone;
+struct kstat;
 
 typedef struct rctl_entity_p_struct {
 	rctl_entity_t rcep_t;
@@ -324,6 +325,14 @@ int rctl_incr_locked_mem(struct proc *, struct kproject *, rctl_qty_t,
     int);
 void rctl_decr_locked_mem(struct proc *, struct kproject *, rctl_qty_t,
     int);
+int rctl_incr_swap(struct proc *, struct zone *, size_t);
+void rctl_decr_swap(struct zone *, size_t);
+
+struct kstat *rctl_kstat_create_zone(struct zone *, char *, uchar_t, uint_t,
+    uchar_t);
+
+struct kstat *rctl_kstat_create_project(struct kproject *, char *, uchar_t,
+    uint_t, uchar_t);
 
 #endif /* _KERNEL */
 
diff --git a/usr/src/uts/common/sys/resource.h b/usr/src/uts/common/sys/resource.h
index 86cc716d56..bf02808d4b 100644
--- a/usr/src/uts/common/sys/resource.h
+++ b/usr/src/uts/common/sys/resource.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -191,6 +190,7 @@ struct	rusage {
 #define	_RUSAGESYS_GETRUSAGE		0	/* rusage process */
 #define	_RUSAGESYS_GETRUSAGE_CHLD	1	/* rusage child process */
 #define	_RUSAGESYS_GETRUSAGE_LWP	2	/* rusage lwp */
+#define	_RUSAGESYS_GETVMUSAGE		3	/* getvmusage */
 
 #if defined(_SYSCALL32)
 
diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h
index 96cb967023..eedadfa0c0 100644
--- a/usr/src/uts/common/sys/syscall.h
+++ b/usr/src/uts/common/sys/syscall.h
@@ -384,7 +384,8 @@ extern "C" {
 #define	SYS_rusagesys		181
 	/*
 	 * subcodes:
-	 *	getrusage(...) :: rusagesys(RUSAGESYS_GETRUSAGE,...)
+	 *	getrusage(...) :: rusagesys(RUSAGESYS_GETRUSAGE, ...)
+	 *	getvmusage(...) :: rusagesys(RUSAGESYS_GETVMUSAGE, ...)
 	 */
 #define	SYS_port		182
 	/*
diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h
new file mode 100644
index 0000000000..5f8c8b8fe5
--- /dev/null
+++ b/usr/src/uts/common/sys/vm_usage.h
@@ -0,0 +1,120 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_VM_USAGE_H
+#define	_SYS_VM_USAGE_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * The flags passed to getvmusage() request how to aggregate rss/swap results.
+ * Results can be aggregated by zone, project, task, ruser, and/or euser.
+ *
+ * If VMUSAGE_ALL_* or VMUSAGE_COL_* are passed from a non-global-zone, the
+ * flag is treated as VMUSAGE_*.  For example, VMUSAGE_ALL_ZONES would be
+ * treated as VMUSAGE_ZONE.
+ *
+ * If VMUSAGE_SYSTEM is passed from a non-global zone, a result of type
+ * VMUSAGE_SYSTEM will be returned, but it will only reflect the usage
+ * of the calling zone.
+ *
+ * VMUSAGE_*	 requests results for the calling zone.
+ * VMUSAGE_ALL_* requests results for all zones.
+ * VMUSAGE_COL_* requests results for all zones, but collapses out the zoneid.
+ *		 For example, VMUSAGE_COL_PROJECTS requests results for all
+ *		 projects in all zones, and project N in ANY zone is treated
+ *		 as the same project.
+ */
+#define	VMUSAGE_SYSTEM		0x1	/* rss/swap for ALL processes */
+#define	VMUSAGE_ZONE		0x2	/* rss/swap for caller's zone */
+#define	VMUSAGE_PROJECTS	0x4	/* rss/swap for all projects in */
+					/* caller's zone */
+#define	VMUSAGE_TASKS		0x8	/* rss/swap for all tasks in */
+					/* caller's zone */
+#define	VMUSAGE_RUSERS		0x10	/* rss/swap for all users (by process */
+					/* ruser) in the caller's zone */
+#define	VMUSAGE_EUSERS		0x20	/* same as VMUSAGE_RUSERS, but by */
+					/* euser */
+
+#define	VMUSAGE_ALL_ZONES	0x40	/* rss/swap for all zones */
+#define	VMUSAGE_ALL_PROJECTS	0x80	/* rss/swap for all projects in */
+					/* all zones */
+#define	VMUSAGE_ALL_TASKS	0x100	/* rss/swap for all tasks in all */
+					/* zones */
+#define	VMUSAGE_ALL_RUSERS	0x200	/* rss/swap for all users (by process */
+					/* ruser) in all zones */
+#define	VMUSAGE_ALL_EUSERS	0x400	/* same as VMUSAGE_ALL_RUSERS, but by */
+					/* euser */
+
+#define	VMUSAGE_COL_PROJECTS	0x800	/* rss/swap for all projects in */
+					/* all zones.  Collapse zoneid. */
+#define	VMUSAGE_COL_RUSERS	0x1000	/* rss/swap for all users (by process */
+					/* ruser), in all zones.  Collapse */
+					/* zoneid */
+#define	VMUSAGE_COL_EUSERS	0x2000	/* same as VMUSAGE_COL_RUSERS, but by */
+					/* euser */
+
+#define	VMUSAGE_MASK		0x3fff  /* all valid flags for getvmusage() */
+
+typedef struct vmusage {
+	id_t	vmu_zoneid;		/* zoneid, or ALL_ZONES for */
+					/* VMUSAGE_COL_* results */
+					/* ALL_ZONES means that the result */
+					/* reflects swap and rss usage for */
+					/* a projid/uid across all zones */
+	uint_t	vmu_type;		/* Entity type of result.  One of:  */
+					/* VMUSAGE_(SYSTEM|ZONE|PROJECTS| */
+					/* TASKS|RUSERS|EUSERS) */
+	id_t	vmu_id;			/* zoneid, projid, taskid, ... */
+	size_t	vmu_rss_all;		/* total resident memory of entity */
+					/* in bytes */
+	size_t	vmu_rss_private;	/* total resident private memory */
+	size_t	vmu_rss_shared;		/* total resident shared memory */
+	size_t	vmu_swap_all;		/* total swap reserved, in bytes */
+	size_t	vmu_swap_private;	/* swap reserved for private mappings */
+	size_t	vmu_swap_shared;	/* swap reserved for shared mappings */
+
+} vmusage_t;
+
+extern int getvmusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres);
+
+#ifdef	_KERNEL
+
+int vm_getusage(uint_t, time_t, vmusage_t *, size_t *);
+void vm_usage_init(void);
+
+#endif	/* _KERNEL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_VM_USAGE_H */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index daccd16bdf..94646bc976 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -88,6 +88,8 @@ extern "C" {
 #define	ZONE_ATTR_INITNAME	9
 #define	ZONE_ATTR_BOOTARGS	10
 #define	ZONE_ATTR_BRAND		11
+#define	ZONE_ATTR_PHYS_MCAP	12
+#define	ZONE_ATTR_SCHED_CLASS	13
 
 /* Start of the brand-specific attribute namespace */
 #define	ZONE_ATTR_BRAND_ATTRS	32768
@@ -280,6 +282,15 @@ typedef struct zone_dataset {
 	list_node_t	zd_linkage;
 } zone_dataset_t;
 
+/*
+ * structure for zone kstats
+ */
+typedef struct zone_kstat {
+	kstat_named_t zk_zonename;
+	kstat_named_t zk_usage;
+	kstat_named_t zk_value;
+} zone_kstat_t;
+
 typedef struct zone {
 	/*
 	 * zone_name is never modified once set.
@@ -326,14 +337,20 @@ typedef struct zone {
 	uint_t		zone_rootpathlen; /* strlen(zone_rootpath) + 1 */
 	uint32_t	zone_shares;	/* FSS shares allocated to zone */
 	rctl_set_t	*zone_rctls;	/* zone-wide (zone.*) rctls */
-	kmutex_t	zone_rctl_lock; /* protects zone_locked_mem and */
+	kmutex_t	zone_mem_lock;	/* protects zone_locked_mem and */
 					/* kpd_locked_mem for all */
-					/* projects in zone */
+					/* projects in zone. */
+					/* Also protects zone_max_swap */
 					/* grab after p_lock, before rcs_lock */
-	rctl_qty_t	zone_locked_mem; /* bytes of locked memory in zone */
-	rctl_qty_t	zone_locked_mem_ctl;	/* current locked memory */
+	rctl_qty_t	zone_locked_mem;	/* bytes of locked memory in */
+						/* zone */
+	rctl_qty_t	zone_locked_mem_ctl;	/* Current locked memory */
 						/* limit.  Protected by */
 						/* zone_rctls->rcs_lock */
+	rctl_qty_t	zone_max_swap; /* bytes of swap reserved by zone */
+	rctl_qty_t	zone_max_swap_ctl;	/* current swap limit. */
+						/* Protected by */
+						/* zone_rctls->rcs_lock */
 	list_t		zone_zsd;	/* list of Zone-Specific Data values */
 	kcondvar_t	zone_cv;	/* used to signal state changes */
 	struct proc	*zone_zsched;	/* Dummy kernel "zsched" process */
@@ -341,6 +358,7 @@ typedef struct zone {
 	char		*zone_initname;	/* fs path to 'init' */
 	int		zone_boot_err;  /* for zone_boot() if boot fails */
 	char		*zone_bootargs;	/* arguments passed via zone_boot() */
+	uint64_t	zone_phys_mcap;	/* physical memory cap */
 	/*
 	 * zone_kthreads is protected by zone_status_lock.
 	 */
@@ -376,6 +394,9 @@ typedef struct zone {
 
 	boolean_t	zone_restart_init;	/* Restart init if it dies? */
 	struct brand	*zone_brand;		/* zone's brand */
+	id_t		zone_defaultcid;	/* dflt scheduling class id */
+	kstat_t		*zone_swapresv_kstat;
+	kstat_t		*zone_lockedmem_kstat;
 } zone_t;
 
 /*
@@ -553,6 +574,7 @@ extern void mount_completed(void);
 extern int zone_walk(int (*)(zone_t *, void *), void *);
 
 extern rctl_hndl_t rc_zone_locked_mem;
+extern rctl_hndl_t rc_zone_max_swap;
 
 #endif	/* _KERNEL */
 
diff --git a/usr/src/uts/common/syscall/processor_bind.c b/usr/src/uts/common/syscall/processor_bind.c
index 10ca1178d5..bd416e43e6 100644
--- a/usr/src/uts/common/syscall/processor_bind.c
+++ b/usr/src/uts/common/syscall/processor_bind.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -285,9 +284,10 @@ processor_bind(idtype_t idtype, id_t id, processorid_t bind,
 		break;
 
 	case P_PROJID:
+		pp = curproc;
 		if (id == P_MYID)
 			id = curprojid();
-		if ((kpj = project_hold_by_id(id, getzoneid(),
+		if ((kpj = project_hold_by_id(id, pp->p_zone,
 		    PROJECT_HOLD_FIND)) == NULL) {
 			ret = ESRCH;
 		} else {
diff --git a/usr/src/uts/common/syscall/pset.c b/usr/src/uts/common/syscall/pset.c
index 5d3b7e6233..767529fc5d 100644
--- a/usr/src/uts/common/syscall/pset.c
+++ b/usr/src/uts/common/syscall/pset.c
@@ -542,9 +542,10 @@ pset_bind(psetid_t pset, idtype_t idtype, id_t id, psetid_t *opset)
 		break;
 
 	case P_PROJID:
+		pp = curproc;
 		if (id == P_MYID)
 			id = curprojid();
-		if ((kpj = project_hold_by_id(id, getzoneid(),
+		if ((kpj = project_hold_by_id(id, pp->p_zone,
 		    PROJECT_HOLD_FIND)) == NULL) {
 			error = ESRCH;
 			break;
diff --git a/usr/src/uts/common/syscall/rusagesys.c b/usr/src/uts/common/syscall/rusagesys.c
index 3e09643981..036500932f 100644
--- a/usr/src/uts/common/syscall/rusagesys.c
+++ b/usr/src/uts/common/syscall/rusagesys.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -35,6 +34,7 @@
 #include <sys/time.h>
 #include <sys/errno.h>
 #include <sys/resource.h>
+#include <sys/vm_usage.h>
 
 static int
 getrusage(void *user_rusage)
@@ -246,16 +246,19 @@ getrusage_lwp(void *user_rusage)
 }
 
 int
-rusagesys(int code, void * arg)
+rusagesys(int code, void *arg1, void *arg2, void *arg3, void *arg4)
 {
 	switch (code) {
 
 	case _RUSAGESYS_GETRUSAGE:
-		return (getrusage(arg));
+		return (getrusage(arg1));
 	case _RUSAGESYS_GETRUSAGE_CHLD:
-		return (getrusage_chld(arg));
+		return (getrusage_chld(arg1));
 	case _RUSAGESYS_GETRUSAGE_LWP:
-		return (getrusage_lwp(arg));
+		return (getrusage_lwp(arg1));
+	case _RUSAGESYS_GETVMUSAGE:
+		return (vm_getusage((uint_t)(uintptr_t)arg1, (time_t)arg2,
+		    (vmusage_t *)arg3, (size_t *)arg4));
 	default:
 		return (set_errno(EINVAL));
 	}
diff --git a/usr/src/uts/common/syscall/tasksys.c b/usr/src/uts/common/syscall/tasksys.c
index 705b543a37..bec091e61c 100644
--- a/usr/src/uts/common/syscall/tasksys.c
+++ b/usr/src/uts/common/syscall/tasksys.c
@@ -25,6 +25,7 @@
 
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
+
 /*
  * System calls for creating and inquiring about tasks and projects
  */
@@ -102,7 +103,7 @@ tasksys_settaskid(projid_t projid, uint_t flags)
 	 * Put a hold on our new project and make sure that nobody is
 	 * trying to bind it to a pool while we're joining.
 	 */
-	kpj = project_hold_by_id(projid, getzoneid(), PROJECT_HOLD_INSERT);
+	kpj = project_hold_by_id(projid, p->p_zone, PROJECT_HOLD_INSERT);
 	e.rcep_p.proj = kpj;
 	e.rcep_t = RCENTITY_PROJECT;
 
@@ -111,7 +112,7 @@ tasksys_settaskid(projid_t projid, uint_t flags)
 	zone = p->p_zone;
 
 	mutex_enter(&zone->zone_nlwps_lock);
-	mutex_enter(&zone->zone_rctl_lock);
+	mutex_enter(&zone->zone_mem_lock);
 
 	if (kpj->kpj_nlwps + p->p_lwpcnt > kpj->kpj_nlwps_ctl)
 		if (rctl_test_entity(rc_project_nlwps, kpj->kpj_rctls, p, &e,
@@ -130,7 +131,7 @@ tasksys_settaskid(projid_t projid, uint_t flags)
 			rctlfail = 1;
 
 	if (rctlfail) {
-		mutex_exit(&zone->zone_rctl_lock);
+		mutex_exit(&zone->zone_mem_lock);
 		mutex_exit(&zone->zone_nlwps_lock);
 		if (curthread != p->p_agenttp)
 			continuelwps(p);
@@ -144,7 +145,7 @@ tasksys_settaskid(projid_t projid, uint_t flags)
 	oldpj->kpj_data.kpd_locked_mem -= p->p_locked_mem;
 	oldpj->kpj_nlwps -= p->p_lwpcnt;
 
-	mutex_exit(&zone->zone_rctl_lock);
+	mutex_exit(&zone->zone_mem_lock);
 	mutex_exit(&zone->zone_nlwps_lock);
 	mutex_exit(&p->p_lock);
 
diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h
index 90f6e1e661..ed59ec590b 100644
--- a/usr/src/uts/common/vm/anon.h
+++ b/usr/src/uts/common/vm/anon.h
@@ -42,6 +42,7 @@
 #pragma ident	"%Z%%M%	%I%	%E% SMI"
 
 #include <sys/cred.h>
+#include <sys/zone.h>
 #include <vm/seg.h>
 #include <vm/vpage.h>
 
@@ -387,8 +388,8 @@ extern int	anon_map_demotepages(struct anon_map *, ulong_t,
 		    struct seg *, caddr_t, uint_t,
 		    struct vpage [], struct cred *);
 extern void	anon_shmap_free_pages(struct anon_map *, ulong_t, size_t);
-extern int	anon_resvmem(size_t, uint_t);
-extern void	anon_unresv(size_t);
+extern int	anon_resvmem(size_t, boolean_t, zone_t *);
+extern void	anon_unresvmem(size_t, zone_t *);
 extern struct	anon_map *anonmap_alloc(size_t, size_t);
 extern void	anonmap_free(struct anon_map *);
 extern void	anon_decref(struct anon *);
@@ -416,9 +417,16 @@ extern void	anon_array_exit(anon_sync_obj_t *);
  * request and if so, reserves the appropriate anonymous memory resources.
  * anon_checkspace just checks to see if there is space to fulfill the request,
  * without taking any resources.  Both return 1 if successful and 0 if not.
+ *
+ * Macros are provided as anon reservation is usually charged to the zone of
+ * the current process.  In some cases (such as anon reserved by tmpfs), a
+ * zone pointer is needed to charge the appropriate zone.
  */
-#define	anon_resv(size)		anon_resvmem((size), 1)
-#define	anon_checkspace(size)	anon_resvmem((size), 0)
+#define	anon_unresv(size)		anon_unresvmem(size, curproc->p_zone)
+#define	anon_unresv_zone(size, zone)	anon_unresvmem(size, zone)
+#define	anon_resv(size)			anon_resvmem((size), 1, curproc->p_zone)
+#define	anon_resv_zone(size, zone)	anon_resvmem((size), 1, zone)
+#define	anon_checkspace(size, zone)	anon_resvmem((size), 0, zone)
 
 /*
  * Flags to anon_private
diff --git a/usr/src/uts/common/vm/seg.h b/usr/src/uts/common/vm/seg.h
index 0ee7d62ce1..a9683c0e54 100644
--- a/usr/src/uts/common/vm/seg.h
+++ b/usr/src/uts/common/vm/seg.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -245,6 +244,9 @@ uint_t	seg_pages(struct seg *);
 
 #endif	/* VMDEBUG */
 
+boolean_t	seg_can_change_zones(struct seg *);
+size_t		seg_swresv(struct seg *);
+
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
diff --git a/usr/src/uts/common/vm/seg_kp.c b/usr/src/uts/common/vm/seg_kp.c
index ff9c47e0ff..d58e873a19 100644
--- a/usr/src/uts/common/vm/seg_kp.c
+++ b/usr/src/uts/common/vm/seg_kp.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -147,6 +146,7 @@ uint32_t	red_closest = UINT_MAX;
 uint32_t	red_ndoubles;
 
 pgcnt_t anon_segkp_pages_locked;	/* See vm/anon.h */
+pgcnt_t anon_segkp_pages_resv;		/* anon reserved by seg_kp */
 
 static struct	seg_ops segkp_ops = {
 	SEGKP_BADOP(int),		/* dup */
@@ -448,8 +448,10 @@ segkp_get_internal(
 	 * Note that we don't need swap space for the red zone page.
 	 */
 	if (amp != NULL) {
-		ASSERT((flags & KPD_NO_ANON) == 0);
-		/* The reserve has been done and the anon_hdr is separate. */
+		/*
+		 * The swap reservation has been done, if required, and the
+		 * anon_hdr is separate.
+		 */
 		anon_idx = 0;
 		kpd->kp_anon_idx = anon_idx;
 		kpd->kp_anon = amp->ahp;
@@ -458,7 +460,7 @@ segkp_get_internal(
 		    kpd, vbase, len, flags, 1);
 
 	} else if ((flags & KPD_NO_ANON) == 0) {
-		if (anon_resv(SEGKP_MAPLEN(len, flags)) == 0) {
+		if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
 			if (flags & KPD_LOCKED) {
 				atomic_add_long(&anon_segkp_pages_locked,
 				    -pages);
@@ -468,6 +470,8 @@ segkp_get_internal(
 			kmem_free(kpd, sizeof (struct segkp_data));
 			return (NULL);
 		}
+		atomic_add_long(&anon_segkp_pages_resv,
+		    btop(SEGKP_MAPLEN(len, flags)));
 		anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
 		kpd->kp_anon_idx = anon_idx;
 		kpd->kp_anon = kpsd->kpsd_anon;
@@ -704,7 +708,9 @@ segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
 			if ((kpd->kp_flags & KPD_HASAMP) == 0) {
 				anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
 				    PAGESIZE);
-				anon_unresv(PAGESIZE);
+				anon_unresv_zone(PAGESIZE, NULL);
+				atomic_add_long(&anon_segkp_pages_resv,
+				    -1);
 			}
 			TRACE_5(TR_FAC_VM,
 			    TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index f48db44acc..e2069b27c6 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -2323,8 +2323,9 @@ segvn_faultpage(
 			 * zeroes. If no advance reservations, reserve now.
 			 */
 			if (svd->flags & MAP_NORESERVE) {
-				if (anon_resv(ptob(1))) {
-					svd->swresv += ptob(1);
+				if (anon_resv_zone(ptob(1),
+				    seg->s_as->a_proc->p_zone)) {
+					atomic_add_long(&svd->swresv, ptob(1));
 				} else {
 					err = ENOMEM;
 					goto out;
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index 0cad34257c..3f225a345a 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -113,6 +113,7 @@
 #include <sys/policy.h>
 #include <sys/condvar_impl.h>
 #include <sys/mutex_impl.h>
+#include <sys/rctl.h>
 
 #include <vm/as.h>
 #include <vm/hat.h>
@@ -729,12 +730,22 @@ set_anoninfo(void)
  * Return non-zero on success.
  */
 int
-anon_resvmem(size_t size, uint_t takemem)
+anon_resvmem(size_t size, boolean_t takemem, zone_t *zone)
 {
 	pgcnt_t npages = btopr(size);
 	pgcnt_t mswap_pages = 0;
 	pgcnt_t pswap_pages = 0;
+	proc_t *p = curproc;
 
+	if (zone != NULL && takemem) {
+		/* test zone.max-swap resource control */
+		mutex_enter(&p->p_lock);
+		if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
+			mutex_exit(&p->p_lock);
+			return (0);
+		}
+		mutex_exit(&p->p_lock);
+	}
 	mutex_enter(&anoninfo_lock);
 
 	/*
@@ -834,16 +845,17 @@ anon_resvmem(size_t size, uint_t takemem)
 		mutex_exit(&anoninfo_lock);
 		ANON_PRINT(A_RESV,
 			("anon_resvmem: not enough space from swapfs\n"));
+		if (zone != NULL && takemem)
+			rctl_decr_swap(zone, ptob(npages));
 		return (0);
 	}
 }
 
-
 /*
  * Give back an anon reservation.
  */
 void
-anon_unresv(size_t size)
+anon_unresvmem(size_t size, zone_t *zone)
 {
 	pgcnt_t npages = btopr(size);
 	spgcnt_t mem_free_pages = 0;
@@ -851,6 +863,8 @@ anon_unresv(size_t size)
 #ifdef	ANON_DEBUG
 	pgcnt_t mem_resv;
 #endif
+	if (zone != NULL)	/* credit the page-rounded charge */
+		rctl_decr_swap(zone, ptob(npages));
 
 	mutex_enter(&anoninfo_lock);
 
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index 05bfe662be..adac07b766 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -77,7 +77,7 @@
 #include <vm/pvn.h>
 #include <vm/seg_kmem.h>
 #include <vm/vm_dep.h>
-
+#include <sys/vm_usage.h>
 #include <fs/fs_subr.h>
 
 static int nopageage = 0;
@@ -343,6 +343,7 @@ vm_init(void)
 	(void) callb_add(callb_vm_cpr, 0, CB_CL_CPR_VM, "vm");
 	page_init_mem_config();
 	page_retire_init();
+	vm_usage_init();
 }
 
 /*
diff --git a/usr/src/uts/common/vm/vm_seg.c b/usr/src/uts/common/vm/vm_seg.c
index 50cc21cdf7..aed892969d 100644
--- a/usr/src/uts/common/vm/vm_seg.c
+++ b/usr/src/uts/common/vm/vm_seg.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -54,12 +53,14 @@
 #include <sys/cmn_err.h>
 #include <sys/callb.h>
 #include <sys/mem_config.h>
+#include <sys/mman.h>
 
 #include <vm/hat.h>
 #include <vm/as.h>
 #include <vm/seg.h>
 #include <vm/seg_kmem.h>
-
+#include <vm/seg_spt.h>
+#include <vm/seg_vn.h>
 /*
  * kstats for segment advise
  */
@@ -950,3 +951,48 @@ seg_pinit_mem_config(void)
 	 */
 	ASSERT(ret == 0);
 }
+
+extern struct seg_ops segvn_ops;
+extern struct seg_ops segspt_shmops;
+
+/*
+ * Returns B_FALSE for a segspt_shmops segment or a MAP_SHARED segvn segment
+ * whose amp reserves swap, B_TRUE otherwise.  zone.max-swap accounting
+ * (zone->zone_max_swap) cannot be transferred from one zone to another if
+ * any segments are shared: the last process to exit credits the swap
+ * reservation, so swap reserved by one zone could be credited to another.
+ */
+boolean_t
+seg_can_change_zones(struct seg *seg)
+{
+	struct segvn_data *svd;
+
+	if (seg->s_ops == &segspt_shmops)
+		return (B_FALSE);
+
+	if (seg->s_ops == &segvn_ops) {
+		svd = (struct segvn_data *)seg->s_data;
+		if (svd->type == MAP_SHARED &&
+		    svd->amp != NULL && svd->amp->swresv > 0) {
+			return (B_FALSE);
+		}
+	}
+	return (B_TRUE);
+}
+
+/*
+ * Return the swap reserved (svd->swresv) by a private segvn mapping, else 0.
+ */
+size_t
+seg_swresv(struct seg *seg)
+{
+	struct segvn_data *svd;
+
+	if (seg->s_ops != &segvn_ops)
+		return (0);
+
+	svd = (struct segvn_data *)seg->s_data;
+	if (svd->type != MAP_PRIVATE || svd->swresv <= 0)
+		return (0);
+	return (svd->swresv);
+}
diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c
new file mode 100644
index 0000000000..32a8811e10
--- /dev/null
+++ b/usr/src/uts/common/vm/vm_usage.c
@@ -0,0 +1,1978 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * vm_usage
+ *
+ * This file implements the getvmusage() private system call.
+ * getvmusage() counts the amount of resident memory pages and swap
+ * reserved by the specified process collective. A "process collective" is
+ * the set of processes owned by a particular zone, project, task, or user.
+ *
+ * rss and swap are counted so that for a given process collective, a page is
+ * only counted once.  For example, this means that if multiple processes in
+ * the same project map the same page, then the project will only be charged
+ * once for that page.  On the other hand, if two processes in different
+ * projects map the same page, then both projects will be charged
+ * for the page.
+ *
+ * The vm_getusage() calculation is implemented so that the first thread
+ * performs the rss/swap counting. Other callers will wait for that thread to
+ * finish, copying the results.  This enables multiple rcapds and prstats to
+ * consume data from the same calculation.  The results are also cached so that
+ * a caller interested in recent results can just copy them instead of starting
+ * a new calculation. The caller passes the maximum age (in seconds) of the
+ * data.  If the cached data is young enough, the cache is copied, otherwise,
+ * a new calculation is executed and the cache is replaced with the new
+ * data.
+ *
+ * The rss calculation for each process collective is as follows:
+ *
+ *   - Inspect flags, determine if counting rss for zones, projects, tasks,
+ *     and/or users.
+ *   - For each proc:
+ *	- Figure out proc's collectives (zone, project, task, and/or user).
+ *	- For each seg in proc's address space:
+ *		- If seg is private:
+ *			- Lookup anons in the amp.
+ *			- For incore pages not previously visited for each of
+ *			  the proc's collectives, add incore pagesize to each
+ *			  collective.
+ *			  Anons with a refcnt of 1 can be assumed to be not
+ *			  previously visited.
+ *			- For address ranges without anons in the amp:
+ *				- Lookup pages in underlying vnode.
+ *				- For incore pages not previously visited for
+ *				  each of the proc's collectives, add incore
+ *				  pagesize to each collective.
+ *		- If seg is shared:
+ *			- Lookup pages in the shared amp or vnode.
+ *			- For incore pages not previously visited for each of
+ *			  the proc's collectives, add incore pagesize to each
+ *			  collective.
+ *
+ * Swap is reserved by private segments, and shared anonymous segments.
+ * The only shared anon segments which do not reserve swap are ISM segments
+ * and schedctl segments, both of which can be identified by having
+ * amp->swresv == 0.
+ *
+ * The swap calculation for each collective is as follows:
+ *
+ *   - Inspect flags, determine if counting rss for zones, projects, tasks,
+ *     and/or users.
+ *   - For each proc:
+ *	- Figure out proc's collectives (zone, project, task, and/or user).
+ *	- For each seg in proc's address space:
+ *		- If seg is private:
+ *			- Add svd->swresv pages to swap count for each of the
+ *			  proc's collectives.
+ *		- If seg is anon, shared, and amp->swresv != 0
+ *			- For address ranges in amp not previously visited for
+ *			  each of the proc's collectives, add size of address
+ *			  range to the swap count for each collective.
+ *
+ * These two calculations are done simultaneously, with most of the work
+ * being done in vmu_calculate_seg().  The results of the calculation are
+ * copied into "vmu_data.vmu_cache->vmc_results".
+ *
+ * To perform the calculation, various things are tracked and cached:
+ *
+ *    - incore/not-incore page ranges for all vnodes.
+ *	(vmu_data.vmu_all_vnodes_hash)
+ *	This eliminates looking up the same page more than once.
+ *
+ *    - incore/not-incore page ranges for all shared amps.
+ *	(vmu_data.vmu_all_amps_hash)
+ *	This eliminates looking up the same page more than once.
+ *
+ *    - visited page ranges for each collective.
+ *	   - per vnode (entity->vme_vnode_hash)
+ *	   - per shared amp (entity->vme_amp_hash)
+ *	For accurate counting of map-shared and cow-shared pages.
+ *
+ *    - visited private anons (refcnt > 1) for each collective.
+ *	(entity->vme_anon_hash)
+ *	For accurate counting of cow-shared pages.
+ *
+ * The common accounting structure is the vmu_entity_t, which represents
+ * collectives:
+ *
+ *    - A zone.
+ *    - A project, task, or user within a zone.
+ *    - The entire system (vmu_data.vmu_system).
+ *    - Each collapsed (col) project and user.  This means a given projid or
+ *	uid, regardless of which zone the process is in.  For instance,
+ *      project 0 in the global zone and project 0 in a non global zone are
+ *	the same collapsed project.
+ *
+ *  Each entity structure tracks which pages have been already visited for
+ *  that entity (via previously inspected processes) so that these pages are
+ *  not double counted.
+ */
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+#include <sys/proc.h>
+#include <sys/project.h>
+#include <sys/task.h>
+#include <sys/thread.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/modhash.h>
+#include <sys/modhash_impl.h>
+#include <sys/shm.h>
+#include <sys/swap.h>
+#include <sys/synch.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vm_usage.h>
+#include <sys/zone.h>
+#include <vm/anon.h>
+#include <vm/as.h>
+#include <vm/seg_vn.h>
+#include <vm/seg_spt.h>
+
+#define	VMUSAGE_HASH_SIZE		512
+
+#define	VMUSAGE_TYPE_VNODE		1
+#define	VMUSAGE_TYPE_AMP		2
+#define	VMUSAGE_TYPE_ANON		3
+
+#define	VMUSAGE_BOUND_UNKNOWN		0
+#define	VMUSAGE_BOUND_INCORE		1
+#define	VMUSAGE_BOUND_NOT_INCORE	2
+
+/*
+ * Bounds for vnodes and shared amps.  Each bound covers the inclusive page
+ * range [vmb_start, vmb_end] and is entirely incore, entirely not in core,
+ * or entirely unknown.  Bounds are stored in order by offset.
+ */
+typedef struct vmu_bound {
+	struct  vmu_bound *vmb_next; /* next bound; also free list link */
+	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
+	pgcnt_t	vmb_end;    /* page offset in vnode/amp on which bound ends */
+	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
+} vmu_bound_t;
+
+/*
+ * Hash entry for a visited object (vnode or shared amp).  The hash key is
+ * the address of the vnode or amp; vmo_bounds lists the known
+ * incore/not-incore bounds for that vnode/amp.
+ */
+typedef struct vmu_object {
+	struct vmu_object	*vmo_next;	/* free list */
+	caddr_t		vmo_key;	/* address of vnode or amp */
+	short		vmo_type;	/* type given to vmu_alloc_object() */
+	vmu_bound_t	*vmo_bounds;	/* head of bounds list, or NULL */
+} vmu_object_t;
+
+/*
+ * Entity by which to count results.
+ *
+ * The entity structure keeps the current rss/swap counts for each entity
+ * (zone, project, etc), and hashes of vm structures that have already
+ * been visited for the entity.
+ *
+ * vme_next:	links the list of all entities currently being counted by
+ *		vmu_calculate().  Also links freed entities on
+ *		vmu_data.vmu_free_entities.
+ *
+ * vme_next_calc: links the list of entities related to the current process
+ *		 being counted by vmu_calculate_proc().
+ *
+ * vmu_calculate_proc() walks all processes.  For each process, it makes a
+ * list of the entities related to that process using vme_next_calc.  This
+ * list changes each time vmu_calculate_proc() is called.
+ */
+typedef struct vmu_entity {
+	struct vmu_entity *vme_next;
+	struct vmu_entity *vme_next_calc;
+	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
+	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
+	mod_hash_t	*vme_anon_hash;	 /* cow anons visited for entity */
+	vmusage_t	vme_result;	 /* identifies entity and results */
+} vmu_entity_t;
+
+/*
+ * Hashes of the entities visited within a zone, and an entity for the zone
+ * itself.  vmu_alloc_zone() creates only the parts the calc flags ask for.
+ */
+typedef struct vmu_zone {
+	struct vmu_zone	*vmz_next;	/* free list */
+	id_t		vmz_id;		/* id passed to vmu_alloc_zone() */
+	vmu_entity_t	*vmz_zone;	/* entity for the zone, or NULL */
+	mod_hash_t	*vmz_projects_hash;	/* idhash, or NULL */
+	mod_hash_t	*vmz_tasks_hash;	/* idhash, or NULL */
+	mod_hash_t	*vmz_rusers_hash;	/* idhash, or NULL */
+	mod_hash_t	*vmz_eusers_hash;	/* idhash, or NULL */
+} vmu_zone_t;
+
+/*
+ * Cache of results from the last calculation (see vmu_data.vmu_cache).
+ */
+typedef struct vmu_cache {
+	vmusage_t	*vmc_results;	/* Results from last call to */
+					/* vm_getusage(). */
+	uint64_t	vmc_nresults;	/* Count of cached results */
+	uint64_t	vmc_refcnt;	/* refcnt for free */
+	uint_t		vmc_flags;	/* Flags for vm_getusage() */
+	hrtime_t	vmc_timestamp;	/* when cache was created */
+} vmu_cache_t;
+
+/*
+ * Top level rss/swap accounting state for the system
+ */
+typedef struct vmu_data {
+	kmutex_t	vmu_lock;		/* Protects vmu_data */
+	kcondvar_t	vmu_cv;			/* Used to signal threads */
+						/* waiting for the calc */
+						/* thread to finish */
+	vmu_entity_t	*vmu_system;		/* Entity for tracking */
+						/* rss/swap for all processes */
+						/* in all zones */
+	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
+	mod_hash_t	*vmu_projects_col_hash; /* These *_col_hash hashes */
+	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
+	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
+						/* to implement VMUSAGE_COL_* */
+						/* flags, which aggregate by */
+						/* project or user regardless */
+						/* of zoneid. */
+	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
+						/* to track incore/not-incore */
+	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
+						/* amps to track incore/not- */
+						/* incore */
+	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
+	size_t		vmu_nentities;		/* Count of entities in list */
+	vmu_cache_t	*vmu_cache;		/* Cached results */
+	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
+						/* vmu_calculate() */
+	uint_t		vmu_calc_flags;		/* Flags being used by */
+						/* currently running calc */
+						/* thread */
+	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
+						/* threads waiting for */
+						/* calc thread to finish */
+	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
+						/* for calc thread */
+	vmu_bound_t	*vmu_free_bounds;	/* Free list of bounds */
+	vmu_object_t	*vmu_free_objects;	/* Free list of objects */
+	vmu_entity_t	*vmu_free_entities;	/* Free list of entities */
+	vmu_zone_t	*vmu_free_zones;	/* Free list of zones */
+} vmu_data_t;
+
+extern struct as kas;
+extern proc_t *practive;
+extern zone_t *global_zone;
+extern struct seg_ops segvn_ops;
+extern struct seg_ops segspt_shmops;
+
+static vmu_data_t vmu_data;
+static kmem_cache_t *vmu_bound_cache;
+static kmem_cache_t *vmu_object_cache;
+
+/*
+ * Push bound onto the head of the vmu_data.vmu_free_bounds free list.
+ */
+static void
+vmu_free_bound(vmu_bound_t *bound)
+{
+	bound->vmb_next = vmu_data.vmu_free_bounds;
+	vmu_data.vmu_free_bounds = bound;
+}
+
+/*
+ * Recycle an object and every bound on its bounds list via the free lists.
+ */
+static void
+vmu_free_object(mod_hash_val_t val)
+{
+	vmu_object_t *object = (vmu_object_t *)val;
+	vmu_bound_t *cur, *following;
+
+	/* vmu_free_bound() clobbers vmb_next; save the successor first */
+	for (cur = object->vmo_bounds; cur != NULL; cur = following) {
+		following = cur->vmb_next;
+		vmu_free_bound(cur);
+	}
+
+	object->vmo_next = vmu_data.vmu_free_objects;
+	vmu_data.vmu_free_objects = object;
+}
+
+/*
+ * Empty the entity's hashes of visited objects (the hashes themselves are
+ * kept for reuse) and put the entity on the free list.
+ */
+static void
+vmu_free_entity(mod_hash_val_t val)
+{
+	vmu_entity_t *ent = (vmu_entity_t *)val;
+
+	if (ent->vme_vnode_hash != NULL)
+		i_mod_hash_clear_nosync(ent->vme_vnode_hash);
+	if (ent->vme_amp_hash != NULL)
+		i_mod_hash_clear_nosync(ent->vme_amp_hash);
+	if (ent->vme_anon_hash != NULL)
+		i_mod_hash_clear_nosync(ent->vme_anon_hash);
+	ent->vme_next = vmu_data.vmu_free_entities;
+	vmu_data.vmu_free_entities = ent;
+}
+
+/*
+ * Free zone entity, and empty all hashes of entities inside that zone,
+ * which are projects, tasks, and users.  The zone goes on the free list.
+ */
+static void
+vmu_free_zone(mod_hash_val_t val)
+{
+	vmu_zone_t *vz = (vmu_zone_t *)val;
+
+	if (vz->vmz_zone != NULL) {
+		vmu_free_entity((mod_hash_val_t)vz->vmz_zone);
+		vz->vmz_zone = NULL;
+	}
+	if (vz->vmz_projects_hash != NULL)
+		i_mod_hash_clear_nosync(vz->vmz_projects_hash);
+	if (vz->vmz_tasks_hash != NULL)
+		i_mod_hash_clear_nosync(vz->vmz_tasks_hash);
+	if (vz->vmz_rusers_hash != NULL)
+		i_mod_hash_clear_nosync(vz->vmz_rusers_hash);
+	if (vz->vmz_eusers_hash != NULL)
+		i_mod_hash_clear_nosync(vz->vmz_eusers_hash);
+	vz->vmz_next = vmu_data.vmu_free_zones;
+	vmu_data.vmu_free_zones = vz;
+}
+
+/*
+ * One-time setup of vmu_data: lock/cv, empty free and entity lists, idle
+ * calculation and cache state, system-wide hashes, and the kmem caches.
+ */
+void
+vm_usage_init()
+{
+	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);
+
+	/* No calculation running, nothing cached, nobody waiting */
+	vmu_data.vmu_cache = NULL;
+	vmu_data.vmu_calc_thread = NULL;
+	vmu_data.vmu_calc_flags = 0;
+	vmu_data.vmu_pending_flags = 0;
+	vmu_data.vmu_pending_waiters = 0;
+	/* Entity list and free lists start out empty */
+	vmu_data.vmu_system = NULL;
+	vmu_data.vmu_entities = NULL;
+	vmu_data.vmu_nentities = 0;
+	vmu_data.vmu_free_bounds = NULL;
+	vmu_data.vmu_free_objects = NULL;
+	vmu_data.vmu_free_entities = NULL;
+	vmu_data.vmu_free_zones = NULL;
+
+	/* Hash pointers are cleared, then each hash is created below */
+	vmu_data.vmu_zones_hash = NULL;
+	vmu_data.vmu_projects_col_hash = NULL;
+	vmu_data.vmu_rusers_col_hash = NULL;
+	vmu_data.vmu_eusers_col_hash = NULL;
+	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
+	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+	    sizeof (vnode_t));
+	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
+	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+	    sizeof (struct anon_map));
+	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
+	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
+	    vmu_free_entity);
+	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
+	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
+	    vmu_free_entity);
+	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
+	    "vmusage collpased euser hash", VMUSAGE_HASH_SIZE,
+	    vmu_free_entity);
+	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
+	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);
+	/* kmem caches backing vmu_alloc_bound() and vmu_alloc_object() */
+	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
+	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+	vmu_object_cache = kmem_cache_create("vmu_object_cache",
+	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+/*
+ * Get an entity (recycled from the free list if possible), make sure its
+ * visited-object hashes exist, and add it to the list of entities.
+ */
+static vmu_entity_t *
+vmu_alloc_entity(id_t id, int type, id_t zoneid)
+{
+	vmu_entity_t *ent = vmu_data.vmu_free_entities;
+
+	if (ent == NULL) {
+		ent = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
+	} else {
+		/* Recycled: hashes are reused, result is zeroed */
+		vmu_data.vmu_free_entities = ent->vme_next;
+		bzero(&ent->vme_result, sizeof (vmusage_t));
+	}
+	ent->vme_result.vmu_type = type;
+	ent->vme_result.vmu_id = id;
+	ent->vme_result.vmu_zoneid = zoneid;
+
+	/* A freshly allocated entity has no hashes yet; create them */
+	if (ent->vme_vnode_hash == NULL) {
+		ent->vme_vnode_hash = mod_hash_create_ptrhash(
+		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+		    sizeof (vnode_t));
+	}
+	if (ent->vme_amp_hash == NULL) {
+		ent->vme_amp_hash = mod_hash_create_ptrhash(
+		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+		    sizeof (struct anon_map));
+	}
+	if (ent->vme_anon_hash == NULL) {
+		ent->vme_anon_hash = mod_hash_create_ptrhash(
+		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
+		    mod_hash_null_valdtor, sizeof (struct anon));
+	}
+
+	ent->vme_next = vmu_data.vmu_entities;
+	vmu_data.vmu_entities = ent;
+	vmu_data.vmu_nentities++;
+	return (ent);
+}
+
+/*
+ * Get a zone (recycled if possible) and, according to the flags of the
+ * running calculation, its zone entity and project/task/user hashes.
+ */
+static vmu_zone_t *
+vmu_alloc_zone(id_t id)
+{
+	uint_t flags = vmu_data.vmu_calc_flags;
+	vmu_zone_t *zone = vmu_data.vmu_free_zones;
+
+	if (zone == NULL) {
+		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
+	} else {
+		vmu_data.vmu_free_zones = zone->vmz_next;
+		zone->vmz_next = NULL;
+		zone->vmz_zone = NULL;
+	}
+	zone->vmz_id = id;
+
+	/* Only create what the running calculation's flags ask for */
+	if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
+		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);
+
+	/* Hashes left on a recycled zone are reused, not recreated */
+	if (zone->vmz_projects_hash == NULL &&
+	    (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) != 0)
+		zone->vmz_projects_hash = mod_hash_create_idhash(
+		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+	if (zone->vmz_tasks_hash == NULL &&
+	    (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) != 0)
+		zone->vmz_tasks_hash = mod_hash_create_idhash(
+		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+	if (zone->vmz_rusers_hash == NULL &&
+	    (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) != 0)
+		zone->vmz_rusers_hash = mod_hash_create_idhash(
+		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+	if (zone->vmz_eusers_hash == NULL &&
+	    (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) != 0)
+		zone->vmz_eusers_hash = mod_hash_create_idhash(
+		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+	return (zone);
+}
+
+/*
+ * Get an object for tracking the visited bounds of the vm object identified
+ * by key, taken from the free list when one is available.
+ */
+static vmu_object_t *
+vmu_alloc_object(caddr_t key, int type)
+{
+	vmu_object_t *obj = vmu_data.vmu_free_objects;
+
+	/* Prefer a recycled object; fall back to the kmem cache */
+	if (obj == NULL)
+		obj = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
+	else
+		vmu_data.vmu_free_objects = obj->vmo_next;
+
+	/* vmo_next is not initialized here; the other fields are */
+	obj->vmo_bounds = NULL;
+	obj->vmo_type = type;
+	obj->vmo_key = key;
+
+	return (obj);
+}
+
+/*
+ * Return a zero-filled bound, recycled from the free list when possible.
+ */
+static vmu_bound_t *
+vmu_alloc_bound()
+{
+	vmu_bound_t *bnd = vmu_data.vmu_free_bounds;
+
+	/* Reuse the head of the free list, else allocate from the cache */
+	if (bnd == NULL)
+		bnd = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
+	else
+		vmu_data.vmu_free_bounds = bnd->vmb_next;
+
+	/* Either source may hold stale contents, so always clear */
+	bzero(bnd, sizeof (vmu_bound_t));
+
+	return (bnd);
+}
+
+/*
+ * vmu_find_insert_* functions look an item up in a hash and, when it is
+ * not there yet, allocate and insert it.
+ */
+static vmu_object_t *
+vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
+{
+	vmu_object_t *obj;
+	int err;
+
+	if (i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
+	    (mod_hash_val_t *)&obj) == 0)
+		return (obj);
+	/* Not present: allocate and insert under the same key */
+	obj = vmu_alloc_object(key, type);
+	err = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
+	    (mod_hash_val_t)obj, (mod_hash_hndl_t)0);
+	ASSERT(err == 0);
+	return (obj);
+}
+
+static int
+vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
+{
+	caddr_t found;
+	int err;
+
+	/* Already visited: report 0 and leave the hash alone */
+	if (i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
+	    (mod_hash_val_t *)&found) == 0) {
+		return (0);
+	}
+
+	/* First visit: record the key (also used as the value), report 1 */
+	err = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
+	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);
+	ASSERT(err == 0);
+
+	return (1);
+}
+
+static vmu_entity_t *
+vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
+{
+	mod_hash_key_t hkey = (mod_hash_key_t)(uintptr_t)id;
+	vmu_entity_t *ent;
+	int err;
+
+	if (i_mod_hash_find_nosync(hash, hkey, (mod_hash_val_t *)&ent) == 0)
+		return (ent);
+
+	/* Unknown id: allocate an entity and insert it under that id */
+	ent = vmu_alloc_entity(id, type, zoneid);
+	err = i_mod_hash_insert_nosync(hash, hkey, (mod_hash_val_t)ent,
+	    (mod_hash_hndl_t)0);
+	ASSERT(err == 0);
+	return (ent);
+}
+
+/*
+ * Looks up, and creates where missing, the bounds of object ro that cover
+ * the inclusive page range [start, end].  Gaps not covered by an existing
+ * bound are filled with new bounds of the given type, linked into
+ * ro->vmo_bounds in offset order.  On return *first and *last are the first
+ * and last bounds of the list overlapping the range.
+ *
+ * Returns the number of pages covered if new bounds are created.  Returns 0
+ * if region between start/end consists of all existing bounds.
+ */
+static pgcnt_t
+vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t
+    end, char type, vmu_bound_t **first, vmu_bound_t **last)
+{
+	vmu_bound_t *next;
+	vmu_bound_t *prev = NULL;
+	vmu_bound_t *tmp = NULL;
+	pgcnt_t ret = 0;
+
+	*first = *last = NULL;
+
+	for (next = ro->vmo_bounds; next != NULL; next = next->vmb_next) {
+		/*
+		 * Find bounds overlapping or overlapped by range [start,end].
+		 */
+		if (start > next->vmb_end) {
+			/* bound is before new bound */
+			prev = next;
+			continue;
+		}
+		if (next->vmb_start > end) {
+			/* bound is after new bound */
+			break;
+		}
+		if (*first == NULL)
+			*first = next;
+		*last = next;
+	}
+
+	if (*first == NULL) {
+		ASSERT(*last == NULL);
+		/*
+		 * No bounds overlapping range [start,end], so create new
+		 * bound
+		 */
+		tmp = vmu_alloc_bound();
+		tmp->vmb_start = start;
+		tmp->vmb_end = end;
+		tmp->vmb_type = type;
+		if (prev == NULL) {
+			tmp->vmb_next = ro->vmo_bounds;
+			ro->vmo_bounds = tmp;
+		} else {
+			tmp->vmb_next = prev->vmb_next;
+			prev->vmb_next = tmp;
+		}
+		*first = tmp;
+		*last = tmp;
+		ASSERT(tmp->vmb_end >= tmp->vmb_start);
+		ret = tmp->vmb_end - tmp->vmb_start + 1;
+		return (ret);
+	}
+
+	/* Check to see if start is before first known bound */
+	ASSERT(*first != NULL && *last != NULL);
+	next = (*first);
+	if (start < (*first)->vmb_start) {
+		/* Create new bound before first bound */
+		tmp = vmu_alloc_bound();
+		tmp->vmb_start = start;
+		tmp->vmb_end = (*first)->vmb_start - 1;
+		tmp->vmb_type = type;
+		tmp->vmb_next = *first;
+		if (*first == ro->vmo_bounds)
+			ro->vmo_bounds = tmp;
+		if (prev != NULL)
+			prev->vmb_next = tmp;
+		ASSERT(tmp->vmb_end >= tmp->vmb_start);
+		ret += tmp->vmb_end - tmp->vmb_start + 1;
+		*first = tmp;
+	}
+	/*
+	 * Between start and end, search for gaps between and after existing
+	 * bounds.  Create new bounds to fill gaps if they exist.
+	 */
+	while (end > next->vmb_end) {
+		/*
+		 * Check for gap between bound and next bound. if no gap,
+		 * continue.
+		 */
+		if ((next != *last) &&
+		    ((next->vmb_end + 1) == next->vmb_next->vmb_start)) {
+			next = next->vmb_next;
+			continue;
+		}
+		/*
+		 * Insert new bound in gap after bound, and before next
+		 * bound if next bound exists.
+		 */
+		tmp = vmu_alloc_bound();
+		tmp->vmb_type = type;
+		tmp->vmb_next = next->vmb_next;
+		tmp->vmb_start = next->vmb_end + 1;
+
+		if (next != *last) {
+			tmp->vmb_end = next->vmb_next->vmb_start - 1;
+			ASSERT(tmp->vmb_end >= tmp->vmb_start);
+			ret += tmp->vmb_end - tmp->vmb_start + 1;
+			next->vmb_next = tmp;
+			next = tmp->vmb_next;
+		} else {
+			tmp->vmb_end = end;
+			ASSERT(tmp->vmb_end >= tmp->vmb_start);
+			ret += tmp->vmb_end - tmp->vmb_start + 1;
+			next->vmb_next = tmp;
+			*last = tmp;
+			break;
+		}
+	}
+	return (ret);
+}
+
+/*
+ * vmu_update_bounds()
+ *
+ * first, last:	list of contiguous bounds, of which zero or more are of
+ *		type VMUSAGE_BOUND_UNKNOWN.
+ *
+ * new_first, new_last:	list of contiguous bounds, of which none are of
+ *			type VMUSAGE_BOUND_UNKNOWN.  These bounds are used to
+ *			update the types of bounds in (first,last) with
+ *			type VMUSAGE_BOUND_UNKNOWN.
+ *
+ * For the list of bounds (first,last), this function updates any bounds
+ * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in
+ * the list (new_first, new_last).
+ *
+ * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
+ * (new_first, new_last), it will be split into multiple bounds, and *last
+ * is advanced when the bound being split is the last one.
+ *
+ * Return value:
+ *	The number of pages in the list of bounds (first,last) that were of
+ *	type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
+ *	VMUSAGE_BOUND_INCORE.
+ */
+static pgcnt_t
+vmu_update_bounds(vmu_bound_t **first, vmu_bound_t **last,
+    vmu_bound_t *new_first, vmu_bound_t *new_last)
+{
+	vmu_bound_t *next, *new_next, *tmp;
+	pgcnt_t rss = 0;
+
+	next = *first;
+	new_next = new_first;
+
+	/* verify the new list covers the page range of (first,last) */
+	ASSERT((*first)->vmb_start >= new_next->vmb_start);
+	ASSERT((*last)->vmb_end <= new_last->vmb_end);
+	for (;;) {
+		/* If bound already has type, proceed to next bound */
+		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
+			if (next == *last)
+				break;
+			next = next->vmb_next;
+			continue;
+		}
+		while (new_next->vmb_end < next->vmb_start)
+			new_next = new_next->vmb_next;
+		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
+		next->vmb_type = new_next->vmb_type;
+		if (new_next->vmb_end < next->vmb_end) {
+			/* split: the remainder stays unknown for the next pass */
+			tmp = vmu_alloc_bound();
+			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
+			tmp->vmb_start = new_next->vmb_end + 1;
+			tmp->vmb_end = next->vmb_end;
+			tmp->vmb_next = next->vmb_next;
+			next->vmb_end = new_next->vmb_end;
+			next->vmb_next = tmp;
+			if (*last == next)
+				*last = tmp;
+			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
+				rss += next->vmb_end - next->vmb_start + 1;
+			next = tmp;
+		} else {
+			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
+				rss += next->vmb_end - next->vmb_start + 1;
+			if (next == *last)
+				break;
+			next = next->vmb_next;
+		}
+	}
+	return (rss);
+}
+
+/*
+ * Merges adjacent bounds with the same type between first and last bound.
+ * If the bound *last refers to is merged away, *last is updated to the
+ * surviving bound, so callers must re-read *last after the call.
+ */
+static void
+vmu_merge_bounds(vmu_bound_t **first, vmu_bound_t **last)
+{
+	vmu_bound_t *cur;
+	vmu_bound_t *absorbed;
+
+	ASSERT(*first != NULL);
+	ASSERT(*last != NULL);
+
+	cur = *first;
+	while (cur != *last) {
+		absorbed = cur->vmb_next;
+
+		/* Step forward unless the successor abuts and matches type */
+		if ((cur->vmb_end + 1) != absorbed->vmb_start ||
+		    cur->vmb_type != absorbed->vmb_type) {
+			cur = absorbed;
+			continue;
+		}
+		cur->vmb_end = absorbed->vmb_end;
+		cur->vmb_next = absorbed->vmb_next;
+		if (absorbed == *last)
+			*last = cur;
+		vmu_free_bound(absorbed);
+	}
+}
+
+/*
+ * Given an amp and a list of bounds, updates each bound's type with
+ * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
+ *
+ * If a bound is partially incore, it is split wherever the incore state
+ * changes.  *first and *last delimit the list; *last is updated when the
+ * last bound is split.
+ *
+ * If incore is B_TRUE the pages are already known to be incore, so every
+ * bound is marked VMUSAGE_BOUND_INCORE without looking up any page.
+ */
+static void
+vmu_amp_update_incore_bounds(struct anon_map *amp, vmu_bound_t **first,
+    vmu_bound_t **last, boolean_t incore)
+{
+	vmu_bound_t *next;
+	vmu_bound_t *tmp;
+	pgcnt_t index;
+	short page_type;
+	vnode_t *vn;
+	anoff_t off;
+	struct anon *ap;
+
+	next = *first;
+	/* Shared anon slots don't change once set */
+	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
+	for (;;) {
+		if (incore == B_TRUE)
+			next->vmb_type = VMUSAGE_BOUND_INCORE;
+
+		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
+			if (next == *last)
+				break;
+			next = next->vmb_next;
+			continue;
+		}
+		/* Examine this unknown bound page by page */
+		index = next->vmb_start;
+		while (index <= next->vmb_end) {
+
+			/*
+			 * These are used to determine how much to increment
+			 * index when a large page is found.
+			 */
+			page_t *page;
+			pgcnt_t pgcnt = 1;
+			uint_t pgshft;
+			pgcnt_t pgmsk;
+
+			ap = anon_get_ptr(amp->ahp, index);
+			if (ap != NULL)
+				swap_xlate(ap, &vn, &off);
+
+			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
+			    (page = page_exists(vn, off)) != NULL) {
+				page_type = VMUSAGE_BOUND_INCORE;
+				if (page->p_szc > 0) {
+					pgcnt = page_get_pagecnt(page->p_szc);
+					pgshft = page_get_shift(page->p_szc);
+					pgmsk = (0x1 << (pgshft - PAGESHIFT))
+					    - 1;
+				}
+			} else {
+				page_type = VMUSAGE_BOUND_NOT_INCORE;
+			}
+			/* First page examined types the bound */
+			if (next->vmb_type == VMUSAGE_BOUND_UNKNOWN) {
+				next->vmb_type = page_type;
+			} else if (next->vmb_type != page_type) {
+				/*
+				 * if current bound type does not match page
+				 * type, need to split off new bound.
+				 */
+				tmp = vmu_alloc_bound();
+				tmp->vmb_type = page_type;
+				tmp->vmb_start = index;
+				tmp->vmb_end = next->vmb_end;
+				tmp->vmb_next = next->vmb_next;
+				next->vmb_end = index - 1;
+				next->vmb_next = tmp;
+				if (*last == next)
+					*last = tmp;
+				next = tmp;
+			}
+			if (pgcnt > 1) {
+				/*
+				 * If inside large page, jump to next large
+				 * page
+				 */
+				index = (index & ~pgmsk) + pgcnt;
+			} else {
+				index++;
+			}
+		}
+		if (next == *last) {
+			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
+			break;
+		} else
+			next = next->vmb_next;
+	}
+	ANON_LOCK_EXIT(&amp->a_rwlock);
+}
+
+/*
+ * Same as vmu_amp_update_incore_bounds(), except for tracking
+ * incore-/not-incore for vnodes.
+ */
+static void
+vmu_vnode_update_incore_bounds(vnode_t *vnode, vmu_bound_t **first,
+    vmu_bound_t **last)
+{
+	vmu_bound_t *next;
+	vmu_bound_t *tmp;
+	pgcnt_t index;
+	short bound_type;
+	short page_type;
+
+	next = *first;
+	for (;;) {
+		if (vnode->v_pages == NULL)
+			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;
+
+		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
+			if (next == *last)
+				break;
+			next = next->vmb_next;
+			continue;
+		}
+		bound_type = next->vmb_type;
+		index = next->vmb_start;
+		while (index <= next->vmb_end) {
+
+			/*
+			 * These are used to determine how much to increment
+			 * index when a large page is found.
+			 */
+			page_t *page;
+			pgcnt_t pgcnt = 1;
+			uint_t pgshft;
+			pgcnt_t pgmsk;
+
+			if (vnode->v_pages != NULL &&
+			    (page = page_exists(vnode, ptob(index))) != NULL) {
+				page_type = VMUSAGE_BOUND_INCORE;
+				if (page->p_szc > 0) {
+					pgcnt = page_get_pagecnt(page->p_szc);
+					pgshft = page_get_shift(page->p_szc);
+					pgmsk = (0x1 << (pgshft - PAGESHIFT))
+					    - 1;
+				}
+			} else {
+				page_type = VMUSAGE_BOUND_NOT_INCORE;
+			}
+			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
+				next->vmb_type = page_type;
+				bound_type = page_type;	/* type now known */
+			} else if (next->vmb_type != page_type) {
+				/*
+				 * if current bound type does not match page
+				 * type, need to split off new bound.
+				 */
+				tmp = vmu_alloc_bound();
+				tmp->vmb_type = page_type;
+				tmp->vmb_start = index;
+				tmp->vmb_end = next->vmb_end;
+				tmp->vmb_next = next->vmb_next;
+				next->vmb_end = index - 1;
+				next->vmb_next = tmp;
+				if (*last == next)
+					*last = tmp;
+				next = tmp;
+			}
+			if (pgcnt > 1) {
+				/*
+				 * If inside large page, jump to next large
+				 * page
+				 */
+				index = (index & ~pgmsk) + pgcnt;
+			} else {
+				index++;
+			}
+		}
+		if (next == *last) {
+			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
+			break;
+		} else
+			next = next->vmb_next;
+	}
+}
+
+/*
+ * Calculate the rss and swap consumed by a segment.  vmu_entities is the
+ * list of entities to visit.  For shared segments, the vnode or amp
+ * is looked up in each entity to see if it has been already counted.
+ * Private anon pages are checked per entity to ensure that cow pages are
+ * not double counted.  Segments that are neither segvn nor shm are ignored.
+ *
+ * For private mapped files, first the amp is checked for private pages.
+ * Bounds not backed by the amp are looked up in the vnode for each entity
+ * to avoid double counting of private COW vnode pages.
+ */
+static void
+vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
+{
+	struct segvn_data *svd;
+	struct shm_data *shmd;
+	struct spt_data *sptd;
+	vmu_object_t *shared_object = NULL;
+	vmu_object_t *entity_object = NULL;
+	vmu_entity_t *entity;
+	vmusage_t *result;
+	vmu_bound_t *first = NULL;
+	vmu_bound_t *last = NULL;
+	vmu_bound_t *cur = NULL;
+	vmu_bound_t *e_first = NULL;
+	vmu_bound_t *e_last = NULL;
+	vmu_bound_t *tmp;
+	pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
+	struct anon_map *private_amp = NULL;
+	boolean_t incore = B_FALSE;
+	boolean_t shared = B_FALSE;
+	int file = 0;
+	pgcnt_t swresv = 0;
+	pgcnt_t panon = 0;
+
+	/* Can zero-length segments exist?  Not sure, so paranoia */
+	if (seg->s_size <= 0)
+		return;
+
+	/*
+	 * Figure out if there is a shared object (such as a named vnode or
+	 * a shared amp), then figure out if there is a private amp, which
+	 * identifies private pages.
+	 */
+	if (seg->s_ops == &segvn_ops) {
+		svd = (struct segvn_data *)seg->s_data;
+		if (svd->type == MAP_SHARED)
+			shared = B_TRUE;
+		else
+			swresv = svd->swresv;
+
+		if (svd->vp != NULL) {
+			file = 1;
+			shared_object = vmu_find_insert_object(
+			    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
+			    VMUSAGE_TYPE_VNODE);
+			s_start = btop(svd->offset);
+			s_end = btop(svd->offset + seg->s_size) - 1;
+		}
+		if (svd->amp != NULL && svd->type == MAP_SHARED) {
+			ASSERT(shared_object == NULL);
+			shared_object = vmu_find_insert_object(
+			    vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
+			    VMUSAGE_TYPE_AMP);
+			s_start = svd->anon_index;
+			s_end = svd->anon_index + btop(seg->s_size) - 1;
+			/* schedctl mappings are always in core */
+			if (svd->amp->swresv == 0)
+				incore = B_TRUE;
+		}
+		if (svd->amp != NULL && svd->type == MAP_PRIVATE) {
+			private_amp = svd->amp;
+			p_start = svd->anon_index;
+			p_end = svd->anon_index + btop(seg->s_size) - 1;
+		}
+	} else if (seg->s_ops == &segspt_shmops) {
+		shared = B_TRUE;
+		shmd = (struct shm_data *)seg->s_data;
+		shared_object = vmu_find_insert_object(
+		    vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
+		    VMUSAGE_TYPE_AMP);
+		s_start = 0;
+		s_end = btop(seg->s_size) - 1;
+		sptd = shmd->shm_sptseg->s_data;
+
+		/* ism segments are always incore and do not reserve swap */
+		if (sptd->spt_flags & SHM_SHARE_MMU)
+			incore = B_TRUE;
+
+	} else {
+		return;
+	}
+
+	/*
+	 * If there is a private amp, count anon pages that exist.  If an
+	 * anon has a refcnt > 1 (cow sharing), then save the anon in a
+	 * hash so that it is not double counted.
+	 *
+	 * If there is also a shared object, then figure out the bounds
+	 * which are not mapped by the private amp.
+	 */
+	if (private_amp != NULL) {
+
+		/* Enter as writer to prevent cow anons from being freed */
+		ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);
+		/* s_start is only set when there is a shared object */
+		p_index = p_start;
+		s_index = (shared_object != NULL) ? s_start : 0;
+
+		while (p_index <= p_end) {
+
+			pgcnt_t p_index_next;
+			pgcnt_t p_bound_size;
+			int cnt;
+			anoff_t off;
+			struct vnode *vn;
+			struct anon *ap;
+			page_t *page;		/* For handling of large */
+			pgcnt_t pgcnt = 1;	/* pages */
+			pgcnt_t pgstart;
+			pgcnt_t pgend;
+			uint_t pgshft;
+			pgcnt_t pgmsk;
+
+			p_index_next = p_index;
+			ap = anon_get_next_ptr(private_amp->ahp,
+			    &p_index_next);
+
+			/*
+			 * If next anon is past end of mapping, simulate
+			 * end of anon so loop terminates.
+			 */
+			if (p_index_next > p_end) {
+				p_index_next = p_end + 1;
+				ap = NULL;
+			}
+			/*
+			 * For cow segments, keep track of bounds not
+			 * backed by private amp so they can be looked
+			 * up in the backing vnode
+			 */
+			if (p_index_next != p_index) {
+
+				/*
+				 * Compute index difference between anon and
+				 * previous anon.
+				 */
+				p_bound_size = p_index_next - p_index - 1;
+
+				if (shared_object != NULL) {
+					cur = vmu_alloc_bound();
+					cur->vmb_next = NULL;
+					cur->vmb_start = s_index;
+					cur->vmb_end = s_index + p_bound_size;
+					cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
+					if (first == NULL) {
+						first = cur;
+						last = cur;
+					} else {
+						last->vmb_next = cur;
+						last = cur;
+					}
+				}
+				p_index = p_index + p_bound_size + 1;
+				s_index = s_index + p_bound_size + 1;
+			}
+
+			/* Detect end of anons in amp */
+			if (ap == NULL)
+				break;
+
+			cnt = ap->an_refcnt;
+			swap_xlate(ap, &vn, &off);
+
+			if (vn == NULL || vn->v_pages == NULL ||
+			    (page = page_exists(vn, off)) == NULL) {
+				p_index++;
+				s_index++;
+				continue;
+			}
+
+			/*
+			 * If large page is found, compute portion of large
+			 * page in mapping, and increment indices to the next
+			 * large page.
+			 */
+			if (page->p_szc > 0) {
+
+				pgcnt = page_get_pagecnt(page->p_szc);
+				pgshft = page_get_shift(page->p_szc);
+				pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;
+
+				/* First page in large page */
+				pgstart = p_index & ~pgmsk;
+				/* Last page in large page */
+				pgend = pgstart + pgcnt - 1;
+				/*
+				 * Artificially end page if page extends past
+				 * end of mapping.
+				 */
+				if (pgend > p_end)
+					pgend = p_end;
+
+				/*
+				 * Compute number of pages from large page
+				 * which are mapped.
+				 */
+				pgcnt = pgend - p_index + 1;
+
+				/*
+				 * Point indices at page after large page,
+				 * or at page after end of mapping.
+				 */
+				p_index += pgcnt;
+				s_index += pgcnt;
+			} else {
+				p_index++;
+				s_index++;
+			}
+
+			/*
+			 * Assume anon structs with a refcnt
+			 * of 1 are not cow shared, so there
+			 * is no reason to track them per entity.
+			 */
+			if (cnt == 1) {
+				panon += pgcnt;
+				continue;
+			}
+			for (entity = vmu_entities; entity != NULL;
+			    entity = entity->vme_next_calc) {
+
+				result = &entity->vme_result;
+				/*
+				 * Track cow anons per entity so
+				 * they are not double counted.
+				 */
+				if (vmu_find_insert_anon(entity->vme_anon_hash,
+				    (caddr_t)ap) == 0)
+					continue;
+
+				result->vmu_rss_all += (pgcnt << PAGESHIFT);
+				result->vmu_rss_private +=
+				    (pgcnt << PAGESHIFT);
+			}
+		}
+		ANON_LOCK_EXIT(&private_amp->a_rwlock);
+	}
+
+	/* Add up resident anon and swap reserved for private mappings */
+	if (swresv > 0 || panon > 0) {
+		for (entity = vmu_entities; entity != NULL;
+		    entity = entity->vme_next_calc) {
+			result = &entity->vme_result;
+			result->vmu_swap_all += swresv;
+			result->vmu_swap_private += swresv;
+			result->vmu_rss_all += (panon << PAGESHIFT);
+			result->vmu_rss_private += (panon << PAGESHIFT);
+		}
+	}
+
+	/* Compute resident pages backing shared amp or named vnode */
+	if (shared_object != NULL) {
+		if (first == NULL) {
+			/*
+			 * No private amp, or private amp has no anon
+			 * structs.  This means entire segment is backed by
+			 * the shared object.
+			 */
+			first = vmu_alloc_bound();
+			first->vmb_next = NULL;
+			first->vmb_start = s_start;
+			first->vmb_end = s_end;
+			first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
+		}
+		/*
+		 * Iterate bounds not backed by private amp, and compute
+		 * resident pages.
+		 */
+		cur = first;
+		while (cur != NULL) {
+
+			if (vmu_insert_lookup_object_bounds(shared_object,
+			    cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
+			    &first, &last) > 0) {
+				/* new bounds, find incore/not-incore */
+				if (shared_object->vmo_type ==
+				    VMUSAGE_TYPE_VNODE)
+					vmu_vnode_update_incore_bounds(
+					    (vnode_t *)
+					    shared_object->vmo_key, &first,
+					    &last);
+				else
+					vmu_amp_update_incore_bounds(
+					    (struct anon_map *)
+					    shared_object->vmo_key, &first,
+					    &last, incore);
+				vmu_merge_bounds(&first, &last);
+			}
+			for (entity = vmu_entities; entity != NULL;
+			    entity = entity->vme_next_calc) {
+
+				result = &entity->vme_result;
+
+				entity_object = vmu_find_insert_object(
+				    shared_object->vmo_type ==
+				    VMUSAGE_TYPE_VNODE ? entity->vme_vnode_hash:
+					entity->vme_amp_hash,
+					shared_object->vmo_key,
+					shared_object->vmo_type);
+
+				virt = vmu_insert_lookup_object_bounds(
+				    entity_object, cur->vmb_start, cur->vmb_end,
+				    VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
+
+				if (virt == 0)
+					continue;
+				/*
+				 * Range visited for this entity
+				 */
+				rss = vmu_update_bounds(&e_first,
+				    &e_last, first, last);
+				result->vmu_rss_all += (rss << PAGESHIFT);
+				if (shared == B_TRUE && file == B_FALSE) {
+					/* shared anon mapping */
+					result->vmu_swap_all +=
+					    (virt << PAGESHIFT);
+					result->vmu_swap_shared +=
+					    (virt << PAGESHIFT);
+					result->vmu_rss_shared +=
+					    (rss << PAGESHIFT);
+				} else if (shared == B_TRUE && file == B_TRUE) {
+					/* shared file mapping */
+					result->vmu_rss_shared +=
+					    (rss << PAGESHIFT);
+				} else if (shared == B_FALSE &&
+				    file == B_TRUE) {
+					/* private file mapping */
+					result->vmu_rss_private +=
+					    (rss << PAGESHIFT);
+				}
+				vmu_merge_bounds(&e_first, &e_last);
+			}
+			tmp = cur;
+			cur = cur->vmb_next;
+			vmu_free_bound(tmp);
+		}
+	}
+}
+
+/*
+ * Chain (via vme_next_calc) every entity selected by vmu_calc_flags that
+ * process p belongs to, then run vmu_calculate_seg() on each segment of
+ * p's address space for that entity list, holding a_lock as reader.
+ */
+static void
+vmu_calculate_proc(proc_t *p)
+{
+	vmu_entity_t *entities = NULL;
+	vmu_zone_t *zone;
+	vmu_entity_t *tmp;
+	struct as *as;
+	struct seg *seg;
+	int ret;
+
+	/* Figure out which entities are being computed */
+	if ((vmu_data.vmu_system) != NULL) {
+		tmp = vmu_data.vmu_system;
+		tmp->vme_next_calc = entities;
+		entities = tmp;
+	}
+	if (vmu_data.vmu_calc_flags &
+	    (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
+	    VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
+	    VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
+	    VMUSAGE_ALL_EUSERS)) {
+		ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
+		    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
+		    (mod_hash_val_t *)&zone);
+		if (ret != 0) {
+			zone = vmu_alloc_zone(p->p_zone->zone_id);
+			ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
+			    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
+			    (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
+			ASSERT(ret == 0);
+		}
+		if (zone->vmz_zone != NULL) {
+			tmp = zone->vmz_zone;
+			tmp->vme_next_calc = entities;
+			entities = tmp;
+		}
+		if (vmu_data.vmu_calc_flags &
+		    (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
+			tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
+			    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
+			    zone->vmz_id);
+			tmp->vme_next_calc = entities;
+			entities = tmp;
+		}
+		if (vmu_data.vmu_calc_flags &
+		    (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
+			tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
+			    p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
+			tmp->vme_next_calc = entities;
+			entities = tmp;
+		}
+		if (vmu_data.vmu_calc_flags &
+		    (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
+			tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
+			    crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
+			tmp->vme_next_calc = entities;
+			entities = tmp;
+		}
+		if (vmu_data.vmu_calc_flags &
+		    (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
+			tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
+			    crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
+			tmp->vme_next_calc = entities;
+			entities = tmp;
+		}
+	}
+	/* Entities which collapse projects and users across all zones */
+	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
+		tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
+		    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
+		tmp->vme_next_calc = entities;
+		entities = tmp;
+	}
+	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
+		tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
+		    crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
+		tmp->vme_next_calc = entities;
+		entities = tmp;
+	}
+	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
+		tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
+		    crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
+		tmp->vme_next_calc = entities;
+		entities = tmp;
+	}
+
+	ASSERT(entities != NULL);
+	/* Walk every seg in the address space with a_lock held as reader */
+	as = p->p_as;
+	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+	for (seg = AS_SEGFIRST(as); seg != NULL;
+	    seg = AS_SEGNEXT(as, seg)) {
+		vmu_calculate_seg(entities, seg);
+	}
+	AS_LOCK_EXIT(as, &as->a_lock);
+}
+
+/* Free data created by the previous call to vmu_calculate(). */
+static void
+vmu_clear_calc()
+{
+	if (vmu_data.vmu_system != NULL) {
+		vmu_free_entity(vmu_data.vmu_system);
+		vmu_data.vmu_system = NULL;
+	}
+	if (vmu_data.vmu_zones_hash != NULL)
+		i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
+	if (vmu_data.vmu_projects_col_hash != NULL)
+		i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
+	if (vmu_data.vmu_rusers_col_hash != NULL)
+		i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
+	if (vmu_data.vmu_eusers_col_hash != NULL)
+		i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
+
+	/* The all-vnodes and all-amps hashes are cleared unconditionally */
+	i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
+	i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
+}
+
+/*
+ * Free unused data structures parked on the vmu_data free lists.  These
+ * can result if the system workload decreases between calculations.
+ * Every list is drained completely.
+ */
+static void
+vmu_free_extra()
+{
+	vmu_bound_t *bound;
+	vmu_object_t *object;
+	vmu_entity_t *entity;
+	vmu_zone_t *zone;
+
+	/* Bounds go straight back to their kmem cache */
+	while ((bound = vmu_data.vmu_free_bounds) != NULL) {
+		vmu_data.vmu_free_bounds = bound->vmb_next;
+		kmem_cache_free(vmu_bound_cache, bound);
+	}
+	/* ... and so do objects */
+	while ((object = vmu_data.vmu_free_objects) != NULL) {
+		vmu_data.vmu_free_objects = object->vmo_next;
+		kmem_cache_free(vmu_object_cache, object);
+	}
+
+	/* Entities may own up to three hashes; destroy those first */
+	while ((entity = vmu_data.vmu_free_entities) != NULL) {
+		vmu_data.vmu_free_entities = entity->vme_next;
+		if (entity->vme_vnode_hash != NULL)
+			mod_hash_destroy_hash(entity->vme_vnode_hash);
+		if (entity->vme_amp_hash != NULL)
+			mod_hash_destroy_hash(entity->vme_amp_hash);
+		if (entity->vme_anon_hash != NULL)
+			mod_hash_destroy_hash(entity->vme_anon_hash);
+		kmem_free(entity, sizeof (vmu_entity_t));
+	}
+
+	/* Zones may own up to four entity hashes; destroy those first */
+	while ((zone = vmu_data.vmu_free_zones) != NULL) {
+		vmu_data.vmu_free_zones = zone->vmz_next;
+		if (zone->vmz_projects_hash != NULL)
+			mod_hash_destroy_hash(zone->vmz_projects_hash);
+		if (zone->vmz_tasks_hash != NULL)
+			mod_hash_destroy_hash(zone->vmz_tasks_hash);
+		if (zone->vmz_rusers_hash != NULL)
+			mod_hash_destroy_hash(zone->vmz_rusers_hash);
+		if (zone->vmz_eusers_hash != NULL)
+			mod_hash_destroy_hash(zone->vmz_eusers_hash);
+		kmem_free(zone, sizeof (vmu_zone_t));
+	}
+}
+
+extern kcondvar_t *pr_pid_cv;
+
+/*
+ * Determine which entity types are relevant and allocate the hashes to
+ * track them.  Then walk the process table and count rss and swap
+ * for each process's address space.  Address space objects such as
+ * vnodes, amps and anons are tracked per entity, so that they are
+ * not double counted in the results.
+ * Returns early, skipping vmu_free_extra(), if the system is panicking.
+ */
+static void
+vmu_calculate()
+{
+	int i = 0;
+	int ret;
+	proc_t *p;
+
+	vmu_clear_calc();
+
+	if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
+		vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
+		    ALL_ZONES);
+
+	/*
+	 * Walk process table and calculate rss of each proc.
+	 *
+	 * Pidlock and p_lock cannot be held while doing the rss calculation.
+	 * This is because:
+	 *	1.  The calculation allocates using KM_SLEEP.
+	 *	2.  The calculation grabs a_lock, which cannot be grabbed
+	 *	    after p_lock.
+	 *
+	 * Since pidlock must be dropped, we cannot simply just walk the
+	 * practive list.  Instead, we walk the process table, and sprlock
+	 * each process to ensure that it does not exit during the
+	 * calculation.
+	 */
+
+	mutex_enter(&pidlock);
+	for (i = 0; i < v.v_proc; i++) {
+again:
+		p = pid_entry(i);
+		if (p == NULL)
+			continue;
+
+		mutex_enter(&p->p_lock);
+		mutex_exit(&pidlock);
+
+		if (panicstr) {
+			mutex_exit(&p->p_lock);
+			return;
+		}
+
+		/* Try to set P_PR_LOCK */
+		ret = sprtrylock_proc(p);
+		if (ret == -1) {
+			/* Process in invalid state; skip to the next slot */
+			mutex_exit(&p->p_lock);
+			mutex_enter(&pidlock);
+			continue;
+		} else if (ret == 1) {
+			/*
+			 * P_PR_LOCK is already set.  Wait and try again.
+			 * This also drops p_lock.
+			 */
+			sprwaitlock_proc(p);
+			mutex_enter(&pidlock);
+			goto again;
+		}
+		mutex_exit(&p->p_lock);
+
+		vmu_calculate_proc(p);
+
+		mutex_enter(&p->p_lock);
+		sprunlock(p);
+		mutex_enter(&pidlock);
+	}
+	mutex_exit(&pidlock);
+
+	vmu_free_extra();
+}
+
+/*
+ * Allocate a new cache with room for nres results satisfying flags.
+ * The cache is handed back holding a single reference.
+ */
+vmu_cache_t *
+vmu_cache_alloc(size_t nres, uint_t flags)
+{
+	vmu_cache_t *vc = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
+
+	vc->vmc_results = kmem_zalloc(nres * sizeof (vmusage_t), KM_SLEEP);
+	vc->vmc_nresults = nres;
+	vc->vmc_flags = flags;
+	vc->vmc_refcnt = 1;
+	return (vc);
+}
+
+/*
+ * Take an extra reference so cached results are not freed while in use
+ */
+static void
+vmu_cache_hold(vmu_cache_t *cache)
+{
+	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
+	cache->vmc_refcnt += 1;
+}
+
+/*
+ * Drop one reference on cache; the results array and the cache itself
+ * are freed once the last reference is gone.
+ */
+static void
+vmu_cache_rele(vmu_cache_t *cache)
+{
+	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
+	ASSERT(cache->vmc_refcnt > 0);
+	if (--cache->vmc_refcnt != 0)
+		return;
+	kmem_free(cache->vmc_results,
+	    cache->vmc_nresults * sizeof (vmusage_t));
+	kmem_free(cache, sizeof (vmu_cache_t));
+}
+
+/*
+ * Copy out the cached results to a caller.  Inspect the caller's flags
+ * and zone to determine which cached results should be copied.
+ */
+static int
+vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
+    uint_t flags)
+{
+	vmusage_t *result, *out_result;
+	vmusage_t dummy;
+	size_t i, count = 0;
+	size_t bufsize;
+	int ret = 0;
+	uint_t types = 0;
+
+	if (nres != NULL) {
+		if (copyin((caddr_t)nres, &bufsize, sizeof (size_t)))
+			return (set_errno(EFAULT));
+	} else {
+		bufsize = 0;
+	}
+
+	/* figure out what results the caller is interested in. */
+	if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
+		types |= VMUSAGE_SYSTEM;
+	if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
+		types |= VMUSAGE_ZONE;
+	if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
+	    VMUSAGE_COL_PROJECTS))
+		types |= VMUSAGE_PROJECTS;
+	if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
+		types |= VMUSAGE_TASKS;
+	if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
+		types |= VMUSAGE_RUSERS;
+	if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
+		types |= VMUSAGE_EUSERS;
+
+	/* count matching results; copy each out while buf has room */
+	out_result = buf;
+	for (result = cache->vmc_results, i = 0;
+	    i < cache->vmc_nresults; result++, i++) {
+
+		/* Do not return "other-zone" results to non-global zones */
+		if (curproc->p_zone != global_zone &&
+		    curproc->p_zone->zone_id != result->vmu_zoneid)
+			continue;
+
+		/*
+		 * If non-global zone requests VMUSAGE_SYSTEM, fake
+		 * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result.
+		 */
+		if (curproc->p_zone != global_zone &&
+		    (flags & VMUSAGE_SYSTEM) != 0 &&
+		    result->vmu_type == VMUSAGE_ZONE) {
+			count++;
+			if (out_result != NULL) {
+				if (bufsize < count) {
+					ret = set_errno(EOVERFLOW);
+				} else {
+					dummy = *result;
+					dummy.vmu_zoneid = ALL_ZONES;
+					dummy.vmu_id = 0;
+					dummy.vmu_type = VMUSAGE_SYSTEM;
+					if (copyout(&dummy, out_result,
+					    sizeof (vmusage_t)))
+						return (set_errno(
+						    EFAULT));
+					out_result++;
+				}
+			}
+		}
+
+		/* Skip results that do not match requested type */
+		if ((result->vmu_type & types) == 0)
+			continue;
+
+		/* Skip collated results if not requested */
+		if (result->vmu_zoneid == ALL_ZONES) {
+			if (result->vmu_type == VMUSAGE_PROJECTS &&
+			    (flags & VMUSAGE_COL_PROJECTS) == 0)
+				continue;
+			if (result->vmu_type == VMUSAGE_EUSERS &&
+			    (flags & VMUSAGE_COL_EUSERS) == 0)
+				continue;
+			if (result->vmu_type == VMUSAGE_RUSERS &&
+			    (flags & VMUSAGE_COL_RUSERS) == 0)
+				continue;
+		}
+
+		/* Skip "other zone" results if not requested */
+		if (result->vmu_zoneid != curproc->p_zone->zone_id) {
+			if (result->vmu_type == VMUSAGE_ZONE &&
+			    (flags & VMUSAGE_ALL_ZONES) == 0)
+				continue;
+			if (result->vmu_type == VMUSAGE_PROJECTS &&
+			    (flags & (VMUSAGE_ALL_PROJECTS |
+			    VMUSAGE_COL_PROJECTS)) == 0)
+				continue;
+			if (result->vmu_type == VMUSAGE_TASKS &&
+			    (flags & VMUSAGE_ALL_TASKS) == 0)
+				continue;
+			if (result->vmu_type == VMUSAGE_RUSERS &&
+			    (flags & (VMUSAGE_ALL_RUSERS |
+			    VMUSAGE_COL_RUSERS)) == 0)
+				continue;
+			if (result->vmu_type == VMUSAGE_EUSERS &&
+			    (flags & (VMUSAGE_ALL_EUSERS |
+			    VMUSAGE_COL_EUSERS)) == 0)
+				continue;
+		}
+		count++;
+		if (out_result != NULL) {
+			if (bufsize < count) {
+				ret = set_errno(EOVERFLOW);
+			} else {
+				if (copyout(result, out_result,
+				    sizeof (vmusage_t)))
+					return (set_errno(EFAULT));
+				out_result++;
+			}
+		}
+	}
+	if (nres != NULL)
+		if (copyout(&count, (void *)nres, sizeof (size_t)))
+			return (set_errno(EFAULT));
+
+	return (ret);
+}
+
+/*
+ * vm_getusage()
+ *
+ * Counts rss and swap by zone, project, task, and/or user.  The flags argument
+ * determines the type of results structures returned.  Flags requesting
+ * results from more than one zone are "flattened" to the local zone if the
+ * caller is not the global zone.
+ *
+ * args:
+ *	flags:	bitmap consisting of one or more of VMUSAGE_*.
+ *	age:	maximum allowable age (time since counting was done) in
+ *		seconds of the results.  Results from previous callers are
+ *		cached in kernel.
+ *	buf:	pointer to buffer array of vmusage_t.  If NULL, then only nres
+ *		set on success.
+ *	nres:	Set to number of vmusage_t structures pointed to by buf
+ *		before calling vm_getusage().
+ *		On return 0 (success) or EOVERFLOW, is set to the number of
+ *		result structures returned or attempted to return.
+ *
+ * returns 0 on success, -1 on failure:
+ *	EINTR (interrupted)
+ *	EOVERFLOW (nres too small for results, nres set to needed value)
+ *	EINVAL (flags invalid)
+ *	EFAULT (bad address for buf or nres)
+ */
+int
+vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres)
+{
+	vmu_entity_t *entity;
+	vmusage_t *result;
+	int ret = 0;
+	int cacherecent = 0;
+	hrtime_t now;
+	uint_t flags_orig;
+
+	/*
+	 * Non-global zones cannot request system wide and/or collated
+	 * results, or the system result, so munge the flags accordingly.
+	 */
+	flags_orig = flags;
+	if (curproc->p_zone != global_zone) {
+		if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
+			flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
+			flags |= VMUSAGE_PROJECTS;
+		}
+		if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
+			flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
+			flags |= VMUSAGE_RUSERS;
+		}
+		if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
+			flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
+			flags |= VMUSAGE_EUSERS;
+		}
+		if (flags & VMUSAGE_SYSTEM) {
+			flags &= ~VMUSAGE_SYSTEM;
+			flags |= VMUSAGE_ZONE;
+		}
+	}
+
+	/* Check for unknown flags */
+	if ((flags & (~VMUSAGE_MASK)) != 0)
+		return (set_errno(EINVAL));
+
+	/* Check for no flags */
+	if ((flags & VMUSAGE_MASK) == 0)
+		return (set_errno(EINVAL));
+
+	mutex_enter(&vmu_data.vmu_lock);
+	now = gethrtime();
+
+start:
+	if (vmu_data.vmu_cache != NULL) {
+
+		vmu_cache_t *cache;
+
+		if ((vmu_data.vmu_cache->vmc_timestamp +
+		    ((hrtime_t)age * NANOSEC)) > now)
+			cacherecent = 1;
+
+		if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
+		    cacherecent == 1) {
+			cache = vmu_data.vmu_cache;
+			vmu_cache_hold(cache);
+			mutex_exit(&vmu_data.vmu_lock);
+
+			ret = vmu_copyout_results(cache, buf, nres, flags_orig);
+			mutex_enter(&vmu_data.vmu_lock);
+			vmu_cache_rele(cache);
+			if (vmu_data.vmu_pending_waiters > 0)
+				cv_broadcast(&vmu_data.vmu_cv);
+			mutex_exit(&vmu_data.vmu_lock);
+			return (ret);
+		}
+		/*
+		 * If the cache is recent, it is likely that there are other
+		 * consumers of vm_getusage running, so add their flags to the
+		 * desired flags for the calculation.
+		 */
+		if (cacherecent == 1)
+			flags = vmu_data.vmu_cache->vmc_flags | flags;
+	}
+	if (vmu_data.vmu_calc_thread == NULL) {
+
+		vmu_cache_t *cache;
+
+		vmu_data.vmu_calc_thread = curthread;
+		vmu_data.vmu_calc_flags = flags;
+		vmu_data.vmu_entities = NULL;
+		vmu_data.vmu_nentities = 0;
+		if (vmu_data.vmu_pending_waiters > 0)
+			vmu_data.vmu_calc_flags |=
+			    vmu_data.vmu_pending_flags;
+
+		vmu_data.vmu_pending_flags = 0;
+		mutex_exit(&vmu_data.vmu_lock);
+		vmu_calculate();
+		mutex_enter(&vmu_data.vmu_lock);
+		/* replace the old cache with a copy of the new results */
+		if (vmu_data.vmu_cache != NULL)
+			vmu_cache_rele(vmu_data.vmu_cache);
+		cache = vmu_data.vmu_cache =
+		    vmu_cache_alloc(vmu_data.vmu_nentities,
+			vmu_data.vmu_calc_flags);
+
+		result = cache->vmc_results;
+		for (entity = vmu_data.vmu_entities; entity != NULL;
+		    entity = entity->vme_next) {
+			*result = entity->vme_result;
+			result++;
+		}
+		cache->vmc_timestamp = gethrtime();
+		vmu_cache_hold(cache);
+
+		vmu_data.vmu_calc_flags = 0;
+		vmu_data.vmu_calc_thread = NULL;
+
+		if (vmu_data.vmu_pending_waiters > 0)
+			cv_broadcast(&vmu_data.vmu_cv);
+
+		mutex_exit(&vmu_data.vmu_lock);
+
+		/* copy out unlocked; the hold above keeps cache alive */
+		ret = vmu_copyout_results(cache, buf, nres, flags_orig);
+		mutex_enter(&vmu_data.vmu_lock);
+		vmu_cache_rele(cache);
+		mutex_exit(&vmu_data.vmu_lock);
+
+		return (ret);
+	}
+	vmu_data.vmu_pending_flags |= flags;
+	vmu_data.vmu_pending_waiters++;
+	while (vmu_data.vmu_calc_thread != NULL) {
+		if (cv_wait_sig(&vmu_data.vmu_cv,
+		    &vmu_data.vmu_lock) == 0) {
+			vmu_data.vmu_pending_waiters--;
+			mutex_exit(&vmu_data.vmu_lock);
+			return (set_errno(EINTR));
+		}
+	}
+	vmu_data.vmu_pending_waiters--;
+	goto start;
+}