Diffstat (limited to 'usr/src/uts/common/vm/vm_usage.c')
-rw-r--r--   usr/src/uts/common/vm/vm_usage.c   1978
1 file changed, 1978 insertions, 0 deletions
diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c new file mode 100644 index 0000000000..32a8811e10 --- /dev/null +++ b/usr/src/uts/common/vm/vm_usage.c @@ -0,0 +1,1978 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * vm_usage + * + * This file implements the getvmusage() private system call. + * getvmusage() counts the amount of resident memory pages and swap + * reserved by the specified process collective. A "process collective" is + * the set of processes owned by a particular, zone, project, task, or user. + * + * rss and swap are counted so that for a given process collective, a page is + * only counted once. For example, this means that if multiple processes in + * the same project map the same page, then the project will only be charged + * once for that page. On the other hand, if two processes in different + * projects map the same page, then both projects will be charged + * for the page. + * + * The vm_getusage() calculation is implemented so that the first thread + * performs the rss/swap counting. Other callers will wait for that thread to + * finish, copying the results. This enables multiple rcapds and prstats to + * consume data from the same calculation. The results are also cached so that + * a caller interested in recent results can just copy them instead of starting + * a new calculation. The caller passes the maximium age (in seconds) of the + * data. If the cached data is young enough, the cache is copied, otherwise, + * a new calculation is executed and the cache is replaced with the new + * data. + * + * The rss calculation for each process collective is as follows: + * + * - Inspect flags, determine if counting rss for zones, projects, tasks, + * and/or users. + * - For each proc: + * - Figure out proc's collectives (zone, project, task, and/or user). + * - For each seg in proc's address space: + * - If seg is private: + * - Lookup anons in the amp. + * - For incore pages not previously visited each of the + * proc's collectives, add incore pagesize to each. + * collective. + * Anon's with a refcnt of 1 can be assummed to be not + * previously visited. + * - For address ranges without anons in the amp: + * - Lookup pages in underlying vnode. + * - For incore pages not previously visiting for + * each of the proc's collectives, add incore + * pagesize to each collective. + * - If seg is shared: + * - Lookup pages in the shared amp or vnode. + * - For incore pages not previously visited for each of + * the proc's collectives, add incore pagesize to each + * collective. 
+ * + * Swap is reserved by private segments, and shared anonymous segments. + * The only shared anon segments which do not reserve swap are ISM segments + * and schedctl segments, both of which can be identified by having + * amp->swresv == 0. + * + * The swap calculation for each collective is as follows: + * + * - Inspect flags, determine if counting rss for zones, projects, tasks, + * and/or users. + * - For each proc: + * - Figure out proc's collectives (zone, project, task, and/or user). + * - For each seg in proc's address space: + * - If seg is private: + * - Add svd->swresv pages to swap count for each of the + * proc's collectives. + * - If seg is anon, shared, and amp->swresv != 0 + * - For address ranges in amp not previously visited for + * each of the proc's collectives, add size of address + * range to the swap count for each collective. + * + * These two calculations are done simultaneously, with most of the work + * being done in vmu_calculate_seg(). The results of the calculation are + * copied into "vmu_data.vmu_cache_results". + * + * To perform the calculation, various things are tracked and cached: + * + * - incore/not-incore page ranges for all vnodes. + * (vmu_data.vmu_all_vnodes_hash) + * This eliminates looking up the same page more than once. + * + * - incore/not-incore page ranges for all shared amps. + * (vmu_data.vmu_all_amps_hash) + * This eliminates looking up the same page more than once. + * + * - visited page ranges for each collective. + * - per vnode (entity->vme_vnode_hash) + * - per shared amp (entity->vme_amp_hash) + * For accurate counting of map-shared and cow-shared pages. + * + * - visited private anons (refcnt > 1) for each collective. + * (entity->vme_anon_hash) + * For accurate counting of cow-shared pages. + * + * The common accounting structure is the vmu_entity_t, which represents + * collectives: + * + * - A zone. + * - A project, task, or user within a zone. + * - The entire system (vmu_data.vmu_system). + * - Each collapsed (col) project and user. This means a given projid or + * uid, regardless of which zone the process is in. For instance, + * project 0 in the global zone and project 0 in a non global zone are + * the same collapsed project. + * + * Each entity structure tracks which pages have been already visited for + * that entity (via previously inspected processes) so that these pages are + * not double counted. + */ + +#include <sys/errno.h> +#include <sys/types.h> +#include <sys/zone.h> +#include <sys/proc.h> +#include <sys/project.h> +#include <sys/task.h> +#include <sys/thread.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <sys/modhash.h> +#include <sys/modhash_impl.h> +#include <sys/shm.h> +#include <sys/swap.h> +#include <sys/synch.h> +#include <sys/systm.h> +#include <sys/var.h> +#include <sys/vm_usage.h> +#include <sys/zone.h> +#include <vm/anon.h> +#include <vm/as.h> +#include <vm/seg_vn.h> +#include <vm/seg_spt.h> + +#define VMUSAGE_HASH_SIZE 512 + +#define VMUSAGE_TYPE_VNODE 1 +#define VMUSAGE_TYPE_AMP 2 +#define VMUSAGE_TYPE_ANON 3 + +#define VMUSAGE_BOUND_UNKNOWN 0 +#define VMUSAGE_BOUND_INCORE 1 +#define VMUSAGE_BOUND_NOT_INCORE 2 + +/* + * bounds for vnodes and shared amps + * Each bound is either entirely incore, entirely not in core, or + * entirely unknown. bounds are stored in order by offset. 
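 + * For example, a vnode might be described by three bounds: pages + * [0,49] incore, [50,89] not incore, and [90,99] not yet inspected + * (unknown).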
+ */ +typedef struct vmu_bound { + struct vmu_bound *vmb_next; + pgcnt_t vmb_start; /* page offset in vnode/amp on which bound starts */ + pgcnt_t vmb_end; /* page offset in vnode/amp on which bound ends */ + char vmb_type; /* One of VMUSAGE_BOUND_* */ +} vmu_bound_t; + +/* + * hash of visited objects (vnodes or shared amps) + * key is address of vnode or amp. Bounds lists known incore/non-incore + * bounds for vnode/amp. + */ +typedef struct vmu_object { + struct vmu_object *vmo_next; /* free list */ + caddr_t vmo_key; + short vmo_type; + vmu_bound_t *vmo_bounds; +} vmu_object_t; + +/* + * Entity by which to count results. + * + * The entity structure keeps the current rss/swap counts for each entity + * (zone, project, etc), and hashes of vm structures that have already + * been visited for the entity. + * + * vme_next: links the list of all entities currently being counted by + * vmu_calculate(). + * + * vme_next_calc: links the list of entities related to the current process + * being counted by vmu_calculate_proc(). + * + * vmu_calculate_proc() walks all processes. For each process, it makes a + * list of the entities related to that process using vme_next_calc. This + * list changes each time vmu_calculate_proc() is called. + * + */ +typedef struct vmu_entity { + struct vmu_entity *vme_next; + struct vmu_entity *vme_next_calc; + mod_hash_t *vme_vnode_hash; /* vnodes visited for entity */ + mod_hash_t *vme_amp_hash; /* shared amps visited for entity */ + mod_hash_t *vme_anon_hash; /* cow anons visited for entity */ + vmusage_t vme_result; /* identifies entity and results */ +} vmu_entity_t; + +/* + * Hash of entities visited within a zone, and an entity for the zone + * itself. + */ +typedef struct vmu_zone { + struct vmu_zone *vmz_next; /* free list */ + id_t vmz_id; + vmu_entity_t *vmz_zone; + mod_hash_t *vmz_projects_hash; + mod_hash_t *vmz_tasks_hash; + mod_hash_t *vmz_rusers_hash; + mod_hash_t *vmz_eusers_hash; +} vmu_zone_t; + +/* + * Cache of results from last calculation + */ +typedef struct vmu_cache { + vmusage_t *vmc_results; /* Results from last call to */ + /* vm_getusage(). */ + uint64_t vmc_nresults; /* Count of cached results */ + uint64_t vmc_refcnt; /* refcnt for free */ + uint_t vmc_flags; /* Flags for vm_getusage() */ + hrtime_t vmc_timestamp; /* when cache was created */ +} vmu_cache_t; + +/* + * top level rss info for the system + */ +typedef struct vmu_data { + kmutex_t vmu_lock; /* Protects vmu_data */ + kcondvar_t vmu_cv; /* Used to signal threads */ + /* Waiting for */ + /* Rss_calc_thread to finish */ + vmu_entity_t *vmu_system; /* Entity for tracking */ + /* rss/swap for all processes */ + /* in all zones */ + mod_hash_t *vmu_zones_hash; /* Zones visited */ + mod_hash_t *vmu_projects_col_hash; /* These *_col_hash hashes */ + mod_hash_t *vmu_rusers_col_hash; /* keep track of entities, */ + mod_hash_t *vmu_eusers_col_hash; /* ignoring zoneid, in order */ + /* to implement VMUSAGE_COL_* */ + /* flags, which aggregate by */ + /* project or user regardless */ + /* of zoneid. 
*/ + mod_hash_t *vmu_all_vnodes_hash; /* System wide visited vnodes */ + /* to track incore/not-incore */ + mod_hash_t *vmu_all_amps_hash; /* System wide visited shared */ + /* amps to track incore/not- */ + /* incore */ + vmu_entity_t *vmu_entities; /* Linked list of entities */ + size_t vmu_nentities; /* Count of entities in list */ + vmu_cache_t *vmu_cache; /* Cached results */ + kthread_t *vmu_calc_thread; /* NULL, or thread running */ + /* vmu_calculate() */ + uint_t vmu_calc_flags; /* Flags being using by */ + /* currently running calc */ + /* thread */ + uint_t vmu_pending_flags; /* Flags of vm_getusage() */ + /* threads waiting for */ + /* calc thread to finish */ + uint_t vmu_pending_waiters; /* Number of threads waiting */ + /* for calc thread */ + vmu_bound_t *vmu_free_bounds; + vmu_object_t *vmu_free_objects; + vmu_entity_t *vmu_free_entities; + vmu_zone_t *vmu_free_zones; +} vmu_data_t; + +extern struct as kas; +extern proc_t *practive; +extern zone_t *global_zone; +extern struct seg_ops segvn_ops; +extern struct seg_ops segspt_shmops; + +static vmu_data_t vmu_data; +static kmem_cache_t *vmu_bound_cache; +static kmem_cache_t *vmu_object_cache; + +/* + * Save a bound on the free list + */ +static void +vmu_free_bound(vmu_bound_t *bound) +{ + bound->vmb_next = vmu_data.vmu_free_bounds; + vmu_data.vmu_free_bounds = bound; +} + +/* + * Free an object, and all visited bound info. + */ +static void +vmu_free_object(mod_hash_val_t val) +{ + vmu_object_t *obj = (vmu_object_t *)val; + vmu_bound_t *bound = obj->vmo_bounds; + vmu_bound_t *tmp; + + while (bound != NULL) { + tmp = bound; + bound = bound->vmb_next; + vmu_free_bound(tmp); + } + obj->vmo_next = vmu_data.vmu_free_objects; + vmu_data.vmu_free_objects = obj; +} + +/* + * Free an entity, and hashes of visited objects for that entity. + */ +static void +vmu_free_entity(mod_hash_val_t val) +{ + vmu_entity_t *entity = (vmu_entity_t *)val; + + if (entity->vme_vnode_hash != NULL) + i_mod_hash_clear_nosync(entity->vme_vnode_hash); + if (entity->vme_amp_hash != NULL) + i_mod_hash_clear_nosync(entity->vme_amp_hash); + if (entity->vme_anon_hash != NULL) + i_mod_hash_clear_nosync(entity->vme_anon_hash); + + entity->vme_next = vmu_data.vmu_free_entities; + vmu_data.vmu_free_entities = entity; +} + +/* + * Free zone entity, and all hashes of entities inside that zone, + * which are projects, tasks, and users. + */ +static void +vmu_free_zone(mod_hash_val_t val) +{ + vmu_zone_t *zone = (vmu_zone_t *)val; + + if (zone->vmz_zone != NULL) { + vmu_free_entity((mod_hash_val_t)zone->vmz_zone); + zone->vmz_zone = NULL; + } + if (zone->vmz_projects_hash != NULL) + i_mod_hash_clear_nosync(zone->vmz_projects_hash); + if (zone->vmz_tasks_hash != NULL) + i_mod_hash_clear_nosync(zone->vmz_tasks_hash); + if (zone->vmz_rusers_hash != NULL) + i_mod_hash_clear_nosync(zone->vmz_rusers_hash); + if (zone->vmz_eusers_hash != NULL) + i_mod_hash_clear_nosync(zone->vmz_eusers_hash); + zone->vmz_next = vmu_data.vmu_free_zones; + vmu_data.vmu_free_zones = zone; +} + +/* + * Initialize synchronization primitives and hashes for system-wide tracking + * of visited vnodes and shared amps. Initialize results cache. 
+ */ +void +vm_usage_init() +{ + mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL); + + vmu_data.vmu_system = NULL; + vmu_data.vmu_zones_hash = NULL; + vmu_data.vmu_projects_col_hash = NULL; + vmu_data.vmu_rusers_col_hash = NULL; + vmu_data.vmu_eusers_col_hash = NULL; + + vmu_data.vmu_free_bounds = NULL; + vmu_data.vmu_free_objects = NULL; + vmu_data.vmu_free_entities = NULL; + vmu_data.vmu_free_zones = NULL; + + vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash( + "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object, + sizeof (vnode_t)); + vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash( + "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object, + sizeof (struct anon_map)); + vmu_data.vmu_projects_col_hash = mod_hash_create_idhash( + "vmusage collapsed project hash", VMUSAGE_HASH_SIZE, + vmu_free_entity); + vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash( + "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE, + vmu_free_entity); + vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash( + "vmusage collpased euser hash", VMUSAGE_HASH_SIZE, + vmu_free_entity); + vmu_data.vmu_zones_hash = mod_hash_create_idhash( + "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone); + + vmu_bound_cache = kmem_cache_create("vmu_bound_cache", + sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + vmu_object_cache = kmem_cache_create("vmu_object_cache", + sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + + vmu_data.vmu_entities = NULL; + vmu_data.vmu_nentities = 0; + + vmu_data.vmu_cache = NULL; + vmu_data.vmu_calc_thread = NULL; + vmu_data.vmu_calc_flags = 0; + vmu_data.vmu_pending_flags = 0; + vmu_data.vmu_pending_waiters = 0; +} + +/* + * Allocate hashes for tracking vm objects visited for an entity. + * Update list of entities. + */ +static vmu_entity_t * +vmu_alloc_entity(id_t id, int type, id_t zoneid) +{ + vmu_entity_t *entity; + + if (vmu_data.vmu_free_entities != NULL) { + entity = vmu_data.vmu_free_entities; + vmu_data.vmu_free_entities = + vmu_data.vmu_free_entities->vme_next; + bzero(&entity->vme_result, sizeof (vmusage_t)); + } else { + entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP); + } + entity->vme_result.vmu_id = id; + entity->vme_result.vmu_zoneid = zoneid; + entity->vme_result.vmu_type = type; + + if (entity->vme_vnode_hash == NULL) + entity->vme_vnode_hash = mod_hash_create_ptrhash( + "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object, + sizeof (vnode_t)); + + if (entity->vme_amp_hash == NULL) + entity->vme_amp_hash = mod_hash_create_ptrhash( + "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object, + sizeof (struct anon_map)); + + if (entity->vme_anon_hash == NULL) + entity->vme_anon_hash = mod_hash_create_ptrhash( + "vmusage anon hash", VMUSAGE_HASH_SIZE, + mod_hash_null_valdtor, sizeof (struct anon)); + + entity->vme_next = vmu_data.vmu_entities; + vmu_data.vmu_entities = entity; + vmu_data.vmu_nentities++; + + return (entity); +} + +/* + * Allocate a zone entity, and hashes for tracking visited vm objects + * for projects, tasks, and users within that zone. 
+ */ +static vmu_zone_t * +vmu_alloc_zone(id_t id) +{ + vmu_zone_t *zone; + + if (vmu_data.vmu_free_zones != NULL) { + zone = vmu_data.vmu_free_zones; + vmu_data.vmu_free_zones = + vmu_data.vmu_free_zones->vmz_next; + zone->vmz_next = NULL; + zone->vmz_zone = NULL; + } else { + zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP); + } + + zone->vmz_id = id; + + if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0) + zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id); + + if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS | + VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL) + zone->vmz_projects_hash = mod_hash_create_idhash( + "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity); + + if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) + != 0 && zone->vmz_tasks_hash == NULL) + zone->vmz_tasks_hash = mod_hash_create_idhash( + "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity); + + if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) + != 0 && zone->vmz_rusers_hash == NULL) + zone->vmz_rusers_hash = mod_hash_create_idhash( + "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity); + + if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) + != 0 && zone->vmz_eusers_hash == NULL) + zone->vmz_eusers_hash = mod_hash_create_idhash( + "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity); + + return (zone); +} + +/* + * Allocate a structure for tracking visited bounds for a vm object. + */ +static vmu_object_t * +vmu_alloc_object(caddr_t key, int type) +{ + vmu_object_t *object; + + if (vmu_data.vmu_free_objects != NULL) { + object = vmu_data.vmu_free_objects; + vmu_data.vmu_free_objects = + vmu_data.vmu_free_objects->vmo_next; + } else { + object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP); + } + + object->vmo_key = key; + object->vmo_type = type; + object->vmo_bounds = NULL; + + return (object); +} + +/* + * Allocate and return a bound structure. + */ +static vmu_bound_t * +vmu_alloc_bound() +{ + vmu_bound_t *bound; + + if (vmu_data.vmu_free_bounds != NULL) { + bound = vmu_data.vmu_free_bounds; + vmu_data.vmu_free_bounds = + vmu_data.vmu_free_bounds->vmb_next; + bzero(bound, sizeof (vmu_bound_t)); + } else { + bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP); + bzero(bound, sizeof (vmu_bound_t)); + } + return (bound); +} + +/* + * vmu_find_insert_* functions implement hash lookup or allocate and + * insert operations. 
+ */ +static vmu_object_t * +vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type) +{ + int ret; + vmu_object_t *object; + + ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key, + (mod_hash_val_t *)&object); + if (ret != 0) { + object = vmu_alloc_object(key, type); + ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key, + (mod_hash_val_t)object, (mod_hash_hndl_t)0); + ASSERT(ret == 0); + } + return (object); +} + +static int +vmu_find_insert_anon(mod_hash_t *hash, caddr_t key) +{ + int ret; + caddr_t val; + + ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key, + (mod_hash_val_t *)&val); + + if (ret == 0) + return (0); + + ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key, + (mod_hash_val_t)key, (mod_hash_hndl_t)0); + + ASSERT(ret == 0); + + return (1); +} + +static vmu_entity_t * +vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid) +{ + int ret; + vmu_entity_t *entity; + + ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id, + (mod_hash_val_t *)&entity); + if (ret != 0) { + entity = vmu_alloc_entity(id, type, zoneid); + ret = i_mod_hash_insert_nosync(hash, + (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity, + (mod_hash_hndl_t)0); + ASSERT(ret == 0); + } + return (entity); +} + + + + +/* + * Returns list of object bounds between start and end. New bounds inserted + * by this call are given type. + * + * Returns the number of pages covered if new bounds are created. Returns 0 + * if region between start/end consists of all existing bounds. + */ +static pgcnt_t +vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t + end, char type, vmu_bound_t **first, vmu_bound_t **last) +{ + vmu_bound_t *next; + vmu_bound_t *prev = NULL; + vmu_bound_t *tmp = NULL; + pgcnt_t ret = 0; + + *first = *last = NULL; + + for (next = ro->vmo_bounds; next != NULL; next = next->vmb_next) { + /* + * Find bounds overlapping or overlapped by range [start,end]. + */ + if (start > next->vmb_end) { + /* bound is before new bound */ + prev = next; + continue; + } + if (next->vmb_start > end) { + /* bound is after new bound */ + break; + } + if (*first == NULL) + *first = next; + *last = next; + } + + if (*first == NULL) { + ASSERT(*last == NULL); + /* + * No bounds overlapping range [start,end], so create new + * bound + */ + tmp = vmu_alloc_bound(); + tmp->vmb_start = start; + tmp->vmb_end = end; + tmp->vmb_type = type; + if (prev == NULL) { + tmp->vmb_next = ro->vmo_bounds; + ro->vmo_bounds = tmp; + } else { + tmp->vmb_next = prev->vmb_next; + prev->vmb_next = tmp; + } + *first = tmp; + *last = tmp; + ASSERT(tmp->vmb_end >= tmp->vmb_start); + ret = tmp->vmb_end - tmp->vmb_start + 1; + return (ret); + } + + /* Check to see if start is before first known bound */ + ASSERT(first != NULL && last != NULL); + next = (*first); + if (start < (*first)->vmb_start) { + /* Create new bound before first bound */ + tmp = vmu_alloc_bound(); + tmp->vmb_start = start; + tmp->vmb_end = (*first)->vmb_start - 1; + tmp->vmb_type = type; + tmp->vmb_next = *first; + if (*first == ro->vmo_bounds) + ro->vmo_bounds = tmp; + if (prev != NULL) + prev->vmb_next = tmp; + ASSERT(tmp->vmb_end >= tmp->vmb_start); + ret += tmp->vmb_end - tmp->vmb_start + 1; + *first = tmp; + } + /* + * Between start and end, search for gaps between and after existing + * bounds. Create new bounds to fill gaps if they exist. + */ + while (end > next->vmb_end) { + /* + * Check for gap between bound and next bound. if no gap, + * continue. 
+ */ + if ((next != *last) && + ((next->vmb_end + 1) == next->vmb_next->vmb_start)) { + next = next->vmb_next; + continue; + } + /* + * Insert new bound in gap after bound, and before next + * bound if next bound exists. + */ + tmp = vmu_alloc_bound(); + tmp->vmb_type = type; + tmp->vmb_next = next->vmb_next; + tmp->vmb_start = next->vmb_end + 1; + + if (next != *last) { + tmp->vmb_end = next->vmb_next->vmb_start - 1; + ASSERT(tmp->vmb_end >= tmp->vmb_start); + ret += tmp->vmb_end - tmp->vmb_start + 1; + next->vmb_next = tmp; + next = tmp->vmb_next; + } else { + tmp->vmb_end = end; + ASSERT(tmp->vmb_end >= tmp->vmb_start); + ret += tmp->vmb_end - tmp->vmb_start + 1; + next->vmb_next = tmp; + *last = tmp; + break; + } + } + return (ret); +} + +/* + * vmu_update_bounds() + * + * first, last: list of continuous bounds, of which zero or more are of + * type VMUSAGE_BOUND_UNKNOWN. + * + * new_first, new_last: list of continuous bounds, of which none are of + * type VMUSAGE_BOUND_UNKNOWN. These bounds are used to + * update the types of bounds in (first,last) with + * type VMUSAGE_BOUND_UNKNOWN. + * + * For the list of bounds (first,last), this function updates any bounds + * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in + * the list (new_first, new_last). + * + * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list + * (new_first, new_last), it will be split into multiple bounds. + * + * Return value: + * The number of pages in the list of bounds (first,last) that were of + * type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type + * VMUSAGE_BOUND_INCORE. + * + */ +static pgcnt_t +vmu_update_bounds(vmu_bound_t **first, vmu_bound_t **last, + vmu_bound_t *new_first, vmu_bound_t *new_last) +{ + vmu_bound_t *next, *new_next, *tmp; + pgcnt_t rss = 0; + + next = *first; + new_next = new_first; + + /* verify bounds span same pages */ + ASSERT((*first)->vmb_start >= new_next->vmb_start); + ASSERT((*last)->vmb_end <= new_last->vmb_end); + for (;;) { + /* If bound already has type, proceed to next bound */ + if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) { + if (next == *last) + break; + next = next->vmb_next; + continue; + } + while (new_next->vmb_end < next->vmb_start) + new_next = new_next->vmb_next; + ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN); + next->vmb_type = new_next->vmb_type; + if (new_next->vmb_end < next->vmb_end) { + /* need to split bound */ + tmp = vmu_alloc_bound(); + tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN; + tmp->vmb_start = new_next->vmb_end + 1; + tmp->vmb_end = next->vmb_end; + tmp->vmb_next = next->vmb_next; + next->vmb_end = new_next->vmb_end; + next->vmb_next = tmp; + if (*last == next) + *last = tmp; + if (next->vmb_type == VMUSAGE_BOUND_INCORE) + rss += next->vmb_end - next->vmb_start + 1; + next = tmp; + } else { + if (next->vmb_type == VMUSAGE_BOUND_INCORE) + rss += next->vmb_end - next->vmb_start + 1; + if (next == *last) + break; + next = next->vmb_next; + } + } + return (rss); +} + +/* + * merges adjacent bounds with same type between first and last bound. + * After merge, last pointer is no longer valid, as last bound may be + * merged away. 
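 + * For example, two adjacent bounds [0,9] and [10,19] that are both + * VMUSAGE_BOUND_INCORE are collapsed into a single bound [0,19].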
+ */ +static void +vmu_merge_bounds(vmu_bound_t **first, vmu_bound_t **last) +{ + vmu_bound_t *next; + vmu_bound_t *tmp; + + ASSERT(*first != NULL); + ASSERT(*last != NULL); + + next = *first; + while (next != *last) { + + /* If bounds are adjacent and have same type, merge them */ + if (((next->vmb_end + 1) == next->vmb_next->vmb_start) && + (next->vmb_type == next->vmb_next->vmb_type)) { + tmp = next->vmb_next; + next->vmb_end = tmp->vmb_end; + next->vmb_next = tmp->vmb_next; + vmu_free_bound(tmp); + if (tmp == *last) + *last = next; + } else { + next = next->vmb_next; + } + } +} + +/* + * Given an amp and a list of bounds, updates each bound's type with + * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE. + * + * If a bound is partially incore, it will be split into two bounds. + * first and last may be modified, as bounds may be split into multiple + * bounds if the are partially incore/not-incore. + * + * Set incore to non-zero if bounds are already known to be incore + * + */ +static void +vmu_amp_update_incore_bounds(struct anon_map *amp, vmu_bound_t **first, + vmu_bound_t **last, boolean_t incore) +{ + vmu_bound_t *next; + vmu_bound_t *tmp; + pgcnt_t index; + short bound_type; + short page_type; + vnode_t *vn; + anoff_t off; + struct anon *ap; + + next = *first; + /* Shared anon slots don't change once set */ + ANON_LOCK_ENTER(&->a_rwlock, RW_READER); + for (;;) { + if (incore == B_TRUE) + next->vmb_type = VMUSAGE_BOUND_INCORE; + + if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) { + if (next == *last) + break; + next = next->vmb_next; + continue; + } + bound_type = next->vmb_type; + index = next->vmb_start; + while (index <= next->vmb_end) { + + /* + * These are used to determine how much to increment + * index when a large page is found. + */ + page_t *page; + pgcnt_t pgcnt = 1; + uint_t pgshft; + pgcnt_t pgmsk; + + ap = anon_get_ptr(amp->ahp, index); + if (ap != NULL) + swap_xlate(ap, &vn, &off); + + if (ap != NULL && vn != NULL && vn->v_pages != NULL && + (page = page_exists(vn, off)) != NULL) { + page_type = VMUSAGE_BOUND_INCORE; + if (page->p_szc > 0) { + pgcnt = page_get_pagecnt(page->p_szc); + pgshft = page_get_shift(page->p_szc); + pgmsk = (0x1 << (pgshft - PAGESHIFT)) + - 1; + } + } else { + page_type = VMUSAGE_BOUND_NOT_INCORE; + } + if (bound_type == VMUSAGE_BOUND_UNKNOWN) { + next->vmb_type = page_type; + } else if (next->vmb_type != page_type) { + /* + * if current bound type does not match page + * type, need to split off new bound. + */ + tmp = vmu_alloc_bound(); + tmp->vmb_type = page_type; + tmp->vmb_start = index; + tmp->vmb_end = next->vmb_end; + tmp->vmb_next = next->vmb_next; + next->vmb_end = index - 1; + next->vmb_next = tmp; + if (*last == next) + *last = tmp; + next = tmp; + } + if (pgcnt > 1) { + /* + * If inside large page, jump to next large + * page + */ + index = (index & ~pgmsk) + pgcnt; + } else { + index++; + } + } + if (next == *last) { + ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN); + break; + } else + next = next->vmb_next; + } + ANON_LOCK_EXIT(&->a_rwlock); +} + +/* + * Same as vmu_amp_update_incore_bounds(), except for tracking + * incore-/not-incore for vnodes. 
+ */ +static void +vmu_vnode_update_incore_bounds(vnode_t *vnode, vmu_bound_t **first, + vmu_bound_t **last) +{ + vmu_bound_t *next; + vmu_bound_t *tmp; + pgcnt_t index; + short bound_type; + short page_type; + + next = *first; + for (;;) { + if (vnode->v_pages == NULL) + next->vmb_type = VMUSAGE_BOUND_NOT_INCORE; + + if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) { + if (next == *last) + break; + next = next->vmb_next; + continue; + } + + bound_type = next->vmb_type; + index = next->vmb_start; + while (index <= next->vmb_end) { + + /* + * These are used to determine how much to increment + * index when a large page is found. + */ + page_t *page; + pgcnt_t pgcnt = 1; + uint_t pgshft; + pgcnt_t pgmsk; + + if (vnode->v_pages != NULL && + (page = page_exists(vnode, ptob(index))) != NULL) { + page_type = VMUSAGE_BOUND_INCORE; + if (page->p_szc > 0) { + pgcnt = page_get_pagecnt(page->p_szc); + pgshft = page_get_shift(page->p_szc); + pgmsk = (0x1 << (pgshft - PAGESHIFT)) + - 1; + } + } else { + page_type = VMUSAGE_BOUND_NOT_INCORE; + } + if (bound_type == VMUSAGE_BOUND_UNKNOWN) { + next->vmb_type = page_type; + } else if (next->vmb_type != page_type) { + /* + * if current bound type does not match page + * type, need to split off new bound. + */ + tmp = vmu_alloc_bound(); + tmp->vmb_type = page_type; + tmp->vmb_start = index; + tmp->vmb_end = next->vmb_end; + tmp->vmb_next = next->vmb_next; + next->vmb_end = index - 1; + next->vmb_next = tmp; + if (*last == next) + *last = tmp; + next = tmp; + } + if (pgcnt > 1) { + /* + * If inside large page, jump to next large + * page + */ + index = (index & ~pgmsk) + pgcnt; + } else { + index++; + } + } + if (next == *last) { + ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN); + break; + } else + next = next->vmb_next; + } +} + +/* + * Calculate the rss and swap consumed by a segment. vmu_entities is the + * list of entities to visit. For shared segments, the vnode or amp + * is looked up in each entity to see if has been already counted. Private + * anon pages are checked per entity to ensure that cow pages are not + * double counted. + * + * For private mapped files, first the amp is checked for private pages. + * Bounds not backed by the amp are looked up in the vnode for each entity + * to avoid double counting of private COW vnode pages. + */ +static void +vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg) +{ + struct segvn_data *svd; + struct shm_data *shmd; + struct spt_data *sptd; + vmu_object_t *shared_object = NULL; + vmu_object_t *entity_object = NULL; + vmu_entity_t *entity; + vmusage_t *result; + vmu_bound_t *first = NULL; + vmu_bound_t *last = NULL; + vmu_bound_t *cur = NULL; + vmu_bound_t *e_first = NULL; + vmu_bound_t *e_last = NULL; + vmu_bound_t *tmp; + pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt; + struct anon_map *private_amp = NULL; + boolean_t incore = B_FALSE; + boolean_t shared = B_FALSE; + int file = 0; + pgcnt_t swresv = 0; + pgcnt_t panon = 0; + + /* Can zero-length segments exist? Not sure, so parenoia */ + if (seg->s_size <= 0) + return; + + /* + * Figure out if there is a shared object (such as a named vnode or + * a shared amp, then figure out if there is a private amp, which + * identifies private pages. 
+ */ + if (seg->s_ops == &segvn_ops) { + svd = (struct segvn_data *)seg->s_data; + if (svd->type == MAP_SHARED) + shared = B_TRUE; + else + swresv = svd->swresv; + + if (svd->vp != NULL) { + file = 1; + shared_object = vmu_find_insert_object( + vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp, + VMUSAGE_TYPE_VNODE); + s_start = btop(svd->offset); + s_end = btop(svd->offset + seg->s_size) - 1; + } + if (svd->amp != NULL && svd->type == MAP_SHARED) { + ASSERT(shared_object == NULL); + shared_object = vmu_find_insert_object( + vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp, + VMUSAGE_TYPE_AMP); + s_start = svd->anon_index; + s_end = svd->anon_index + btop(seg->s_size) - 1; + /* schedctl mappings are always in core */ + if (svd->amp->swresv == 0) + incore = B_TRUE; + } + if (svd->amp != NULL && svd->type == MAP_PRIVATE) { + private_amp = svd->amp; + p_start = svd->anon_index; + p_end = svd->anon_index + btop(seg->s_size) - 1; + } + } else if (seg->s_ops == &segspt_shmops) { + shared = B_TRUE; + shmd = (struct shm_data *)seg->s_data; + shared_object = vmu_find_insert_object( + vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp, + VMUSAGE_TYPE_AMP); + s_start = 0; + s_end = btop(seg->s_size) - 1; + sptd = shmd->shm_sptseg->s_data; + + /* ism segments are always incore and do not reserve swap */ + if (sptd->spt_flags & SHM_SHARE_MMU) + incore = B_TRUE; + + } else { + return; + } + + /* + * If there is a private amp, count anon pages that exist. If an + * anon has a refcnt > 1 (cow sharing), then save the anon in a + * hash so that it is not double counted. + * + * If there is also a shared object, they figure out the bounds + * which are not mapped by the private amp. + */ + if (private_amp != NULL) { + + /* Enter as writer to prevent cow anons from being freed */ + ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER); + + p_index = p_start; + s_index = s_start; + + while (p_index <= p_end) { + + pgcnt_t p_index_next; + pgcnt_t p_bound_size; + int cnt; + anoff_t off; + struct vnode *vn; + struct anon *ap; + page_t *page; /* For handling of large */ + pgcnt_t pgcnt = 1; /* pages */ + pgcnt_t pgstart; + pgcnt_t pgend; + uint_t pgshft; + pgcnt_t pgmsk; + + p_index_next = p_index; + ap = anon_get_next_ptr(private_amp->ahp, + &p_index_next); + + /* + * If next anon is past end of mapping, simulate + * end of anon so loop terminates. + */ + if (p_index_next > p_end) { + p_index_next = p_end + 1; + ap = NULL; + } + /* + * For cow segments, keep track of bounds not + * backed by private amp so they can be looked + * up in the backing vnode + */ + if (p_index_next != p_index) { + + /* + * Compute index difference between anon and + * previous anon. + */ + p_bound_size = p_index_next - p_index - 1; + + if (shared_object != NULL) { + cur = vmu_alloc_bound(); + cur->vmb_next = NULL; + cur->vmb_start = s_index; + cur->vmb_end = s_index + p_bound_size; + cur->vmb_type = VMUSAGE_BOUND_UNKNOWN; + if (first == NULL) { + first = cur; + last = cur; + } else { + last->vmb_next = cur; + last = cur; + } + } + p_index = p_index + p_bound_size + 1; + s_index = s_index + p_bound_size + 1; + } + + /* Detect end of anons in amp */ + if (ap == NULL) + break; + + cnt = ap->an_refcnt; + swap_xlate(ap, &vn, &off); + + if (vn == NULL || vn->v_pages == NULL || + (page = page_exists(vn, off)) == NULL) { + p_index++; + s_index++; + continue; + } + + /* + * If large page is found, compute portion of large + * page in mapping, and increment indicies to the next + * large page. 
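 + * For example, with 4K base pages and a 2M large page, pgcnt is 512 + * and pgmsk is 0x1ff; if p_index is 1000, then pgstart is 512, pgend is + * 1023 (clipped to p_end if the mapping ends sooner), 24 pages + * (1000-1023) are counted, and p_index advances to 1024.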
+ */ + if (page->p_szc > 0) { + + pgcnt = page_get_pagecnt(page->p_szc); + pgshft = page_get_shift(page->p_szc); + pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1; + + /* First page in large page */ + pgstart = p_index & ~pgmsk; + /* Last page in large page */ + pgend = pgstart + pgcnt - 1; + /* + * Artifically end page if page extends past + * end of mapping. + */ + if (pgend > p_end) + pgend = p_end; + + /* + * Compute number of pages from large page + * which are mapped. + */ + pgcnt = pgend - p_index + 1; + + /* + * Point indicies at page after large page, + * or at page after end of mapping. + */ + p_index += pgcnt; + s_index += pgcnt; + } else { + p_index++; + s_index++; + } + + /* + * Assume anon structs with a refcnt + * of 1 are not cow shared, so there + * is no reason to track them per entity. + */ + if (cnt == 1) { + panon += pgcnt; + continue; + } + for (entity = vmu_entities; entity != NULL; + entity = entity->vme_next_calc) { + + result = &entity->vme_result; + /* + * Track cow anons per entity so + * they are not double counted. + */ + if (vmu_find_insert_anon(entity->vme_anon_hash, + (caddr_t)ap) == 0) + continue; + + result->vmu_rss_all += (pgcnt << PAGESHIFT); + result->vmu_rss_private += + (pgcnt << PAGESHIFT); + } + } + ANON_LOCK_EXIT(&private_amp->a_rwlock); + } + + /* Add up resident anon and swap reserved for private mappings */ + if (swresv > 0 || panon > 0) { + for (entity = vmu_entities; entity != NULL; + entity = entity->vme_next_calc) { + result = &entity->vme_result; + result->vmu_swap_all += swresv; + result->vmu_swap_private += swresv; + result->vmu_rss_all += (panon << PAGESHIFT); + result->vmu_rss_private += (panon << PAGESHIFT); + } + } + + /* Compute resident pages backing shared amp or named vnode */ + if (shared_object != NULL) { + if (first == NULL) { + /* + * No private amp, or private amp has no anon + * structs. This means entire segment is backed by + * the shared object. + */ + first = vmu_alloc_bound(); + first->vmb_next = NULL; + first->vmb_start = s_start; + first->vmb_end = s_end; + first->vmb_type = VMUSAGE_BOUND_UNKNOWN; + } + /* + * Iterate bounds not backed by private amp, and compute + * resident pages. + */ + cur = first; + while (cur != NULL) { + + if (vmu_insert_lookup_object_bounds(shared_object, + cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN, + &first, &last) > 0) { + /* new bounds, find incore/not-incore */ + if (shared_object->vmo_type == + VMUSAGE_TYPE_VNODE) + vmu_vnode_update_incore_bounds( + (vnode_t *) + shared_object->vmo_key, &first, + &last); + else + vmu_amp_update_incore_bounds( + (struct anon_map *) + shared_object->vmo_key, &first, + &last, incore); + vmu_merge_bounds(&first, &last); + } + for (entity = vmu_entities; entity != NULL; + entity = entity->vme_next_calc) { + + result = &entity->vme_result; + + entity_object = vmu_find_insert_object( + shared_object->vmo_type == + VMUSAGE_TYPE_VNODE ? 
entity->vme_vnode_hash: + entity->vme_amp_hash, + shared_object->vmo_key, + shared_object->vmo_type); + + virt = vmu_insert_lookup_object_bounds( + entity_object, cur->vmb_start, cur->vmb_end, + VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last); + + if (virt == 0) + continue; + /* + * Range visited for this entity + */ + rss = vmu_update_bounds(&e_first, + &e_last, first, last); + result->vmu_rss_all += (rss << PAGESHIFT); + if (shared == B_TRUE && file == B_FALSE) { + /* shared anon mapping */ + result->vmu_swap_all += + (virt << PAGESHIFT); + result->vmu_swap_shared += + (virt << PAGESHIFT); + result->vmu_rss_shared += + (rss << PAGESHIFT); + } else if (shared == B_TRUE && file == B_TRUE) { + /* shared file mapping */ + result->vmu_rss_shared += + (rss << PAGESHIFT); + } else if (shared == B_FALSE && + file == B_TRUE) { + /* private file mapping */ + result->vmu_rss_private += + (rss << PAGESHIFT); + } + vmu_merge_bounds(&e_first, &e_last); + } + tmp = cur; + cur = cur->vmb_next; + vmu_free_bound(tmp); + } + } +} + +/* + * Based on the current calculation flags, find the relevant entities + * which are relative to the process. Then calculate each segment + * in the process'es address space for each relevant entity. + */ +static void +vmu_calculate_proc(proc_t *p) +{ + vmu_entity_t *entities = NULL; + vmu_zone_t *zone; + vmu_entity_t *tmp; + struct as *as; + struct seg *seg; + int ret; + + /* Figure out which entities are being computed */ + if ((vmu_data.vmu_system) != NULL) { + tmp = vmu_data.vmu_system; + tmp->vme_next_calc = entities; + entities = tmp; + } + if (vmu_data.vmu_calc_flags & + (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS | + VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS | + VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS | + VMUSAGE_ALL_EUSERS)) { + ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash, + (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id, + (mod_hash_val_t *)&zone); + if (ret != 0) { + zone = vmu_alloc_zone(p->p_zone->zone_id); + ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash, + (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id, + (mod_hash_val_t)zone, (mod_hash_hndl_t)0); + ASSERT(ret == 0); + } + if (zone->vmz_zone != NULL) { + tmp = zone->vmz_zone; + tmp->vme_next_calc = entities; + entities = tmp; + } + if (vmu_data.vmu_calc_flags & + (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) { + tmp = vmu_find_insert_entity(zone->vmz_projects_hash, + p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, + zone->vmz_id); + tmp->vme_next_calc = entities; + entities = tmp; + } + if (vmu_data.vmu_calc_flags & + (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) { + tmp = vmu_find_insert_entity(zone->vmz_tasks_hash, + p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id); + tmp->vme_next_calc = entities; + entities = tmp; + } + if (vmu_data.vmu_calc_flags & + (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) { + tmp = vmu_find_insert_entity(zone->vmz_rusers_hash, + crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id); + tmp->vme_next_calc = entities; + entities = tmp; + } + if (vmu_data.vmu_calc_flags & + (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) { + tmp = vmu_find_insert_entity(zone->vmz_eusers_hash, + crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id); + tmp->vme_next_calc = entities; + entities = tmp; + } + } + /* Entities which collapse projects and users for all zones */ + if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) { + tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash, + p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES); + tmp->vme_next_calc = entities; + entities = 
tmp; + } + if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) { + tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash, + crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES); + tmp->vme_next_calc = entities; + entities = tmp; + } + if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) { + tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash, + crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES); + tmp->vme_next_calc = entities; + entities = tmp; + } + + ASSERT(entities != NULL); + /* process all segs in process's address space */ + as = p->p_as; + AS_LOCK_ENTER(as, &as->a_lock, RW_READER); + for (seg = AS_SEGFIRST(as); seg != NULL; + seg = AS_SEGNEXT(as, seg)) { + vmu_calculate_seg(entities, seg); + } + AS_LOCK_EXIT(as, &as->a_lock); +} + +/* + * Free data created by previous call to vmu_calculate(). + */ +static void +vmu_clear_calc() +{ + if (vmu_data.vmu_system != NULL) + vmu_free_entity(vmu_data.vmu_system); + vmu_data.vmu_system = NULL; + if (vmu_data.vmu_zones_hash != NULL) + i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash); + if (vmu_data.vmu_projects_col_hash != NULL) + i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash); + if (vmu_data.vmu_rusers_col_hash != NULL) + i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash); + if (vmu_data.vmu_eusers_col_hash != NULL) + i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash); + + i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash); + i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash); +} + +/* + * Free unused data structures. These can result if the system workload + * decreases between calculations. + */ +static void +vmu_free_extra() +{ + vmu_bound_t *tb; + vmu_object_t *to; + vmu_entity_t *te; + vmu_zone_t *tz; + + while (vmu_data.vmu_free_bounds != NULL) { + tb = vmu_data.vmu_free_bounds; + vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next; + kmem_cache_free(vmu_bound_cache, tb); + } + while (vmu_data.vmu_free_objects != NULL) { + to = vmu_data.vmu_free_objects; + vmu_data.vmu_free_objects = + vmu_data.vmu_free_objects->vmo_next; + kmem_cache_free(vmu_object_cache, to); + } + while (vmu_data.vmu_free_entities != NULL) { + te = vmu_data.vmu_free_entities; + vmu_data.vmu_free_entities = + vmu_data.vmu_free_entities->vme_next; + if (te->vme_vnode_hash != NULL) + mod_hash_destroy_hash(te->vme_vnode_hash); + if (te->vme_amp_hash != NULL) + mod_hash_destroy_hash(te->vme_amp_hash); + if (te->vme_anon_hash != NULL) + mod_hash_destroy_hash(te->vme_anon_hash); + kmem_free(te, sizeof (vmu_entity_t)); + } + while (vmu_data.vmu_free_zones != NULL) { + tz = vmu_data.vmu_free_zones; + vmu_data.vmu_free_zones = + vmu_data.vmu_free_zones->vmz_next; + if (tz->vmz_projects_hash != NULL) + mod_hash_destroy_hash(tz->vmz_projects_hash); + if (tz->vmz_tasks_hash != NULL) + mod_hash_destroy_hash(tz->vmz_tasks_hash); + if (tz->vmz_rusers_hash != NULL) + mod_hash_destroy_hash(tz->vmz_rusers_hash); + if (tz->vmz_eusers_hash != NULL) + mod_hash_destroy_hash(tz->vmz_eusers_hash); + kmem_free(tz, sizeof (vmu_zone_t)); + } +} + +extern kcondvar_t *pr_pid_cv; + +/* + * Determine which entity types are relevant and allocate the hashes to + * track them. Then walk the process table and count rss and swap + * for each process'es address space. Address space object such as + * vnodes, amps and anons are tracked per entity, so that they are + * not double counted in the results. 
+ * + */ +static void +vmu_calculate() +{ + int i = 0; + int ret; + proc_t *p; + + vmu_clear_calc(); + + if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM) + vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM, + ALL_ZONES); + + /* + * Walk process table and calculate rss of each proc. + * + * Pidlock and p_lock cannot be held while doing the rss calculation. + * This is because: + * 1. The calculation allocates using KM_SLEEP. + * 2. The calculation grabs a_lock, which cannot be grabbed + * after p_lock. + * + * Since pidlock must be dropped, we cannot simply just walk the + * practive list. Instead, we walk the process table, and sprlock + * each process to ensure that it does not exit during the + * calculation. + */ + + mutex_enter(&pidlock); + for (i = 0; i < v.v_proc; i++) { +again: + p = pid_entry(i); + if (p == NULL) + continue; + + mutex_enter(&p->p_lock); + mutex_exit(&pidlock); + + if (panicstr) { + mutex_exit(&p->p_lock); + return; + } + + /* Try to set P_PR_LOCK */ + ret = sprtrylock_proc(p); + if (ret == -1) { + /* Process in invalid state */ + mutex_exit(&p->p_lock); + mutex_enter(&pidlock); + continue; + } else if (ret == 1) { + /* + * P_PR_LOCK is already set. Wait and try again. + * This also drops p_lock. + */ + sprwaitlock_proc(p); + mutex_enter(&pidlock); + goto again; + } + mutex_exit(&p->p_lock); + + vmu_calculate_proc(p); + + mutex_enter(&p->p_lock); + sprunlock(p); + mutex_enter(&pidlock); + } + mutex_exit(&pidlock); + + vmu_free_extra(); +} + +/* + * allocate a new cache for N results satisfying flags + */ +vmu_cache_t * +vmu_cache_alloc(size_t nres, uint_t flags) +{ + vmu_cache_t *cache; + + cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP); + cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP); + cache->vmc_nresults = nres; + cache->vmc_flags = flags; + cache->vmc_refcnt = 1; + return (cache); +} + +/* + * Make sure cached results are not freed + */ +static void +vmu_cache_hold(vmu_cache_t *cache) +{ + ASSERT(MUTEX_HELD(&vmu_data.vmu_lock)); + cache->vmc_refcnt++; +} + +/* + * free cache data + */ +static void +vmu_cache_rele(vmu_cache_t *cache) +{ + ASSERT(MUTEX_HELD(&vmu_data.vmu_lock)); + ASSERT(cache->vmc_refcnt > 0); + cache->vmc_refcnt--; + if (cache->vmc_refcnt == 0) { + kmem_free(cache->vmc_results, sizeof (vmusage_t) * + cache->vmc_nresults); + kmem_free(cache, sizeof (vmu_cache_t)); + } +} + +/* + * Copy out the cached results to a caller. Inspect the callers flags + * and zone to determine which cached results should be copied. + */ +static int +vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres, + uint_t flags) +{ + vmusage_t *result, *out_result; + vmusage_t dummy; + size_t i, count = 0; + size_t bufsize; + int ret = 0; + uint_t types = 0; + + if (nres != NULL) { + if (copyin((caddr_t)nres, &bufsize, sizeof (size_t))) + return (set_errno(EFAULT)); + } else { + bufsize = 0; + } + + /* figure out what results the caller is interested in. 
*/ + if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone) + types |= VMUSAGE_SYSTEM; + if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) + types |= VMUSAGE_ZONE; + if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS | + VMUSAGE_COL_PROJECTS)) + types |= VMUSAGE_PROJECTS; + if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) + types |= VMUSAGE_TASKS; + if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) + types |= VMUSAGE_RUSERS; + if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) + types |= VMUSAGE_EUSERS; + + /* count results for current zone */ + out_result = buf; + for (result = cache->vmc_results, i = 0; + i < cache->vmc_nresults; result++, i++) { + + /* Do not return "other-zone" results to non-global zones */ + if (curproc->p_zone != global_zone && + curproc->p_zone->zone_id != result->vmu_zoneid) + continue; + + /* + * If non-global zone requests VMUSAGE_SYSTEM, fake + * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result. + */ + if (curproc->p_zone != global_zone && + (flags & VMUSAGE_SYSTEM) != 0 && + result->vmu_type == VMUSAGE_ZONE) { + count++; + if (out_result != NULL) { + if (bufsize < count) { + ret = set_errno(EOVERFLOW); + } else { + dummy = *result; + dummy.vmu_zoneid = ALL_ZONES; + dummy.vmu_id = 0; + dummy.vmu_type = VMUSAGE_SYSTEM; + if (copyout(&dummy, out_result, + sizeof (vmusage_t))) + return (set_errno( + EFAULT)); + out_result++; + } + } + } + + /* Skip results that do not match requested type */ + if ((result->vmu_type & types) == 0) + continue; + + /* Skip collated results if not requested */ + if (result->vmu_zoneid == ALL_ZONES) { + if (result->vmu_type == VMUSAGE_PROJECTS && + (flags & VMUSAGE_COL_PROJECTS) == 0) + continue; + if (result->vmu_type == VMUSAGE_EUSERS && + (flags & VMUSAGE_COL_EUSERS) == 0) + continue; + if (result->vmu_type == VMUSAGE_RUSERS && + (flags & VMUSAGE_COL_RUSERS) == 0) + continue; + } + + /* Skip "other zone" results if not requested */ + if (result->vmu_zoneid != curproc->p_zone->zone_id) { + if (result->vmu_type == VMUSAGE_ZONE && + (flags & VMUSAGE_ALL_ZONES) == 0) + continue; + if (result->vmu_type == VMUSAGE_PROJECTS && + (flags & (VMUSAGE_ALL_PROJECTS | + VMUSAGE_COL_PROJECTS)) == 0) + continue; + if (result->vmu_type == VMUSAGE_TASKS && + (flags & VMUSAGE_ALL_TASKS) == 0) + continue; + if (result->vmu_type == VMUSAGE_RUSERS && + (flags & (VMUSAGE_ALL_RUSERS | + VMUSAGE_COL_RUSERS)) == 0) + continue; + if (result->vmu_type == VMUSAGE_EUSERS && + (flags & (VMUSAGE_ALL_EUSERS | + VMUSAGE_COL_EUSERS)) == 0) + continue; + } + count++; + if (out_result != NULL) { + if (bufsize < count) { + ret = set_errno(EOVERFLOW); + } else { + if (copyout(result, out_result, + sizeof (vmusage_t))) + return (set_errno(EFAULT)); + out_result++; + } + } + } + if (nres != NULL) + if (copyout(&count, (void *)nres, sizeof (size_t))) + return (set_errno(EFAULT)); + + return (ret); +} + +/* + * vm_getusage() + * + * Counts rss and swap by zone, project, task, and/or user. The flags argument + * determines the type of results structures returned. Flags requesting + * results from more than one zone are "flattened" to the local zone if the + * caller is not the global zone. + * + * args: + * flags: bitmap consisting of one or more of VMUSAGE_*. + * age: maximum allowable age (time since counting was done) in + * seconds of the results. Results from previous callers are + * cached in kernel. + * buf: pointer to buffer array of vmusage_t. If NULL, then only nres + * set on success. 
+ * nres: Set to number of vmusage_t structures pointed to by buf + * before calling vm_getusage(). + * On return 0 (success) or ENOSPC, is set to the number of result + * structures returned or attempted to return. + * + * returns 0 on success, -1 on failure: + * EINTR (interrupted) + * ENOSPC (nres to small for results, nres set to needed value for success) + * EINVAL (flags invalid) + * EFAULT (bad address for buf or nres) + */ +int +vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres) +{ + vmu_entity_t *entity; + vmusage_t *result; + int ret = 0; + int cacherecent = 0; + hrtime_t now; + uint_t flags_orig; + + /* + * Non-global zones cannot request system wide and/or collated + * results, or the system result, so munge the flags accordingly. + */ + flags_orig = flags; + if (curproc->p_zone != global_zone) { + if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) { + flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS); + flags |= VMUSAGE_PROJECTS; + } + if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) { + flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS); + flags |= VMUSAGE_RUSERS; + } + if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) { + flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS); + flags |= VMUSAGE_EUSERS; + } + if (flags & VMUSAGE_SYSTEM) { + flags &= ~VMUSAGE_SYSTEM; + flags |= VMUSAGE_ZONE; + } + } + + /* Check for unknown flags */ + if ((flags & (~VMUSAGE_MASK)) != 0) + return (set_errno(EINVAL)); + + /* Check for no flags */ + if ((flags & VMUSAGE_MASK) == 0) + return (set_errno(EINVAL)); + + mutex_enter(&vmu_data.vmu_lock); + now = gethrtime(); + +start: + if (vmu_data.vmu_cache != NULL) { + + vmu_cache_t *cache; + + if ((vmu_data.vmu_cache->vmc_timestamp + + ((hrtime_t)age * NANOSEC)) > now) + cacherecent = 1; + + if ((vmu_data.vmu_cache->vmc_flags & flags) == flags && + cacherecent == 1) { + cache = vmu_data.vmu_cache; + vmu_cache_hold(cache); + mutex_exit(&vmu_data.vmu_lock); + + ret = vmu_copyout_results(cache, buf, nres, flags_orig); + mutex_enter(&vmu_data.vmu_lock); + vmu_cache_rele(cache); + if (vmu_data.vmu_pending_waiters > 0) + cv_broadcast(&vmu_data.vmu_cv); + mutex_exit(&vmu_data.vmu_lock); + return (ret); + } + /* + * If the cache is recent, it is likely that there are other + * consumers of vm_getusage running, so add their flags to the + * desired flags for the calculation. 
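 + * For example, if the recent cache was built for VMUSAGE_ZONE and this + * caller wants VMUSAGE_PROJECTS, the new calculation is run with both + * flags so that all concurrent consumers can keep sharing one cache.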
+ */ + if (cacherecent == 1) + flags = vmu_data.vmu_cache->vmc_flags | flags; + } + if (vmu_data.vmu_calc_thread == NULL) { + + vmu_cache_t *cache; + + vmu_data.vmu_calc_thread = curthread; + vmu_data.vmu_calc_flags = flags; + vmu_data.vmu_entities = NULL; + vmu_data.vmu_nentities = 0; + if (vmu_data.vmu_pending_waiters > 0) + vmu_data.vmu_calc_flags |= + vmu_data.vmu_pending_flags; + + vmu_data.vmu_pending_flags = 0; + mutex_exit(&vmu_data.vmu_lock); + vmu_calculate(); + mutex_enter(&vmu_data.vmu_lock); + /* copy results to cache */ + if (vmu_data.vmu_cache != NULL) + vmu_cache_rele(vmu_data.vmu_cache); + cache = vmu_data.vmu_cache = + vmu_cache_alloc(vmu_data.vmu_nentities, + vmu_data.vmu_calc_flags); + + result = cache->vmc_results; + for (entity = vmu_data.vmu_entities; entity != NULL; + entity = entity->vme_next) { + *result = entity->vme_result; + result++; + } + cache->vmc_timestamp = gethrtime(); + vmu_cache_hold(cache); + + vmu_data.vmu_calc_flags = 0; + vmu_data.vmu_calc_thread = NULL; + + if (vmu_data.vmu_pending_waiters > 0) + cv_broadcast(&vmu_data.vmu_cv); + + mutex_exit(&vmu_data.vmu_lock); + + /* copy cache */ + ret = vmu_copyout_results(cache, buf, nres, flags_orig); + mutex_enter(&vmu_data.vmu_lock); + vmu_cache_rele(cache); + mutex_exit(&vmu_data.vmu_lock); + + return (ret); + } + vmu_data.vmu_pending_flags |= flags; + vmu_data.vmu_pending_waiters++; + while (vmu_data.vmu_calc_thread != NULL) { + if (cv_wait_sig(&vmu_data.vmu_cv, + &vmu_data.vmu_lock) == 0) { + vmu_data.vmu_pending_waiters--; + mutex_exit(&vmu_data.vmu_lock); + return (set_errno(EINTR)); + } + } + vmu_data.vmu_pending_waiters--; + goto start; +} |
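For reference, a minimal userland sketch of how a consumer such as prstat or rcapd might drive this interface, assuming the getvmusage() wrapper declared in <sys/vm_usage.h> (the flag names and vmusage_t fields are the ones defined above). It follows the two-pass pattern described in the vm_getusage() block comment: a first call with buf == NULL only sizes the result set, and a second call fetches it.

#include <sys/vm_usage.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	uint_t flags = VMUSAGE_SYSTEM | VMUSAGE_ALL_ZONES;
	size_t nres = 0;
	vmusage_t *buf;
	size_t i;

	/* First pass: buf == NULL, so only nres is set on success. */
	if (getvmusage(flags, 5, NULL, &nres) != 0) {
		perror("getvmusage (sizing)");
		return (1);
	}

	if ((buf = calloc(nres, sizeof (vmusage_t))) == NULL)
		return (1);

	/*
	 * Second pass: accept cached results up to 5 seconds old so a
	 * calculation already done for another consumer is reused.  A
	 * robust caller would retry if the result set grew in between.
	 */
	if (getvmusage(flags, 5, buf, &nres) != 0) {
		perror("getvmusage");
		free(buf);
		return (1);
	}

	for (i = 0; i < nres; i++) {
		printf("type %u id %d zone %d rss %llu swap %llu\n",
		    buf[i].vmu_type, (int)buf[i].vmu_id,
		    (int)buf[i].vmu_zoneid,
		    (unsigned long long)buf[i].vmu_rss_all,
		    (unsigned long long)buf[i].vmu_swap_all);
	}
	free(buf);
	return (0);
}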

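Separately, a self-contained userland sketch of the bound-list bookkeeping that vmu_insert_lookup_object_bounds(), vmu_update_bounds() and vmu_merge_bounds() perform on vmu_bound_t lists: page ranges carry a type (unknown, incore, or not incore) and adjacent ranges of equal type are coalesced. The struct and helpers here are toy stand-ins, not the kernel's own.

#include <stdio.h>
#include <stdlib.h>

#define	B_UNKNOWN	0
#define	B_INCORE	1
#define	B_NOT_INCORE	2

typedef struct bound {
	struct bound	*b_next;
	unsigned long	b_start;	/* first page offset in range */
	unsigned long	b_end;		/* last page offset in range */
	int		b_type;		/* B_UNKNOWN, B_INCORE, ... */
} bound_t;

/* Append a range to a sorted, non-overlapping list (toy version). */
static bound_t *
bound_append(bound_t *tail, unsigned long start, unsigned long end, int type)
{
	bound_t *b = calloc(1, sizeof (bound_t));

	if (b == NULL)
		exit(1);
	b->b_start = start;
	b->b_end = end;
	b->b_type = type;
	if (tail != NULL)
		tail->b_next = b;
	return (b);
}

/* Coalesce adjacent ranges of equal type, as vmu_merge_bounds() does. */
static void
bound_merge(bound_t *list)
{
	bound_t *b = list;

	while (b != NULL && b->b_next != NULL) {
		if (b->b_end + 1 == b->b_next->b_start &&
		    b->b_type == b->b_next->b_type) {
			bound_t *dead = b->b_next;

			b->b_end = dead->b_end;
			b->b_next = dead->b_next;
			free(dead);
		} else {
			b = b->b_next;
		}
	}
}

int
main(void)
{
	bound_t *head, *t;

	/* Pages 0-9 and 10-19 found resident, 20-29 paged out. */
	head = t = bound_append(NULL, 0, 9, B_INCORE);
	t = bound_append(t, 10, 19, B_INCORE);
	(void) bound_append(t, 20, 29, B_NOT_INCORE);

	bound_merge(head);	/* leaves [0,19] incore, [20,29] not incore */

	for (t = head; t != NULL; t = t->b_next)
		printf("[%lu-%lu] type %d\n", t->b_start, t->b_end, t->b_type);
	return (0);
}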