path: root/usr/src/uts/common/vm/vm_usage.c
Diffstat (limited to 'usr/src/uts/common/vm/vm_usage.c')
-rw-r--r--  usr/src/uts/common/vm/vm_usage.c  1978
1 file changed, 1978 insertions, 0 deletions
diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c
new file mode 100644
index 0000000000..32a8811e10
--- /dev/null
+++ b/usr/src/uts/common/vm/vm_usage.c
@@ -0,0 +1,1978 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * vm_usage
+ *
+ * This file implements the getvmusage() private system call.
+ * getvmusage() counts the amount of resident memory pages and swap
+ * reserved by the specified process collective. A "process collective" is
+ * the set of processes owned by a particular zone, project, task, or user.
+ *
+ * rss and swap are counted so that for a given process collective, a page is
+ * only counted once. For example, this means that if multiple processes in
+ * the same project map the same page, then the project will only be charged
+ * once for that page. On the other hand, if two processes in different
+ * projects map the same page, then both projects will be charged
+ * for the page.
+ *
+ * The vm_getusage() calculation is implemented so that the first thread
+ * performs the rss/swap counting. Other callers will wait for that thread to
+ * finish, copying the results. This enables multiple rcapds and prstats to
+ * consume data from the same calculation. The results are also cached so that
+ * a caller interested in recent results can just copy them instead of starting
+ * a new calculation. The caller passes the maximum age (in seconds) of the
+ * data. If the cached data is young enough, the cache is copied, otherwise,
+ * a new calculation is executed and the cache is replaced with the new
+ * data.
+ *
+ * The rss calculation for each process collective is as follows:
+ *
+ * - Inspect flags, determine if counting rss for zones, projects, tasks,
+ * and/or users.
+ * - For each proc:
+ * - Figure out proc's collectives (zone, project, task, and/or user).
+ * - For each seg in proc's address space:
+ * - If seg is private:
+ * - Lookup anons in the amp.
+ *			- For incore pages not previously visited for each
+ *			  of the proc's collectives, add incore pagesize to
+ *			  each collective.
+ *			  Anons with a refcnt of 1 can be assumed to be not
+ *			  previously visited.
+ * - For address ranges without anons in the amp:
+ * - Lookup pages in underlying vnode.
+ *				- For incore pages not previously visited for
+ * each of the proc's collectives, add incore
+ * pagesize to each collective.
+ * - If seg is shared:
+ * - Lookup pages in the shared amp or vnode.
+ * - For incore pages not previously visited for each of
+ * the proc's collectives, add incore pagesize to each
+ * collective.
+ *
+ * Swap is reserved by private segments, and shared anonymous segments.
+ * The only shared anon segments which do not reserve swap are ISM segments
+ * and schedctl segments, both of which can be identified by having
+ * amp->swresv == 0.
+ *
+ * The swap calculation for each collective is as follows:
+ *
+ * - Inspect flags, determine if counting swap for zones, projects, tasks,
+ * and/or users.
+ * - For each proc:
+ * - Figure out proc's collectives (zone, project, task, and/or user).
+ * - For each seg in proc's address space:
+ * - If seg is private:
+ * - Add svd->swresv pages to swap count for each of the
+ * proc's collectives.
+ * - If seg is anon, shared, and amp->swresv != 0
+ * - For address ranges in amp not previously visited for
+ * each of the proc's collectives, add size of address
+ * range to the swap count for each collective.
+ *
+ * These two calculations are done simultaneously, with most of the work
+ * being done in vmu_calculate_seg(). The results of the calculation are
+ * copied into "vmu_data.vmu_cache_results".
+ *
+ * To perform the calculation, various things are tracked and cached:
+ *
+ * - incore/not-incore page ranges for all vnodes.
+ * (vmu_data.vmu_all_vnodes_hash)
+ * This eliminates looking up the same page more than once.
+ *
+ * - incore/not-incore page ranges for all shared amps.
+ * (vmu_data.vmu_all_amps_hash)
+ * This eliminates looking up the same page more than once.
+ *
+ * - visited page ranges for each collective.
+ * - per vnode (entity->vme_vnode_hash)
+ * - per shared amp (entity->vme_amp_hash)
+ * For accurate counting of map-shared and cow-shared pages.
+ *
+ * - visited private anons (refcnt > 1) for each collective.
+ * (entity->vme_anon_hash)
+ * For accurate counting of cow-shared pages.
+ *
+ * The common accounting structure is the vmu_entity_t, which represents
+ * collectives:
+ *
+ * - A zone.
+ * - A project, task, or user within a zone.
+ * - The entire system (vmu_data.vmu_system).
+ * - Each collapsed (col) project and user. This means a given projid or
+ * uid, regardless of which zone the process is in. For instance,
+ * project 0 in the global zone and project 0 in a non global zone are
+ * the same collapsed project.
+ *
+ * Each entity structure tracks which pages have been already visited for
+ * that entity (via previously inspected processes) so that these pages are
+ * not double counted.
+ */
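+
+/*
+ * Purely illustrative sketch (not part of this implementation): a userland
+ * consumer such as prstat or rcapd might invoke the private getvmusage()
+ * wrapper described above, here assumed to be declared in <sys/vm_usage.h>
+ * with the same signature as vm_getusage() below, asking for per-zone
+ * results no more than 60 seconds old:
+ *
+ *	vmusage_t res[32];
+ *	size_t nres = 32;
+ *	size_t i;
+ *
+ *	if (getvmusage(VMUSAGE_ZONE, 60, res, &nres) == 0) {
+ *		for (i = 0; i < nres; i++)
+ *			(void) printf("zone %d rss %llu swap %llu\n",
+ *			    (int)res[i].vmu_id,
+ *			    (u_longlong_t)res[i].vmu_rss_all,
+ *			    (u_longlong_t)res[i].vmu_swap_all);
+ *	}
+ */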
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+#include <sys/proc.h>
+#include <sys/project.h>
+#include <sys/task.h>
+#include <sys/thread.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/modhash.h>
+#include <sys/modhash_impl.h>
+#include <sys/shm.h>
+#include <sys/swap.h>
+#include <sys/synch.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vm_usage.h>
+#include <sys/zone.h>
+#include <vm/anon.h>
+#include <vm/as.h>
+#include <vm/seg_vn.h>
+#include <vm/seg_spt.h>
+
+#define VMUSAGE_HASH_SIZE 512
+
+#define VMUSAGE_TYPE_VNODE 1
+#define VMUSAGE_TYPE_AMP 2
+#define VMUSAGE_TYPE_ANON 3
+
+#define VMUSAGE_BOUND_UNKNOWN 0
+#define VMUSAGE_BOUND_INCORE 1
+#define VMUSAGE_BOUND_NOT_INCORE 2
+
+/*
+ * bounds for vnodes and shared amps
+ * Each bound is either entirely incore, entirely not in core, or
+ * entirely unknown. Bounds are stored in order by offset.
+ */
+typedef struct vmu_bound {
+ struct vmu_bound *vmb_next;
+ pgcnt_t vmb_start; /* page offset in vnode/amp on which bound starts */
+ pgcnt_t vmb_end; /* page offset in vnode/amp on which bound ends */
+ char vmb_type; /* One of VMUSAGE_BOUND_* */
+} vmu_bound_t;
+
+/*
+ * hash of visited objects (vnodes or shared amps)
+ * key is address of vnode or amp. Bounds lists known incore/non-incore
+ * bounds for vnode/amp.
+ */
+typedef struct vmu_object {
+ struct vmu_object *vmo_next; /* free list */
+ caddr_t vmo_key;
+ short vmo_type;
+ vmu_bound_t *vmo_bounds;
+} vmu_object_t;
+
+/*
+ * Entity by which to count results.
+ *
+ * The entity structure keeps the current rss/swap counts for each entity
+ * (zone, project, etc), and hashes of vm structures that have already
+ * been visited for the entity.
+ *
+ * vme_next: links the list of all entities currently being counted by
+ * vmu_calculate().
+ *
+ * vme_next_calc: links the list of entities related to the current process
+ * being counted by vmu_calculate_proc().
+ *
+ * vmu_calculate_proc() walks all processes. For each process, it makes a
+ * list of the entities related to that process using vme_next_calc. This
+ * list changes each time vmu_calculate_proc() is called.
+ *
+ */
+typedef struct vmu_entity {
+ struct vmu_entity *vme_next;
+ struct vmu_entity *vme_next_calc;
+ mod_hash_t *vme_vnode_hash; /* vnodes visited for entity */
+ mod_hash_t *vme_amp_hash; /* shared amps visited for entity */
+ mod_hash_t *vme_anon_hash; /* cow anons visited for entity */
+ vmusage_t vme_result; /* identifies entity and results */
+} vmu_entity_t;
+
+/*
+ * Hash of entities visited within a zone, and an entity for the zone
+ * itself.
+ */
+typedef struct vmu_zone {
+ struct vmu_zone *vmz_next; /* free list */
+ id_t vmz_id;
+ vmu_entity_t *vmz_zone;
+ mod_hash_t *vmz_projects_hash;
+ mod_hash_t *vmz_tasks_hash;
+ mod_hash_t *vmz_rusers_hash;
+ mod_hash_t *vmz_eusers_hash;
+} vmu_zone_t;
+
+/*
+ * Cache of results from last calculation
+ */
+typedef struct vmu_cache {
+ vmusage_t *vmc_results; /* Results from last call to */
+ /* vm_getusage(). */
+ uint64_t vmc_nresults; /* Count of cached results */
+ uint64_t vmc_refcnt; /* refcnt for free */
+ uint_t vmc_flags; /* Flags for vm_getusage() */
+ hrtime_t vmc_timestamp; /* when cache was created */
+} vmu_cache_t;
+
+/*
+ * top level rss info for the system
+ */
+typedef struct vmu_data {
+ kmutex_t vmu_lock; /* Protects vmu_data */
+ kcondvar_t vmu_cv; /* Used to signal threads */
+ /* Waiting for */
+ /* Rss_calc_thread to finish */
+ vmu_entity_t *vmu_system; /* Entity for tracking */
+ /* rss/swap for all processes */
+ /* in all zones */
+ mod_hash_t *vmu_zones_hash; /* Zones visited */
+ mod_hash_t *vmu_projects_col_hash; /* These *_col_hash hashes */
+ mod_hash_t *vmu_rusers_col_hash; /* keep track of entities, */
+ mod_hash_t *vmu_eusers_col_hash; /* ignoring zoneid, in order */
+ /* to implement VMUSAGE_COL_* */
+ /* flags, which aggregate by */
+ /* project or user regardless */
+ /* of zoneid. */
+ mod_hash_t *vmu_all_vnodes_hash; /* System wide visited vnodes */
+ /* to track incore/not-incore */
+ mod_hash_t *vmu_all_amps_hash; /* System wide visited shared */
+ /* amps to track incore/not- */
+ /* incore */
+ vmu_entity_t *vmu_entities; /* Linked list of entities */
+ size_t vmu_nentities; /* Count of entities in list */
+ vmu_cache_t *vmu_cache; /* Cached results */
+ kthread_t *vmu_calc_thread; /* NULL, or thread running */
+ /* vmu_calculate() */
+	uint_t vmu_calc_flags;		/* Flags being used by */
+ /* currently running calc */
+ /* thread */
+ uint_t vmu_pending_flags; /* Flags of vm_getusage() */
+ /* threads waiting for */
+ /* calc thread to finish */
+ uint_t vmu_pending_waiters; /* Number of threads waiting */
+ /* for calc thread */
+ vmu_bound_t *vmu_free_bounds;
+ vmu_object_t *vmu_free_objects;
+ vmu_entity_t *vmu_free_entities;
+ vmu_zone_t *vmu_free_zones;
+} vmu_data_t;
+
+extern struct as kas;
+extern proc_t *practive;
+extern zone_t *global_zone;
+extern struct seg_ops segvn_ops;
+extern struct seg_ops segspt_shmops;
+
+static vmu_data_t vmu_data;
+static kmem_cache_t *vmu_bound_cache;
+static kmem_cache_t *vmu_object_cache;
+
+/*
+ * Save a bound on the free list
+ */
+static void
+vmu_free_bound(vmu_bound_t *bound)
+{
+ bound->vmb_next = vmu_data.vmu_free_bounds;
+ vmu_data.vmu_free_bounds = bound;
+}
+
+/*
+ * Free an object, and all visited bound info.
+ */
+static void
+vmu_free_object(mod_hash_val_t val)
+{
+ vmu_object_t *obj = (vmu_object_t *)val;
+ vmu_bound_t *bound = obj->vmo_bounds;
+ vmu_bound_t *tmp;
+
+ while (bound != NULL) {
+ tmp = bound;
+ bound = bound->vmb_next;
+ vmu_free_bound(tmp);
+ }
+ obj->vmo_next = vmu_data.vmu_free_objects;
+ vmu_data.vmu_free_objects = obj;
+}
+
+/*
+ * Free an entity, and hashes of visited objects for that entity.
+ */
+static void
+vmu_free_entity(mod_hash_val_t val)
+{
+ vmu_entity_t *entity = (vmu_entity_t *)val;
+
+ if (entity->vme_vnode_hash != NULL)
+ i_mod_hash_clear_nosync(entity->vme_vnode_hash);
+ if (entity->vme_amp_hash != NULL)
+ i_mod_hash_clear_nosync(entity->vme_amp_hash);
+ if (entity->vme_anon_hash != NULL)
+ i_mod_hash_clear_nosync(entity->vme_anon_hash);
+
+ entity->vme_next = vmu_data.vmu_free_entities;
+ vmu_data.vmu_free_entities = entity;
+}
+
+/*
+ * Free zone entity, and all hashes of entities inside that zone,
+ * which are projects, tasks, and users.
+ */
+static void
+vmu_free_zone(mod_hash_val_t val)
+{
+ vmu_zone_t *zone = (vmu_zone_t *)val;
+
+ if (zone->vmz_zone != NULL) {
+ vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
+ zone->vmz_zone = NULL;
+ }
+ if (zone->vmz_projects_hash != NULL)
+ i_mod_hash_clear_nosync(zone->vmz_projects_hash);
+ if (zone->vmz_tasks_hash != NULL)
+ i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
+ if (zone->vmz_rusers_hash != NULL)
+ i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
+ if (zone->vmz_eusers_hash != NULL)
+ i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
+ zone->vmz_next = vmu_data.vmu_free_zones;
+ vmu_data.vmu_free_zones = zone;
+}
+
+/*
+ * Initialize synchronization primitives and hashes for system-wide tracking
+ * of visited vnodes and shared amps. Initialize results cache.
+ */
+void
+vm_usage_init()
+{
+ mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);
+
+ vmu_data.vmu_system = NULL;
+ vmu_data.vmu_zones_hash = NULL;
+ vmu_data.vmu_projects_col_hash = NULL;
+ vmu_data.vmu_rusers_col_hash = NULL;
+ vmu_data.vmu_eusers_col_hash = NULL;
+
+ vmu_data.vmu_free_bounds = NULL;
+ vmu_data.vmu_free_objects = NULL;
+ vmu_data.vmu_free_entities = NULL;
+ vmu_data.vmu_free_zones = NULL;
+
+ vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
+ "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+ sizeof (vnode_t));
+ vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
+ "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+ sizeof (struct anon_map));
+ vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
+ "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
+ vmu_free_entity);
+ vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
+ "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
+ vmu_free_entity);
+ vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
+	    "vmusage collapsed euser hash", VMUSAGE_HASH_SIZE,
+ vmu_free_entity);
+ vmu_data.vmu_zones_hash = mod_hash_create_idhash(
+ "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);
+
+ vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
+ sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ vmu_object_cache = kmem_cache_create("vmu_object_cache",
+ sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+ vmu_data.vmu_entities = NULL;
+ vmu_data.vmu_nentities = 0;
+
+ vmu_data.vmu_cache = NULL;
+ vmu_data.vmu_calc_thread = NULL;
+ vmu_data.vmu_calc_flags = 0;
+ vmu_data.vmu_pending_flags = 0;
+ vmu_data.vmu_pending_waiters = 0;
+}
+
+/*
+ * Allocate hashes for tracking vm objects visited for an entity.
+ * Update list of entities.
+ */
+static vmu_entity_t *
+vmu_alloc_entity(id_t id, int type, id_t zoneid)
+{
+ vmu_entity_t *entity;
+
+ if (vmu_data.vmu_free_entities != NULL) {
+ entity = vmu_data.vmu_free_entities;
+ vmu_data.vmu_free_entities =
+ vmu_data.vmu_free_entities->vme_next;
+ bzero(&entity->vme_result, sizeof (vmusage_t));
+ } else {
+ entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
+ }
+ entity->vme_result.vmu_id = id;
+ entity->vme_result.vmu_zoneid = zoneid;
+ entity->vme_result.vmu_type = type;
+
+ if (entity->vme_vnode_hash == NULL)
+ entity->vme_vnode_hash = mod_hash_create_ptrhash(
+ "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+ sizeof (vnode_t));
+
+ if (entity->vme_amp_hash == NULL)
+ entity->vme_amp_hash = mod_hash_create_ptrhash(
+ "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
+ sizeof (struct anon_map));
+
+ if (entity->vme_anon_hash == NULL)
+ entity->vme_anon_hash = mod_hash_create_ptrhash(
+ "vmusage anon hash", VMUSAGE_HASH_SIZE,
+ mod_hash_null_valdtor, sizeof (struct anon));
+
+ entity->vme_next = vmu_data.vmu_entities;
+ vmu_data.vmu_entities = entity;
+ vmu_data.vmu_nentities++;
+
+ return (entity);
+}
+
+/*
+ * Allocate a zone entity, and hashes for tracking visited vm objects
+ * for projects, tasks, and users within that zone.
+ */
+static vmu_zone_t *
+vmu_alloc_zone(id_t id)
+{
+ vmu_zone_t *zone;
+
+ if (vmu_data.vmu_free_zones != NULL) {
+ zone = vmu_data.vmu_free_zones;
+ vmu_data.vmu_free_zones =
+ vmu_data.vmu_free_zones->vmz_next;
+ zone->vmz_next = NULL;
+ zone->vmz_zone = NULL;
+ } else {
+ zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
+ }
+
+ zone->vmz_id = id;
+
+ if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
+ zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);
+
+ if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
+ VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
+ zone->vmz_projects_hash = mod_hash_create_idhash(
+ "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+ if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
+ != 0 && zone->vmz_tasks_hash == NULL)
+ zone->vmz_tasks_hash = mod_hash_create_idhash(
+ "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+ if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
+ != 0 && zone->vmz_rusers_hash == NULL)
+ zone->vmz_rusers_hash = mod_hash_create_idhash(
+ "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+ if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
+ != 0 && zone->vmz_eusers_hash == NULL)
+ zone->vmz_eusers_hash = mod_hash_create_idhash(
+ "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);
+
+ return (zone);
+}
+
+/*
+ * Allocate a structure for tracking visited bounds for a vm object.
+ */
+static vmu_object_t *
+vmu_alloc_object(caddr_t key, int type)
+{
+ vmu_object_t *object;
+
+ if (vmu_data.vmu_free_objects != NULL) {
+ object = vmu_data.vmu_free_objects;
+ vmu_data.vmu_free_objects =
+ vmu_data.vmu_free_objects->vmo_next;
+ } else {
+ object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
+ }
+
+ object->vmo_key = key;
+ object->vmo_type = type;
+ object->vmo_bounds = NULL;
+
+ return (object);
+}
+
+/*
+ * Allocate and return a bound structure.
+ */
+static vmu_bound_t *
+vmu_alloc_bound()
+{
+ vmu_bound_t *bound;
+
+ if (vmu_data.vmu_free_bounds != NULL) {
+ bound = vmu_data.vmu_free_bounds;
+ vmu_data.vmu_free_bounds =
+ vmu_data.vmu_free_bounds->vmb_next;
+ bzero(bound, sizeof (vmu_bound_t));
+ } else {
+ bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
+ bzero(bound, sizeof (vmu_bound_t));
+ }
+ return (bound);
+}
+
+/*
+ * vmu_find_insert_* functions implement hash lookup or allocate and
+ * insert operations.
+ */
+static vmu_object_t *
+vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
+{
+ int ret;
+ vmu_object_t *object;
+
+ ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
+ (mod_hash_val_t *)&object);
+ if (ret != 0) {
+ object = vmu_alloc_object(key, type);
+ ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
+ (mod_hash_val_t)object, (mod_hash_hndl_t)0);
+ ASSERT(ret == 0);
+ }
+ return (object);
+}
+
+static int
+vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
+{
+ int ret;
+ caddr_t val;
+
+ ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
+ (mod_hash_val_t *)&val);
+
+ if (ret == 0)
+ return (0);
+
+ ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
+ (mod_hash_val_t)key, (mod_hash_hndl_t)0);
+
+ ASSERT(ret == 0);
+
+ return (1);
+}
+
+static vmu_entity_t *
+vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
+{
+ int ret;
+ vmu_entity_t *entity;
+
+ ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
+ (mod_hash_val_t *)&entity);
+ if (ret != 0) {
+ entity = vmu_alloc_entity(id, type, zoneid);
+ ret = i_mod_hash_insert_nosync(hash,
+ (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
+ (mod_hash_hndl_t)0);
+ ASSERT(ret == 0);
+ }
+ return (entity);
+}
+
+
+/*
+ * Returns list of object bounds between start and end. New bounds inserted
+ * by this call are given type.
+ *
+ * Returns the number of pages covered if new bounds are created. Returns 0
+ * if region between start/end consists of all existing bounds.
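+ *
+ * For example, if the object already holds a bound covering pages [5,9]
+ * and this is called with start = 0, end = 14, and type
+ * VMUSAGE_BOUND_UNKNOWN, new UNKNOWN bounds [0,4] and [10,14] are created
+ * around the existing bound; *first points at [0,4], *last at [10,14],
+ * and 10 (the number of newly covered pages) is returned.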
+ */
+static pgcnt_t
+vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t
+ end, char type, vmu_bound_t **first, vmu_bound_t **last)
+{
+ vmu_bound_t *next;
+ vmu_bound_t *prev = NULL;
+ vmu_bound_t *tmp = NULL;
+ pgcnt_t ret = 0;
+
+ *first = *last = NULL;
+
+ for (next = ro->vmo_bounds; next != NULL; next = next->vmb_next) {
+ /*
+ * Find bounds overlapping or overlapped by range [start,end].
+ */
+ if (start > next->vmb_end) {
+ /* bound is before new bound */
+ prev = next;
+ continue;
+ }
+ if (next->vmb_start > end) {
+ /* bound is after new bound */
+ break;
+ }
+ if (*first == NULL)
+ *first = next;
+ *last = next;
+ }
+
+ if (*first == NULL) {
+ ASSERT(*last == NULL);
+ /*
+ * No bounds overlapping range [start,end], so create new
+ * bound
+ */
+ tmp = vmu_alloc_bound();
+ tmp->vmb_start = start;
+ tmp->vmb_end = end;
+ tmp->vmb_type = type;
+ if (prev == NULL) {
+ tmp->vmb_next = ro->vmo_bounds;
+ ro->vmo_bounds = tmp;
+ } else {
+ tmp->vmb_next = prev->vmb_next;
+ prev->vmb_next = tmp;
+ }
+ *first = tmp;
+ *last = tmp;
+ ASSERT(tmp->vmb_end >= tmp->vmb_start);
+ ret = tmp->vmb_end - tmp->vmb_start + 1;
+ return (ret);
+ }
+
+ /* Check to see if start is before first known bound */
+ ASSERT(first != NULL && last != NULL);
+ next = (*first);
+ if (start < (*first)->vmb_start) {
+ /* Create new bound before first bound */
+ tmp = vmu_alloc_bound();
+ tmp->vmb_start = start;
+ tmp->vmb_end = (*first)->vmb_start - 1;
+ tmp->vmb_type = type;
+ tmp->vmb_next = *first;
+ if (*first == ro->vmo_bounds)
+ ro->vmo_bounds = tmp;
+ if (prev != NULL)
+ prev->vmb_next = tmp;
+ ASSERT(tmp->vmb_end >= tmp->vmb_start);
+ ret += tmp->vmb_end - tmp->vmb_start + 1;
+ *first = tmp;
+ }
+ /*
+ * Between start and end, search for gaps between and after existing
+ * bounds. Create new bounds to fill gaps if they exist.
+ */
+ while (end > next->vmb_end) {
+ /*
+		 * Check for gap between bound and next bound. If no gap,
+ * continue.
+ */
+ if ((next != *last) &&
+ ((next->vmb_end + 1) == next->vmb_next->vmb_start)) {
+ next = next->vmb_next;
+ continue;
+ }
+ /*
+ * Insert new bound in gap after bound, and before next
+ * bound if next bound exists.
+ */
+ tmp = vmu_alloc_bound();
+ tmp->vmb_type = type;
+ tmp->vmb_next = next->vmb_next;
+ tmp->vmb_start = next->vmb_end + 1;
+
+ if (next != *last) {
+ tmp->vmb_end = next->vmb_next->vmb_start - 1;
+ ASSERT(tmp->vmb_end >= tmp->vmb_start);
+ ret += tmp->vmb_end - tmp->vmb_start + 1;
+ next->vmb_next = tmp;
+ next = tmp->vmb_next;
+ } else {
+ tmp->vmb_end = end;
+ ASSERT(tmp->vmb_end >= tmp->vmb_start);
+ ret += tmp->vmb_end - tmp->vmb_start + 1;
+ next->vmb_next = tmp;
+ *last = tmp;
+ break;
+ }
+ }
+ return (ret);
+}
+
+/*
+ * vmu_update_bounds()
+ *
+ * first, last: list of continuous bounds, of which zero or more are of
+ * type VMUSAGE_BOUND_UNKNOWN.
+ *
+ * new_first, new_last: list of continuous bounds, of which none are of
+ * type VMUSAGE_BOUND_UNKNOWN. These bounds are used to
+ * update the types of bounds in (first,last) with
+ * type VMUSAGE_BOUND_UNKNOWN.
+ *
+ * For the list of bounds (first,last), this function updates any bounds
+ * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound in
+ * the list (new_first, new_last).
+ *
+ * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
+ * (new_first, new_last), it will be split into multiple bounds.
+ *
+ * Return value:
+ * The number of pages in the list of bounds (first,last) that were of
+ * type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
+ * VMUSAGE_BOUND_INCORE.
+ *
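+ * For example, if (first,last) is a single UNKNOWN bound covering pages
+ * [0,9] and (new_first,new_last) is INCORE [0,3] followed by NOT_INCORE
+ * [4,9], the UNKNOWN bound is split into INCORE [0,3] and NOT_INCORE
+ * [4,9], and 4 is returned.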
+ */
+static pgcnt_t
+vmu_update_bounds(vmu_bound_t **first, vmu_bound_t **last,
+ vmu_bound_t *new_first, vmu_bound_t *new_last)
+{
+ vmu_bound_t *next, *new_next, *tmp;
+ pgcnt_t rss = 0;
+
+ next = *first;
+ new_next = new_first;
+
+ /* verify bounds span same pages */
+ ASSERT((*first)->vmb_start >= new_next->vmb_start);
+ ASSERT((*last)->vmb_end <= new_last->vmb_end);
+ for (;;) {
+ /* If bound already has type, proceed to next bound */
+ if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
+ if (next == *last)
+ break;
+ next = next->vmb_next;
+ continue;
+ }
+ while (new_next->vmb_end < next->vmb_start)
+ new_next = new_next->vmb_next;
+ ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
+ next->vmb_type = new_next->vmb_type;
+ if (new_next->vmb_end < next->vmb_end) {
+ /* need to split bound */
+ tmp = vmu_alloc_bound();
+ tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
+ tmp->vmb_start = new_next->vmb_end + 1;
+ tmp->vmb_end = next->vmb_end;
+ tmp->vmb_next = next->vmb_next;
+ next->vmb_end = new_next->vmb_end;
+ next->vmb_next = tmp;
+ if (*last == next)
+ *last = tmp;
+ if (next->vmb_type == VMUSAGE_BOUND_INCORE)
+ rss += next->vmb_end - next->vmb_start + 1;
+ next = tmp;
+ } else {
+ if (next->vmb_type == VMUSAGE_BOUND_INCORE)
+ rss += next->vmb_end - next->vmb_start + 1;
+ if (next == *last)
+ break;
+ next = next->vmb_next;
+ }
+ }
+ return (rss);
+}
+
+/*
+ * Merges adjacent bounds with the same type between first and last bound.
+ * After merge, last pointer is no longer valid, as last bound may be
+ * merged away.
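+ *
+ * For example, adjacent bounds INCORE [0,3] and INCORE [4,9] are merged
+ * into a single INCORE bound [0,9].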
+ */
+static void
+vmu_merge_bounds(vmu_bound_t **first, vmu_bound_t **last)
+{
+ vmu_bound_t *next;
+ vmu_bound_t *tmp;
+
+ ASSERT(*first != NULL);
+ ASSERT(*last != NULL);
+
+ next = *first;
+ while (next != *last) {
+
+ /* If bounds are adjacent and have same type, merge them */
+ if (((next->vmb_end + 1) == next->vmb_next->vmb_start) &&
+ (next->vmb_type == next->vmb_next->vmb_type)) {
+ tmp = next->vmb_next;
+ next->vmb_end = tmp->vmb_end;
+ next->vmb_next = tmp->vmb_next;
+ vmu_free_bound(tmp);
+ if (tmp == *last)
+ *last = next;
+ } else {
+ next = next->vmb_next;
+ }
+ }
+}
+
+/*
+ * Given an amp and a list of bounds, updates each bound's type with
+ * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
+ *
+ * If a bound is partially incore, it will be split into two bounds.
+ * first and last may be modified, as bounds may be split into multiple
+ * bounds if they are partially incore/not-incore.
+ *
+ * Set incore to non-zero if bounds are already known to be incore
+ *
+ */
+static void
+vmu_amp_update_incore_bounds(struct anon_map *amp, vmu_bound_t **first,
+ vmu_bound_t **last, boolean_t incore)
+{
+ vmu_bound_t *next;
+ vmu_bound_t *tmp;
+ pgcnt_t index;
+ short bound_type;
+ short page_type;
+ vnode_t *vn;
+ anoff_t off;
+ struct anon *ap;
+
+ next = *first;
+ /* Shared anon slots don't change once set */
+ ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
+ for (;;) {
+ if (incore == B_TRUE)
+ next->vmb_type = VMUSAGE_BOUND_INCORE;
+
+ if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
+ if (next == *last)
+ break;
+ next = next->vmb_next;
+ continue;
+ }
+ bound_type = next->vmb_type;
+ index = next->vmb_start;
+ while (index <= next->vmb_end) {
+
+ /*
+ * These are used to determine how much to increment
+ * index when a large page is found.
+ */
+ page_t *page;
+ pgcnt_t pgcnt = 1;
+ uint_t pgshft;
+ pgcnt_t pgmsk;
+
+ ap = anon_get_ptr(amp->ahp, index);
+ if (ap != NULL)
+ swap_xlate(ap, &vn, &off);
+
+ if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
+ (page = page_exists(vn, off)) != NULL) {
+ page_type = VMUSAGE_BOUND_INCORE;
+ if (page->p_szc > 0) {
+ pgcnt = page_get_pagecnt(page->p_szc);
+ pgshft = page_get_shift(page->p_szc);
+ pgmsk = (0x1 << (pgshft - PAGESHIFT))
+ - 1;
+ }
+ } else {
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ }
+ if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
+ next->vmb_type = page_type;
+ } else if (next->vmb_type != page_type) {
+ /*
+ * if current bound type does not match page
+ * type, need to split off new bound.
+ */
+ tmp = vmu_alloc_bound();
+ tmp->vmb_type = page_type;
+ tmp->vmb_start = index;
+ tmp->vmb_end = next->vmb_end;
+ tmp->vmb_next = next->vmb_next;
+ next->vmb_end = index - 1;
+ next->vmb_next = tmp;
+ if (*last == next)
+ *last = tmp;
+ next = tmp;
+ }
+ if (pgcnt > 1) {
+ /*
+ * If inside large page, jump to next large
+ * page
+ */
+ index = (index & ~pgmsk) + pgcnt;
+ } else {
+ index++;
+ }
+ }
+ if (next == *last) {
+ ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
+ break;
+ } else
+ next = next->vmb_next;
+ }
+ ANON_LOCK_EXIT(&amp->a_rwlock);
+}
+
+/*
+ * Same as vmu_amp_update_incore_bounds(), except for tracking
+ * incore-/not-incore for vnodes.
+ */
+static void
+vmu_vnode_update_incore_bounds(vnode_t *vnode, vmu_bound_t **first,
+ vmu_bound_t **last)
+{
+ vmu_bound_t *next;
+ vmu_bound_t *tmp;
+ pgcnt_t index;
+ short bound_type;
+ short page_type;
+
+ next = *first;
+ for (;;) {
+ if (vnode->v_pages == NULL)
+ next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;
+
+ if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
+ if (next == *last)
+ break;
+ next = next->vmb_next;
+ continue;
+ }
+
+ bound_type = next->vmb_type;
+ index = next->vmb_start;
+ while (index <= next->vmb_end) {
+
+ /*
+ * These are used to determine how much to increment
+ * index when a large page is found.
+ */
+ page_t *page;
+ pgcnt_t pgcnt = 1;
+ uint_t pgshft;
+ pgcnt_t pgmsk;
+
+ if (vnode->v_pages != NULL &&
+ (page = page_exists(vnode, ptob(index))) != NULL) {
+ page_type = VMUSAGE_BOUND_INCORE;
+ if (page->p_szc > 0) {
+ pgcnt = page_get_pagecnt(page->p_szc);
+ pgshft = page_get_shift(page->p_szc);
+ pgmsk = (0x1 << (pgshft - PAGESHIFT))
+ - 1;
+ }
+ } else {
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ }
+ if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
+ next->vmb_type = page_type;
+ } else if (next->vmb_type != page_type) {
+ /*
+ * if current bound type does not match page
+ * type, need to split off new bound.
+ */
+ tmp = vmu_alloc_bound();
+ tmp->vmb_type = page_type;
+ tmp->vmb_start = index;
+ tmp->vmb_end = next->vmb_end;
+ tmp->vmb_next = next->vmb_next;
+ next->vmb_end = index - 1;
+ next->vmb_next = tmp;
+ if (*last == next)
+ *last = tmp;
+ next = tmp;
+ }
+ if (pgcnt > 1) {
+ /*
+ * If inside large page, jump to next large
+ * page
+ */
+ index = (index & ~pgmsk) + pgcnt;
+ } else {
+ index++;
+ }
+ }
+ if (next == *last) {
+ ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
+ break;
+ } else
+ next = next->vmb_next;
+ }
+}
+
+/*
+ * Calculate the rss and swap consumed by a segment. vmu_entities is the
+ * list of entities to visit. For shared segments, the vnode or amp
+ * is looked up in each entity to see if it has already been counted. Private
+ * anon pages are checked per entity to ensure that cow pages are not
+ * double counted.
+ *
+ * For private mapped files, first the amp is checked for private pages.
+ * Bounds not backed by the amp are looked up in the vnode for each entity
+ * to avoid double counting of private COW vnode pages.
+ */
+static void
+vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
+{
+ struct segvn_data *svd;
+ struct shm_data *shmd;
+ struct spt_data *sptd;
+ vmu_object_t *shared_object = NULL;
+ vmu_object_t *entity_object = NULL;
+ vmu_entity_t *entity;
+ vmusage_t *result;
+ vmu_bound_t *first = NULL;
+ vmu_bound_t *last = NULL;
+ vmu_bound_t *cur = NULL;
+ vmu_bound_t *e_first = NULL;
+ vmu_bound_t *e_last = NULL;
+ vmu_bound_t *tmp;
+ pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
+ struct anon_map *private_amp = NULL;
+ boolean_t incore = B_FALSE;
+ boolean_t shared = B_FALSE;
+ int file = 0;
+ pgcnt_t swresv = 0;
+ pgcnt_t panon = 0;
+
+	/* Can zero-length segments exist? Not sure, so paranoia */
+ if (seg->s_size <= 0)
+ return;
+
+ /*
+	 * Figure out if there is a shared object (such as a named vnode or
+	 * a shared amp); then figure out if there is a private amp, which
+ * identifies private pages.
+ */
+ if (seg->s_ops == &segvn_ops) {
+ svd = (struct segvn_data *)seg->s_data;
+ if (svd->type == MAP_SHARED)
+ shared = B_TRUE;
+ else
+ swresv = svd->swresv;
+
+ if (svd->vp != NULL) {
+ file = 1;
+ shared_object = vmu_find_insert_object(
+ vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
+ VMUSAGE_TYPE_VNODE);
+ s_start = btop(svd->offset);
+ s_end = btop(svd->offset + seg->s_size) - 1;
+ }
+ if (svd->amp != NULL && svd->type == MAP_SHARED) {
+ ASSERT(shared_object == NULL);
+ shared_object = vmu_find_insert_object(
+ vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
+ VMUSAGE_TYPE_AMP);
+ s_start = svd->anon_index;
+ s_end = svd->anon_index + btop(seg->s_size) - 1;
+ /* schedctl mappings are always in core */
+ if (svd->amp->swresv == 0)
+ incore = B_TRUE;
+ }
+ if (svd->amp != NULL && svd->type == MAP_PRIVATE) {
+ private_amp = svd->amp;
+ p_start = svd->anon_index;
+ p_end = svd->anon_index + btop(seg->s_size) - 1;
+ }
+ } else if (seg->s_ops == &segspt_shmops) {
+ shared = B_TRUE;
+ shmd = (struct shm_data *)seg->s_data;
+ shared_object = vmu_find_insert_object(
+ vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
+ VMUSAGE_TYPE_AMP);
+ s_start = 0;
+ s_end = btop(seg->s_size) - 1;
+ sptd = shmd->shm_sptseg->s_data;
+
+ /* ism segments are always incore and do not reserve swap */
+ if (sptd->spt_flags & SHM_SHARE_MMU)
+ incore = B_TRUE;
+
+ } else {
+ return;
+ }
+
+ /*
+ * If there is a private amp, count anon pages that exist. If an
+ * anon has a refcnt > 1 (cow sharing), then save the anon in a
+ * hash so that it is not double counted.
+ *
+	 * If there is also a shared object, then figure out the bounds
+ * which are not mapped by the private amp.
+ */
+ if (private_amp != NULL) {
+
+ /* Enter as writer to prevent cow anons from being freed */
+ ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);
+
+ p_index = p_start;
+ s_index = s_start;
+
+ while (p_index <= p_end) {
+
+ pgcnt_t p_index_next;
+ pgcnt_t p_bound_size;
+ int cnt;
+ anoff_t off;
+ struct vnode *vn;
+ struct anon *ap;
+ page_t *page; /* For handling of large */
+ pgcnt_t pgcnt = 1; /* pages */
+ pgcnt_t pgstart;
+ pgcnt_t pgend;
+ uint_t pgshft;
+ pgcnt_t pgmsk;
+
+ p_index_next = p_index;
+ ap = anon_get_next_ptr(private_amp->ahp,
+ &p_index_next);
+
+ /*
+ * If next anon is past end of mapping, simulate
+ * end of anon so loop terminates.
+ */
+ if (p_index_next > p_end) {
+ p_index_next = p_end + 1;
+ ap = NULL;
+ }
+ /*
+ * For cow segments, keep track of bounds not
+ * backed by private amp so they can be looked
+ * up in the backing vnode
+ */
+ if (p_index_next != p_index) {
+
+ /*
+ * Compute index difference between anon and
+ * previous anon.
+ */
+ p_bound_size = p_index_next - p_index - 1;
+
+ if (shared_object != NULL) {
+ cur = vmu_alloc_bound();
+ cur->vmb_next = NULL;
+ cur->vmb_start = s_index;
+ cur->vmb_end = s_index + p_bound_size;
+ cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
+ if (first == NULL) {
+ first = cur;
+ last = cur;
+ } else {
+ last->vmb_next = cur;
+ last = cur;
+ }
+ }
+ p_index = p_index + p_bound_size + 1;
+ s_index = s_index + p_bound_size + 1;
+ }
+
+ /* Detect end of anons in amp */
+ if (ap == NULL)
+ break;
+
+ cnt = ap->an_refcnt;
+ swap_xlate(ap, &vn, &off);
+
+ if (vn == NULL || vn->v_pages == NULL ||
+ (page = page_exists(vn, off)) == NULL) {
+ p_index++;
+ s_index++;
+ continue;
+ }
+
+ /*
+ * If large page is found, compute portion of large
+			 * page in mapping, and increment indices to the next
+ * large page.
+ */
+ if (page->p_szc > 0) {
+
+ pgcnt = page_get_pagecnt(page->p_szc);
+ pgshft = page_get_shift(page->p_szc);
+ pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;
+
+ /* First page in large page */
+ pgstart = p_index & ~pgmsk;
+ /* Last page in large page */
+ pgend = pgstart + pgcnt - 1;
+ /*
+				 * Artificially end page if page extends past
+ * end of mapping.
+ */
+ if (pgend > p_end)
+ pgend = p_end;
+
+ /*
+ * Compute number of pages from large page
+ * which are mapped.
+ */
+ pgcnt = pgend - p_index + 1;
+
+ /*
+				 * Point indices at page after large page,
+ * or at page after end of mapping.
+ */
+ p_index += pgcnt;
+ s_index += pgcnt;
+ } else {
+ p_index++;
+ s_index++;
+ }
+
+ /*
+ * Assume anon structs with a refcnt
+ * of 1 are not cow shared, so there
+ * is no reason to track them per entity.
+ */
+ if (cnt == 1) {
+ panon += pgcnt;
+ continue;
+ }
+ for (entity = vmu_entities; entity != NULL;
+ entity = entity->vme_next_calc) {
+
+ result = &entity->vme_result;
+ /*
+ * Track cow anons per entity so
+ * they are not double counted.
+ */
+ if (vmu_find_insert_anon(entity->vme_anon_hash,
+ (caddr_t)ap) == 0)
+ continue;
+
+ result->vmu_rss_all += (pgcnt << PAGESHIFT);
+ result->vmu_rss_private +=
+ (pgcnt << PAGESHIFT);
+ }
+ }
+ ANON_LOCK_EXIT(&private_amp->a_rwlock);
+ }
+
+ /* Add up resident anon and swap reserved for private mappings */
+ if (swresv > 0 || panon > 0) {
+ for (entity = vmu_entities; entity != NULL;
+ entity = entity->vme_next_calc) {
+ result = &entity->vme_result;
+ result->vmu_swap_all += swresv;
+ result->vmu_swap_private += swresv;
+ result->vmu_rss_all += (panon << PAGESHIFT);
+ result->vmu_rss_private += (panon << PAGESHIFT);
+ }
+ }
+
+ /* Compute resident pages backing shared amp or named vnode */
+ if (shared_object != NULL) {
+ if (first == NULL) {
+ /*
+ * No private amp, or private amp has no anon
+ * structs. This means entire segment is backed by
+ * the shared object.
+ */
+ first = vmu_alloc_bound();
+ first->vmb_next = NULL;
+ first->vmb_start = s_start;
+ first->vmb_end = s_end;
+ first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
+ }
+ /*
+ * Iterate bounds not backed by private amp, and compute
+ * resident pages.
+ */
+ cur = first;
+ while (cur != NULL) {
+
+ if (vmu_insert_lookup_object_bounds(shared_object,
+ cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
+ &first, &last) > 0) {
+ /* new bounds, find incore/not-incore */
+ if (shared_object->vmo_type ==
+ VMUSAGE_TYPE_VNODE)
+ vmu_vnode_update_incore_bounds(
+ (vnode_t *)
+ shared_object->vmo_key, &first,
+ &last);
+ else
+ vmu_amp_update_incore_bounds(
+ (struct anon_map *)
+ shared_object->vmo_key, &first,
+ &last, incore);
+ vmu_merge_bounds(&first, &last);
+ }
+ for (entity = vmu_entities; entity != NULL;
+ entity = entity->vme_next_calc) {
+
+ result = &entity->vme_result;
+
+ entity_object = vmu_find_insert_object(
+ shared_object->vmo_type ==
+ VMUSAGE_TYPE_VNODE ? entity->vme_vnode_hash:
+ entity->vme_amp_hash,
+ shared_object->vmo_key,
+ shared_object->vmo_type);
+
+ virt = vmu_insert_lookup_object_bounds(
+ entity_object, cur->vmb_start, cur->vmb_end,
+ VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
+
+ if (virt == 0)
+ continue;
+ /*
+ * Range visited for this entity
+ */
+ rss = vmu_update_bounds(&e_first,
+ &e_last, first, last);
+ result->vmu_rss_all += (rss << PAGESHIFT);
+ if (shared == B_TRUE && file == B_FALSE) {
+ /* shared anon mapping */
+ result->vmu_swap_all +=
+ (virt << PAGESHIFT);
+ result->vmu_swap_shared +=
+ (virt << PAGESHIFT);
+ result->vmu_rss_shared +=
+ (rss << PAGESHIFT);
+ } else if (shared == B_TRUE && file == B_TRUE) {
+ /* shared file mapping */
+ result->vmu_rss_shared +=
+ (rss << PAGESHIFT);
+ } else if (shared == B_FALSE &&
+ file == B_TRUE) {
+ /* private file mapping */
+ result->vmu_rss_private +=
+ (rss << PAGESHIFT);
+ }
+ vmu_merge_bounds(&e_first, &e_last);
+ }
+ tmp = cur;
+ cur = cur->vmb_next;
+ vmu_free_bound(tmp);
+ }
+ }
+}
+
+/*
+ * Based on the current calculation flags, find the relevant entities
+ * which are related to the process. Then calculate each segment
+ * in the process's address space for each relevant entity.
+ */
+static void
+vmu_calculate_proc(proc_t *p)
+{
+ vmu_entity_t *entities = NULL;
+ vmu_zone_t *zone;
+ vmu_entity_t *tmp;
+ struct as *as;
+ struct seg *seg;
+ int ret;
+
+ /* Figure out which entities are being computed */
+ if ((vmu_data.vmu_system) != NULL) {
+ tmp = vmu_data.vmu_system;
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags &
+ (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
+ VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
+ VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
+ VMUSAGE_ALL_EUSERS)) {
+ ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
+ (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
+ (mod_hash_val_t *)&zone);
+ if (ret != 0) {
+ zone = vmu_alloc_zone(p->p_zone->zone_id);
+ ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
+ (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
+ (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
+ ASSERT(ret == 0);
+ }
+ if (zone->vmz_zone != NULL) {
+ tmp = zone->vmz_zone;
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags &
+ (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
+ tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
+ p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
+ zone->vmz_id);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags &
+ (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
+ tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
+ p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags &
+ (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
+ tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
+ crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags &
+ (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
+ tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
+ crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ }
+ /* Entities which collapse projects and users for all zones */
+ if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
+ tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
+ p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
+ tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
+ crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+ if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
+ tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
+ crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
+ tmp->vme_next_calc = entities;
+ entities = tmp;
+ }
+
+ ASSERT(entities != NULL);
+ /* process all segs in process's address space */
+ as = p->p_as;
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL;
+ seg = AS_SEGNEXT(as, seg)) {
+ vmu_calculate_seg(entities, seg);
+ }
+ AS_LOCK_EXIT(as, &as->a_lock);
+}
+
+/*
+ * Free data created by previous call to vmu_calculate().
+ */
+static void
+vmu_clear_calc()
+{
+ if (vmu_data.vmu_system != NULL)
+ vmu_free_entity(vmu_data.vmu_system);
+ vmu_data.vmu_system = NULL;
+ if (vmu_data.vmu_zones_hash != NULL)
+ i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
+ if (vmu_data.vmu_projects_col_hash != NULL)
+ i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
+ if (vmu_data.vmu_rusers_col_hash != NULL)
+ i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
+ if (vmu_data.vmu_eusers_col_hash != NULL)
+ i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
+
+ i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
+ i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
+}
+
+/*
+ * Free unused data structures. These can result if the system workload
+ * decreases between calculations.
+ */
+static void
+vmu_free_extra()
+{
+ vmu_bound_t *tb;
+ vmu_object_t *to;
+ vmu_entity_t *te;
+ vmu_zone_t *tz;
+
+ while (vmu_data.vmu_free_bounds != NULL) {
+ tb = vmu_data.vmu_free_bounds;
+ vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
+ kmem_cache_free(vmu_bound_cache, tb);
+ }
+ while (vmu_data.vmu_free_objects != NULL) {
+ to = vmu_data.vmu_free_objects;
+ vmu_data.vmu_free_objects =
+ vmu_data.vmu_free_objects->vmo_next;
+ kmem_cache_free(vmu_object_cache, to);
+ }
+ while (vmu_data.vmu_free_entities != NULL) {
+ te = vmu_data.vmu_free_entities;
+ vmu_data.vmu_free_entities =
+ vmu_data.vmu_free_entities->vme_next;
+ if (te->vme_vnode_hash != NULL)
+ mod_hash_destroy_hash(te->vme_vnode_hash);
+ if (te->vme_amp_hash != NULL)
+ mod_hash_destroy_hash(te->vme_amp_hash);
+ if (te->vme_anon_hash != NULL)
+ mod_hash_destroy_hash(te->vme_anon_hash);
+ kmem_free(te, sizeof (vmu_entity_t));
+ }
+ while (vmu_data.vmu_free_zones != NULL) {
+ tz = vmu_data.vmu_free_zones;
+ vmu_data.vmu_free_zones =
+ vmu_data.vmu_free_zones->vmz_next;
+ if (tz->vmz_projects_hash != NULL)
+ mod_hash_destroy_hash(tz->vmz_projects_hash);
+ if (tz->vmz_tasks_hash != NULL)
+ mod_hash_destroy_hash(tz->vmz_tasks_hash);
+ if (tz->vmz_rusers_hash != NULL)
+ mod_hash_destroy_hash(tz->vmz_rusers_hash);
+ if (tz->vmz_eusers_hash != NULL)
+ mod_hash_destroy_hash(tz->vmz_eusers_hash);
+ kmem_free(tz, sizeof (vmu_zone_t));
+ }
+}
+
+extern kcondvar_t *pr_pid_cv;
+
+/*
+ * Determine which entity types are relevant and allocate the hashes to
+ * track them. Then walk the process table and count rss and swap
+ * for each process's address space. Address space objects such as
+ * vnodes, amps, and anons are tracked per entity, so that they are
+ * not double counted in the results.
+ *
+ */
+static void
+vmu_calculate()
+{
+ int i = 0;
+ int ret;
+ proc_t *p;
+
+ vmu_clear_calc();
+
+ if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
+ vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
+ ALL_ZONES);
+
+ /*
+ * Walk process table and calculate rss of each proc.
+ *
+ * Pidlock and p_lock cannot be held while doing the rss calculation.
+ * This is because:
+ * 1. The calculation allocates using KM_SLEEP.
+ * 2. The calculation grabs a_lock, which cannot be grabbed
+ * after p_lock.
+ *
+ * Since pidlock must be dropped, we cannot simply just walk the
+ * practive list. Instead, we walk the process table, and sprlock
+ * each process to ensure that it does not exit during the
+ * calculation.
+ */
+
+ mutex_enter(&pidlock);
+ for (i = 0; i < v.v_proc; i++) {
+again:
+ p = pid_entry(i);
+ if (p == NULL)
+ continue;
+
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if (panicstr) {
+ mutex_exit(&p->p_lock);
+ return;
+ }
+
+ /* Try to set P_PR_LOCK */
+ ret = sprtrylock_proc(p);
+ if (ret == -1) {
+ /* Process in invalid state */
+ mutex_exit(&p->p_lock);
+ mutex_enter(&pidlock);
+ continue;
+ } else if (ret == 1) {
+ /*
+ * P_PR_LOCK is already set. Wait and try again.
+ * This also drops p_lock.
+ */
+ sprwaitlock_proc(p);
+ mutex_enter(&pidlock);
+ goto again;
+ }
+ mutex_exit(&p->p_lock);
+
+ vmu_calculate_proc(p);
+
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ mutex_enter(&pidlock);
+ }
+ mutex_exit(&pidlock);
+
+ vmu_free_extra();
+}
+
+/*
+ * allocate a new cache for N results satisfying flags
+ */
+vmu_cache_t *
+vmu_cache_alloc(size_t nres, uint_t flags)
+{
+ vmu_cache_t *cache;
+
+ cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
+ cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
+ cache->vmc_nresults = nres;
+ cache->vmc_flags = flags;
+ cache->vmc_refcnt = 1;
+ return (cache);
+}
+
+/*
+ * Make sure cached results are not freed
+ */
+static void
+vmu_cache_hold(vmu_cache_t *cache)
+{
+ ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
+ cache->vmc_refcnt++;
+}
+
+/*
+ * free cache data
+ */
+static void
+vmu_cache_rele(vmu_cache_t *cache)
+{
+ ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
+ ASSERT(cache->vmc_refcnt > 0);
+ cache->vmc_refcnt--;
+ if (cache->vmc_refcnt == 0) {
+ kmem_free(cache->vmc_results, sizeof (vmusage_t) *
+ cache->vmc_nresults);
+ kmem_free(cache, sizeof (vmu_cache_t));
+ }
+}
+
+/*
+ * Copy out the cached results to a caller. Inspect the caller's flags
+ * and zone to determine which cached results should be copied.
+ */
+static int
+vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
+ uint_t flags)
+{
+ vmusage_t *result, *out_result;
+ vmusage_t dummy;
+ size_t i, count = 0;
+ size_t bufsize;
+ int ret = 0;
+ uint_t types = 0;
+
+ if (nres != NULL) {
+ if (copyin((caddr_t)nres, &bufsize, sizeof (size_t)))
+ return (set_errno(EFAULT));
+ } else {
+ bufsize = 0;
+ }
+
+ /* figure out what results the caller is interested in. */
+ if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
+ types |= VMUSAGE_SYSTEM;
+ if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
+ types |= VMUSAGE_ZONE;
+ if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
+ VMUSAGE_COL_PROJECTS))
+ types |= VMUSAGE_PROJECTS;
+ if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
+ types |= VMUSAGE_TASKS;
+ if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
+ types |= VMUSAGE_RUSERS;
+ if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
+ types |= VMUSAGE_EUSERS;
+
+ /* count results for current zone */
+ out_result = buf;
+ for (result = cache->vmc_results, i = 0;
+ i < cache->vmc_nresults; result++, i++) {
+
+ /* Do not return "other-zone" results to non-global zones */
+ if (curproc->p_zone != global_zone &&
+ curproc->p_zone->zone_id != result->vmu_zoneid)
+ continue;
+
+ /*
+ * If non-global zone requests VMUSAGE_SYSTEM, fake
+ * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result.
+ */
+ if (curproc->p_zone != global_zone &&
+ (flags & VMUSAGE_SYSTEM) != 0 &&
+ result->vmu_type == VMUSAGE_ZONE) {
+ count++;
+ if (out_result != NULL) {
+ if (bufsize < count) {
+ ret = set_errno(EOVERFLOW);
+ } else {
+ dummy = *result;
+ dummy.vmu_zoneid = ALL_ZONES;
+ dummy.vmu_id = 0;
+ dummy.vmu_type = VMUSAGE_SYSTEM;
+ if (copyout(&dummy, out_result,
+ sizeof (vmusage_t)))
+ return (set_errno(
+ EFAULT));
+ out_result++;
+ }
+ }
+ }
+
+ /* Skip results that do not match requested type */
+ if ((result->vmu_type & types) == 0)
+ continue;
+
+		/* Skip collapsed results if not requested */
+ if (result->vmu_zoneid == ALL_ZONES) {
+ if (result->vmu_type == VMUSAGE_PROJECTS &&
+ (flags & VMUSAGE_COL_PROJECTS) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_EUSERS &&
+ (flags & VMUSAGE_COL_EUSERS) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_RUSERS &&
+ (flags & VMUSAGE_COL_RUSERS) == 0)
+ continue;
+ }
+
+ /* Skip "other zone" results if not requested */
+ if (result->vmu_zoneid != curproc->p_zone->zone_id) {
+ if (result->vmu_type == VMUSAGE_ZONE &&
+ (flags & VMUSAGE_ALL_ZONES) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_PROJECTS &&
+ (flags & (VMUSAGE_ALL_PROJECTS |
+ VMUSAGE_COL_PROJECTS)) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_TASKS &&
+ (flags & VMUSAGE_ALL_TASKS) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_RUSERS &&
+ (flags & (VMUSAGE_ALL_RUSERS |
+ VMUSAGE_COL_RUSERS)) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_EUSERS &&
+ (flags & (VMUSAGE_ALL_EUSERS |
+ VMUSAGE_COL_EUSERS)) == 0)
+ continue;
+ }
+ count++;
+ if (out_result != NULL) {
+ if (bufsize < count) {
+ ret = set_errno(EOVERFLOW);
+ } else {
+ if (copyout(result, out_result,
+ sizeof (vmusage_t)))
+ return (set_errno(EFAULT));
+ out_result++;
+ }
+ }
+ }
+ if (nres != NULL)
+ if (copyout(&count, (void *)nres, sizeof (size_t)))
+ return (set_errno(EFAULT));
+
+ return (ret);
+}
+
+/*
+ * vm_getusage()
+ *
+ * Counts rss and swap by zone, project, task, and/or user. The flags argument
+ * determines the type of results structures returned. Flags requesting
+ * results from more than one zone are "flattened" to the local zone if the
+ * caller is not the global zone.
+ *
+ * args:
+ * flags: bitmap consisting of one or more of VMUSAGE_*.
+ * age: maximum allowable age (time since counting was done) in
+ * seconds of the results. Results from previous callers are
+ * cached in kernel.
+ * buf: pointer to buffer array of vmusage_t. If NULL, then only nres
+ * set on success.
+ * nres: Set to number of vmusage_t structures pointed to by buf
+ * before calling vm_getusage().
+ *	On return of 0 (success) or ENOSPC, it is set to the number of result
+ * structures returned or attempted to return.
+ *
+ * returns 0 on success, -1 on failure:
+ * EINTR (interrupted)
+ *	ENOSPC (nres too small for results, nres set to needed value for success)
+ * EINVAL (flags invalid)
+ * EFAULT (bad address for buf or nres)
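+ *
+ * Illustrative usage sketch (an assumption, not mandated here): a caller
+ * may pass buf == NULL to learn how many results exist, then allocate a
+ * buffer and call again. The count can grow between the two calls, in
+ * which case the second call fails as described above.
+ *
+ *	size_t n = 0;
+ *	vmusage_t *res;
+ *
+ *	if (getvmusage(VMUSAGE_ALL_ZONES, 5, NULL, &n) != 0)
+ *		return (-1);
+ *	res = malloc(n * sizeof (vmusage_t));
+ *	if (res == NULL ||
+ *	    getvmusage(VMUSAGE_ALL_ZONES, 5, res, &n) != 0)
+ *		return (-1);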
+ */
+int
+vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres)
+{
+ vmu_entity_t *entity;
+ vmusage_t *result;
+ int ret = 0;
+ int cacherecent = 0;
+ hrtime_t now;
+ uint_t flags_orig;
+
+ /*
+	 * Non-global zones cannot request system-wide and/or collapsed
+ * results, or the system result, so munge the flags accordingly.
+ */
+ flags_orig = flags;
+ if (curproc->p_zone != global_zone) {
+ if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
+ flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
+ flags |= VMUSAGE_PROJECTS;
+ }
+ if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
+ flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
+ flags |= VMUSAGE_RUSERS;
+ }
+ if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
+ flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
+ flags |= VMUSAGE_EUSERS;
+ }
+ if (flags & VMUSAGE_SYSTEM) {
+ flags &= ~VMUSAGE_SYSTEM;
+ flags |= VMUSAGE_ZONE;
+ }
+ }
+
+ /* Check for unknown flags */
+ if ((flags & (~VMUSAGE_MASK)) != 0)
+ return (set_errno(EINVAL));
+
+ /* Check for no flags */
+ if ((flags & VMUSAGE_MASK) == 0)
+ return (set_errno(EINVAL));
+
+ mutex_enter(&vmu_data.vmu_lock);
+ now = gethrtime();
+
+start:
+ if (vmu_data.vmu_cache != NULL) {
+
+ vmu_cache_t *cache;
+
+ if ((vmu_data.vmu_cache->vmc_timestamp +
+ ((hrtime_t)age * NANOSEC)) > now)
+ cacherecent = 1;
+
+ if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
+ cacherecent == 1) {
+ cache = vmu_data.vmu_cache;
+ vmu_cache_hold(cache);
+ mutex_exit(&vmu_data.vmu_lock);
+
+ ret = vmu_copyout_results(cache, buf, nres, flags_orig);
+ mutex_enter(&vmu_data.vmu_lock);
+ vmu_cache_rele(cache);
+ if (vmu_data.vmu_pending_waiters > 0)
+ cv_broadcast(&vmu_data.vmu_cv);
+ mutex_exit(&vmu_data.vmu_lock);
+ return (ret);
+ }
+ /*
+ * If the cache is recent, it is likely that there are other
+ * consumers of vm_getusage running, so add their flags to the
+ * desired flags for the calculation.
+ */
+ if (cacherecent == 1)
+ flags = vmu_data.vmu_cache->vmc_flags | flags;
+ }
+ if (vmu_data.vmu_calc_thread == NULL) {
+
+ vmu_cache_t *cache;
+
+ vmu_data.vmu_calc_thread = curthread;
+ vmu_data.vmu_calc_flags = flags;
+ vmu_data.vmu_entities = NULL;
+ vmu_data.vmu_nentities = 0;
+ if (vmu_data.vmu_pending_waiters > 0)
+ vmu_data.vmu_calc_flags |=
+ vmu_data.vmu_pending_flags;
+
+ vmu_data.vmu_pending_flags = 0;
+ mutex_exit(&vmu_data.vmu_lock);
+ vmu_calculate();
+ mutex_enter(&vmu_data.vmu_lock);
+ /* copy results to cache */
+ if (vmu_data.vmu_cache != NULL)
+ vmu_cache_rele(vmu_data.vmu_cache);
+ cache = vmu_data.vmu_cache =
+ vmu_cache_alloc(vmu_data.vmu_nentities,
+ vmu_data.vmu_calc_flags);
+
+ result = cache->vmc_results;
+ for (entity = vmu_data.vmu_entities; entity != NULL;
+ entity = entity->vme_next) {
+ *result = entity->vme_result;
+ result++;
+ }
+ cache->vmc_timestamp = gethrtime();
+ vmu_cache_hold(cache);
+
+ vmu_data.vmu_calc_flags = 0;
+ vmu_data.vmu_calc_thread = NULL;
+
+ if (vmu_data.vmu_pending_waiters > 0)
+ cv_broadcast(&vmu_data.vmu_cv);
+
+ mutex_exit(&vmu_data.vmu_lock);
+
+ /* copy cache */
+ ret = vmu_copyout_results(cache, buf, nres, flags_orig);
+ mutex_enter(&vmu_data.vmu_lock);
+ vmu_cache_rele(cache);
+ mutex_exit(&vmu_data.vmu_lock);
+
+ return (ret);
+ }
+ vmu_data.vmu_pending_flags |= flags;
+ vmu_data.vmu_pending_waiters++;
+ while (vmu_data.vmu_calc_thread != NULL) {
+ if (cv_wait_sig(&vmu_data.vmu_cv,
+ &vmu_data.vmu_lock) == 0) {
+ vmu_data.vmu_pending_waiters--;
+ mutex_exit(&vmu_data.vmu_lock);
+ return (set_errno(EINTR));
+ }
+ }
+ vmu_data.vmu_pending_waiters--;
+ goto start;
+}