Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--  usr/src/uts/common/disp/fss.c             60
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs_server.c     4
-rw-r--r--  usr/src/uts/common/os/kstat_fr.c            6
-rw-r--r--  usr/src/uts/common/os/zone.c               13
-rw-r--r--  usr/src/uts/common/sys/buf.h                8
-rw-r--r--  usr/src/uts/common/sys/fss.h                4
-rw-r--r--  usr/src/uts/common/sys/mman.h               2
-rw-r--r--  usr/src/uts/common/sys/zone.h              21
-rw-r--r--  usr/src/uts/common/syscall/memcntl.c        9
-rw-r--r--  usr/src/uts/common/vm/hat.h                 2
-rw-r--r--  usr/src/uts/common/vm/seg_vn.c             12
-rw-r--r--  usr/src/uts/common/vm/vm_pvn.c             28
-rw-r--r--  usr/src/uts/common/vm/vm_usage.c           16
13 files changed, 165 insertions, 20 deletions
diff --git a/usr/src/uts/common/disp/fss.c b/usr/src/uts/common/disp/fss.c
index 62301d65d8..af8826780c 100644
--- a/usr/src/uts/common/disp/fss.c
+++ b/usr/src/uts/common/disp/fss.c
@@ -814,6 +814,7 @@ fss_decay_usage()
fsszone_t *fsszone;
fsspri_t maxfsspri;
int psetid;
+ struct zone *zp;
mutex_enter(&fsspsets_lock);
/*
@@ -824,6 +825,8 @@ fss_decay_usage()
fsspset = &fsspsets[psetid];
mutex_enter(&fsspset->fssps_lock);
+ fsspset->fssps_gen++;
+
if (fsspset->fssps_cpupart == NULL ||
(fssproj = fsspset->fssps_list) == NULL) {
mutex_exit(&fsspset->fssps_lock);
@@ -843,6 +846,21 @@ fss_decay_usage()
fsspset->fssps_maxfsspri = maxfsspri;
do {
+ fsszone = fssproj->fssp_fsszone;
+ zp = fsszone->fssz_zone;
+
+ /*
+ * Reset zone's FSS kstats if they are from a
+ * previous cycle.
+ */
+ if (fsspset->fssps_gen != zp->zone_fss_gen) {
+ zp->zone_fss_gen = fsspset->fssps_gen;
+ zp->zone_fss_pri_hi = 0;
+ zp->zone_runq_cntr = 0;
+ zp->zone_fss_shr_pct = 0;
+ zp->zone_proc_cnt = 0;
+ }
+
/*
* Decay usage for each project running on
* this cpu partition.
@@ -850,9 +868,18 @@ fss_decay_usage()
fssproj->fssp_usage =
(fssproj->fssp_usage * FSS_DECAY_USG) /
FSS_DECAY_BASE + fssproj->fssp_ticks;
+
fssproj->fssp_ticks = 0;
- fsszone = fssproj->fssp_fsszone;
+ zp->zone_run_ticks += fssproj->fssp_zone_ticks;
+ /*
+ * This is the count for this one second cycle only,
+ * and not cumulative.
+ */
+ zp->zone_runq_cntr += fssproj->fssp_runnable;
+
+ fssproj->fssp_zone_ticks = 0;
+
/*
* Readjust the project's number of shares if it has
* changed since we checked it last time.
@@ -871,7 +898,7 @@ fss_decay_usage()
* Readjust the zone's number of shares if it
* has changed since we checked it last time.
*/
- zone_ext_shares = fsszone->fssz_zone->zone_shares;
+ zone_ext_shares = zp->zone_shares;
if (fsszone->fssz_rshares != zone_ext_shares) {
if (fsszone->fssz_runnable != 0) {
fsspset->fssps_shares -=
@@ -883,6 +910,12 @@ fss_decay_usage()
}
zone_int_shares = fsszone->fssz_shares;
pset_shares = fsspset->fssps_shares;
+
+ if (zp->zone_runq_cntr > 0 && pset_shares > 0)
+ /* in tenths of a pct */
+ zp->zone_fss_shr_pct =
+ (zone_ext_shares * 1000) / pset_shares;
+
/*
* Calculate fssp_shusage value to be used
* for fsspri increments for the next second.
@@ -1050,6 +1083,8 @@ fss_update_list(int i)
fssproc_t *fssproc;
fssproj_t *fssproj;
fsspri_t fsspri;
+ struct zone *zp;
+ pri_t fss_umdpri;
kthread_t *t;
int updated = 0;
@@ -1073,6 +1108,7 @@ fss_update_list(int i)
fssproj = FSSPROC2FSSPROJ(fssproc);
if (fssproj == NULL)
goto next;
+
if (fssproj->fssp_shares != 0) {
/*
* Decay fsspri value.
@@ -1096,11 +1132,28 @@ fss_update_list(int i)
fss_newpri(fssproc);
updated = 1;
+ fss_umdpri = fssproc->fss_umdpri;
+
+ /*
+ * Summarize a zone's process priorities for runnable
+ * procs.
+ */
+ zp = fssproj->fssp_fsszone->fssz_zone;
+
+ if (fss_umdpri > zp->zone_fss_pri_hi)
+ zp->zone_fss_pri_hi = fss_umdpri;
+
+ if (zp->zone_proc_cnt++ == 0)
+ zp->zone_fss_pri_avg = fss_umdpri;
+ else
+ zp->zone_fss_pri_avg =
+ (zp->zone_fss_pri_avg + fss_umdpri) / 2;
+
/*
* Only dequeue the thread if it needs to be moved; otherwise
* it should just round-robin here.
*/
- if (t->t_pri != fssproc->fss_umdpri)
+ if (t->t_pri != fss_umdpri)
fss_change_priority(t, fssproc);
next:
thread_unlock(t);
@@ -2180,6 +2233,7 @@ fss_tick(kthread_t *t)
fsspset_t *fsspset = FSSPROJ2FSSPSET(fssproj);
disp_lock_enter_high(&fsspset->fssps_displock);
fssproj->fssp_ticks += fss_nice_tick[fssproc->fss_nice];
+ fssproj->fssp_zone_ticks++;
fssproc->fss_ticks++;
disp_lock_exit_high(&fsspset->fssps_displock);
}
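
To make the arithmetic in these fss.c hunks concrete, the following is a minimal userland sketch (not kernel code) of the per-cycle zone summaries that fss_decay_usage() and fss_update_list() now maintain: the running priority average halves the weight of older samples on each update, and the share figure is expressed in tenths of a percent of the pset's shares. Variable names and sample values here are illustrative only.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t pri_avg = 0, pri_hi = 0;
	uint32_t proc_cnt = 0;
	uint64_t umdpri[] = { 10, 40, 25 };	/* hypothetical fss_umdpri samples */
	uint64_t zone_shares = 20, pset_shares = 120;
	int i;

	for (i = 0; i < 3; i++) {
		/* track the highest priority seen this interval */
		if (umdpri[i] > pri_hi)
			pri_hi = umdpri[i];
		/* running average: first sample seeds it, later ones halve the history */
		if (proc_cnt++ == 0)
			pri_avg = umdpri[i];
		else
			pri_avg = (pri_avg + umdpri[i]) / 2;
	}

	/* zone's share of the pset in tenths of a percent, as in fss_decay_usage() */
	(void) printf("pri_hi=%llu pri_avg=%llu shr_pct=%llu\n",
	    (unsigned long long)pri_hi, (unsigned long long)pri_avg,
	    (unsigned long long)((zone_shares * 1000) / pset_shares));
	return (0);
}
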
diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c
index ad2fed01dc..8473788d8a 100644
--- a/usr/src/uts/common/fs/nfs/nfs_server.c
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
+ * Copyright (c) 2012 Joyent, Inc. All rights reserved.
*/
/*
@@ -2520,6 +2521,9 @@ nfs_srvinit(void)
{
int error;
+ if (getzoneid() != GLOBAL_ZONEID)
+ return (EACCES);
+
error = nfs_exportinit();
if (error != 0)
return (error);
diff --git a/usr/src/uts/common/os/kstat_fr.c b/usr/src/uts/common/os/kstat_fr.c
index 93376a9edf..25afef3259 100644
--- a/usr/src/uts/common/os/kstat_fr.c
+++ b/usr/src/uts/common/os/kstat_fr.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2012, Joyent, Inc. All rights reserved.
*/
/*
@@ -161,6 +161,7 @@ struct {
kstat_named_t avenrun_5min;
kstat_named_t avenrun_15min;
kstat_named_t boot_time;
+ kstat_named_t nsec_per_tick;
} system_misc_kstat = {
{ "ncpus", KSTAT_DATA_UINT32 },
{ "lbolt", KSTAT_DATA_UINT32 },
@@ -172,6 +173,7 @@ struct {
{ "avenrun_5min", KSTAT_DATA_UINT32 },
{ "avenrun_15min", KSTAT_DATA_UINT32 },
{ "boot_time", KSTAT_DATA_UINT32 },
+ { "nsec_per_tick", KSTAT_DATA_UINT32 },
};
struct {
@@ -855,6 +857,8 @@ system_misc_kstat_update(kstat_t *ksp, int rw)
system_misc_kstat.avenrun_15min.value.ui32 = (uint32_t)loadavgp[2];
system_misc_kstat.boot_time.value.ui32 = (uint32_t)
zone_boot_time;
+ system_misc_kstat.nsec_per_tick.value.ui32 = (uint32_t)
+ nsec_per_tick;
return (0);
}
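
A userland consumer can pick up the new nsec_per_tick value through libkstat; this is only a sketch of one way to read it (build with -lkstat on illumos), not part of the change itself.

#include <stdio.h>
#include <kstat.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *kn;

	if ((kc = kstat_open()) == NULL) {
		perror("kstat_open");
		return (1);
	}
	/* system_misc kstats live under unix:0:system_misc */
	if ((ksp = kstat_lookup(kc, "unix", 0, "system_misc")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1) {
		(void) kstat_close(kc);
		return (1);
	}
	if ((kn = kstat_data_lookup(ksp, "nsec_per_tick")) != NULL)
		(void) printf("nsec_per_tick = %u\n", kn->value.ui32);
	(void) kstat_close(kc);
	return (0);
}
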
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 3ea0d0fe95..79f61ddcb9 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -2222,6 +2222,12 @@ zone_misc_kstat_update(kstat_t *ksp, int rw)
zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];
zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2];
+ zmp->zm_run_ticks.value.ui64 = zone->zone_run_ticks;
+ zmp->zm_run_wait.value.ui64 = zone->zone_runq_cntr;
+ zmp->zm_fss_shr_pct.value.ui64 = zone->zone_fss_shr_pct;
+ zmp->zm_fss_pri_hi.value.ui64 = zone->zone_fss_pri_hi;
+ zmp->zm_fss_pri_avg.value.ui64 = zone->zone_fss_pri_avg;
+
return (0);
}
@@ -2255,6 +2261,13 @@ zone_misc_kstat_create(zone_t *zone)
kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32);
kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min",
KSTAT_DATA_UINT32);
+ kstat_named_init(&zmp->zm_run_ticks, "run_ticks", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_run_wait, "run_queue", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_fss_shr_pct, "fss_share_percent",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&zmp->zm_fss_pri_hi, "fss_pri_hi", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_fss_pri_avg, "fss_pri_avg",
+ KSTAT_DATA_UINT64);
ksp->ks_update = zone_misc_kstat_update;
ksp->ks_private = zone;
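
For the per-zone values, a small libkstat walker along these lines could report them. It assumes the zone misc kstats keep their existing publication (class "zone_misc", one instance per zone), so treat the lookup details as illustrative rather than authoritative.

#include <stdio.h>
#include <string.h>
#include <kstat.h>

int
main(void)
{
	kstat_ctl_t *kc;
	kstat_t *ksp;
	kstat_named_t *hi, *avg;

	if ((kc = kstat_open()) == NULL)
		return (1);
	for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
		/* assumption: the new values ride along in the zone_misc class */
		if (strcmp(ksp->ks_class, "zone_misc") != 0)
			continue;
		if (kstat_read(kc, ksp, NULL) == -1)
			continue;
		hi = kstat_data_lookup(ksp, "fss_pri_hi");
		avg = kstat_data_lookup(ksp, "fss_pri_avg");
		if (hi != NULL && avg != NULL)
			(void) printf("%s: fss_pri_hi=%llu fss_pri_avg=%llu\n",
			    ksp->ks_name, (unsigned long long)hi->value.ui64,
			    (unsigned long long)avg->value.ui64);
	}
	(void) kstat_close(kc);
	return (0);
}
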
diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h
index a9191aed7c..cb8a6012fc 100644
--- a/usr/src/uts/common/sys/buf.h
+++ b/usr/src/uts/common/sys/buf.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -186,6 +187,7 @@ struct biostats {
#define B_STARTED 0x2000000 /* io:::start probe called for buf */
#define B_ABRWRITE 0x4000000 /* Application based recovery active */
#define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */
+#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */
/*
* There is some confusion over the meaning of B_FREE and B_INVAL and what
@@ -198,6 +200,12 @@ struct biostats {
* between the sole use of these two flags. In both cases, IO will be done
* if the page is not yet committed to storage.
*
+ * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is
+ * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no
+ * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then
+ * the mapping for the page is only invalidated for the current process.
+ * In this case, the page is not destroyed unless this was the final mapping.
+ *
* In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
* should be used.
*
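
Schematically, the two invalidation modes described in this comment are told apart as in the standalone sketch below; the bit values are placeholders, not the kernel's, and the strings just name the behavior (the real handling is in the vm_pvn.c changes later in this diff).

#include <stdio.h>

#define B_INVAL		0x1	/* placeholder values, not the kernel's */
#define B_INVALCURONLY	0x2

static const char *
invalidation_mode(int flags)
{
	/* B_INVAL alone: full invalidation, page is destroyed */
	if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL)
		return ("invalidate for all processes; destroy the page");
	/* B_INVALCURONLY only has meaning together with B_INVAL */
	if ((flags & (B_INVAL | B_INVALCURONLY)) == (B_INVAL | B_INVALCURONLY))
		return ("invalidate the current process's mapping only");
	return ("no invalidation requested");
}

int
main(void)
{
	(void) printf("%s\n", invalidation_mode(B_INVAL));
	(void) printf("%s\n", invalidation_mode(B_INVAL | B_INVALCURONLY));
	return (0);
}
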
diff --git a/usr/src/uts/common/sys/fss.h b/usr/src/uts/common/sys/fss.h
index 583586fd75..cdb47beb7f 100644
--- a/usr/src/uts/common/sys/fss.h
+++ b/usr/src/uts/common/sys/fss.h
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_FSS_H
@@ -86,6 +87,7 @@ typedef struct fsspset {
/* on the list */
struct fssproj *fssps_list; /* list of project parts */
struct fsszone *fssps_zones; /* list of fsszone_t's in pset */
+ uint32_t fssps_gen; /* generation for zone's kstats */
} fsspset_t;
/*
@@ -103,6 +105,8 @@ typedef struct fssproj {
/* protected by fssps_displock */
uint32_t fssp_ticks; /* total of all ticks */
/* protected by fssps_displock */
+ uint32_t fssp_zone_ticks; /* unscaled total of all ticks */
+ /* protected by fssps_displock */
fssusage_t fssp_usage; /* this project's decayed usage */
fssusage_t fssp_shusage; /* normalized usage */
struct fssproj *fssp_next; /* next project on this pset */
diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h
index 6c9119e56d..82344607b0 100644
--- a/usr/src/uts/common/sys/mman.h
+++ b/usr/src/uts/common/sys/mman.h
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -353,6 +354,7 @@ struct memcntl_mha32 {
#define MS_SYNC 0x4 /* wait for msync */
#define MS_ASYNC 0x1 /* return immediately */
#define MS_INVALIDATE 0x2 /* invalidate caches */
+#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */
#if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__)
/* functions to mctl */
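
From userland the new flag is intended to be passed to msync(3C) together with MS_SYNC or MS_ASYNC; the fragment below is a hedged usage sketch meant to be dropped into a larger program, not a complete tool. Note that combining it with MS_INVALIDATE is rejected by the memcntl() check further down in this diff.

#include <sys/mman.h>
#include <stdio.h>

/*
 * Write back any dirty pages in the range asynchronously and drop only
 * this process's mappings; other processes keep theirs.
 */
static int
flush_and_drop_local(void *addr, size_t len)
{
	if (msync(addr, len, MS_ASYNC | MS_INVALCURPROC) != 0) {
		perror("msync");
		return (-1);
	}
	return (0);
}
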
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 08677a2f65..a2b7217fd4 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright (c) 2011, 2012, Joyent Inc. All rights reserved.
*/
#ifndef _SYS_ZONE_H
@@ -441,6 +441,11 @@ typedef struct {
kstat_named_t zm_avenrun1;
kstat_named_t zm_avenrun5;
kstat_named_t zm_avenrun15;
+ kstat_named_t zm_run_ticks;
+ kstat_named_t zm_run_wait;
+ kstat_named_t zm_fss_shr_pct;
+ kstat_named_t zm_fss_pri_hi;
+ kstat_named_t zm_fss_pri_avg;
} zone_misc_kstat_t;
typedef struct zone {
@@ -671,6 +676,20 @@ typedef struct zone {
struct loadavg_s zone_loadavg; /* loadavg for this zone */
uint64_t zone_hp_avenrun[3]; /* high-precision avenrun */
int zone_avenrun[3]; /* FSCALED avg. run queue len */
+
+ /*
+ * FSS stats updated once per second by fss_decay_usage.
+ * zone_runq_cntr is an instantaneous accumulation of the number of
+ * processes in the run queue per project and is not computed over the
+ * one second interval.
+ */
+ uint32_t zone_fss_gen; /* FSS generation cntr */
+ uint32_t zone_proc_cnt; /* FSS process cntr */
+ uint64_t zone_run_ticks; /* tot # of ticks running */
+ uint64_t zone_runq_cntr; /* tot # of procs in runq */
+ uint32_t zone_fss_shr_pct; /* fss active shr % in intvl */
+ uint64_t zone_fss_pri_hi; /* fss high pri this interval */
+ uint64_t zone_fss_pri_avg; /* fss avg pri this interval */
} zone_t;
/*
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 1ab3a8b65e..63c8b64ad0 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -21,6 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -116,13 +117,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
* MS_SYNC used to be defined to be zero but is now non-zero.
* For binary compatibility we still accept zero
* (the absence of MS_ASYNC) to mean the same thing.
+ * Binary compatibility is not an issue for MS_INVALCURPROC.
*/
iarg = (uintptr_t)arg;
if ((iarg & ~MS_INVALIDATE) == 0)
iarg |= MS_SYNC;
- if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
- ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
+ if (((iarg &
+ ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
+ ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
+ ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
+ (MS_INVALIDATE|MS_INVALCURPROC))) {
error = set_errno(EINVAL);
} else {
error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
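
The validation above can be read as: exactly one of MS_SYNC/MS_ASYNC (with the historical "zero means MS_SYNC" exception), and at most one of MS_INVALIDATE/MS_INVALCURPROC. A standalone restatement of that check, using the flag values from the mman.h hunk above so it compiles outside the kernel:

#include <stdio.h>

#define MS_ASYNC	0x1
#define MS_INVALIDATE	0x2
#define MS_SYNC		0x4
#define MS_INVALCURPROC	0x8

static int
msync_flags_valid(unsigned int iarg)
{
	if ((iarg & ~MS_INVALIDATE) == 0)	/* legacy: 0 means MS_SYNC */
		iarg |= MS_SYNC;
	if ((iarg & ~(MS_SYNC | MS_ASYNC | MS_INVALIDATE | MS_INVALCURPROC)) != 0)
		return (0);
	if ((iarg & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC))
		return (0);
	if ((iarg & (MS_INVALIDATE | MS_INVALCURPROC)) ==
	    (MS_INVALIDATE | MS_INVALCURPROC))
		return (0);
	return (1);
}

int
main(void)
{
	(void) printf("%d\n", msync_flags_valid(MS_ASYNC | MS_INVALCURPROC));	  /* valid */
	(void) printf("%d\n", msync_flags_valid(MS_INVALIDATE | MS_INVALCURPROC)); /* invalid */
	return (0);
}
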
diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h
index 1d91475e38..156b810046 100644
--- a/usr/src/uts/common/vm/hat.h
+++ b/usr/src/uts/common/vm/hat.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -460,6 +461,7 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t);
*/
#define HAT_ADV_PGUNLOAD 0x00
#define HAT_FORCE_PGUNLOAD 0x01
+#define HAT_CURPROC_PGUNLOAD 0x02
/*
* Attributes for hat_page_*attr, hat_setstats and
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 31c293d416..5f106f6c06 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -7254,7 +7255,8 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
vpp = svd->vpage;
offset = svd->offset + (uintptr_t)(addr - seg->s_base);
bflags = ((flags & MS_ASYNC) ? B_ASYNC : 0) |
- ((flags & MS_INVALIDATE) ? B_INVAL : 0);
+ ((flags & MS_INVALIDATE) ? B_INVAL : 0) |
+ ((flags & MS_INVALCURPROC) ? (B_INVALCURONLY | B_INVAL) : 0);
if (attr) {
pageprot = attr & ~(SHARED|PRIVATE);
@@ -7279,11 +7281,11 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
vpp = &svd->vpage[seg_page(seg, addr)];
} else if (svd->vp && svd->amp == NULL &&
- (flags & MS_INVALIDATE) == 0) {
+ (flags & (MS_INVALIDATE | MS_INVALCURPROC)) == 0) {
/*
- * No attributes, no anonymous pages and MS_INVALIDATE flag
- * is not on, just use one big request.
+ * No attributes, no anonymous pages and MS_INVAL* flags
+ * are not on, just use one big request.
*/
err = VOP_PUTPAGE(svd->vp, (offset_t)offset, len,
bflags, svd->cred, NULL);
@@ -7335,7 +7337,7 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
* might race in and lock the page after we unlock and before
* we do the PUTPAGE, then PUTPAGE simply does nothing.
*/
- if (flags & MS_INVALIDATE) {
+ if (flags & (MS_INVALIDATE | MS_INVALCURPROC)) {
if ((pp = page_lookup(vp, off, SE_SHARED)) != NULL) {
if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
page_unlock(pp);
diff --git a/usr/src/uts/common/vm/vm_pvn.c b/usr/src/uts/common/vm/vm_pvn.c
index 7233581227..39ace0b3c2 100644
--- a/usr/src/uts/common/vm/vm_pvn.c
+++ b/usr/src/uts/common/vm/vm_pvn.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -431,7 +432,14 @@ pvn_write_done(page_t *plist, int flags)
page_io_unlock(pp);
page_unlock(pp);
}
- } else if (flags & B_INVAL) {
+ } else if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {
+ /*
+ * If B_INVALCURONLY is set, then we handle that case
+ * in the next conditional if hat_page_is_mapped()
+ * indicates that there are no additional mappings
+ * to the page.
+ */
+
/*
* XXX - Failed writes with B_INVAL set are
* not handled appropriately.
@@ -572,8 +580,9 @@ pvn_write_done(page_t *plist, int flags)
}
/*
- * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
- * B_TRUNC, B_FORCE}. B_DELWRI indicates that this page is part of a kluster
+ * Flags are composed of {B_ASYNC, B_INVAL, B_INVALCURONLY, B_FREE,
+ * B_DONTNEED, B_DELWRI, B_TRUNC, B_FORCE}.
+ * B_DELWRI indicates that this page is part of a kluster
* operation and is only to be considered if it doesn't involve any
* waiting here. B_TRUNC indicates that the file is being truncated
* and so no i/o needs to be done. B_FORCE indicates that the page
@@ -627,13 +636,17 @@ pvn_getdirty(page_t *pp, int flags)
* If we want to free or invalidate the page then
* we need to unload it so that anyone who wants
* it will have to take a minor fault to get it.
+ * If we are only invalidating the page for the
+ * current process, then pass in a different flag.
* Otherwise, we're just writing the page back so we
* need to sync up the hardware and software mod bit to
* detect any future modifications. We clear the
* software mod bit when we put the page on the dirty
* list.
*/
- if (flags & (B_INVAL | B_FREE)) {
+ if (flags & B_INVALCURONLY) {
+ (void) hat_pageunload(pp, HAT_CURPROC_PGUNLOAD);
+ } else if (flags & (B_INVAL | B_FREE)) {
(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
} else {
(void) hat_pagesync(pp, HAT_SYNC_ZERORM);
@@ -645,7 +658,7 @@ pvn_getdirty(page_t *pp, int flags)
* list after all.
*/
page_io_unlock(pp);
- if (flags & B_INVAL) {
+ if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {
/*LINTED: constant in conditional context*/
VN_DISPOSE(pp, B_INVAL, 0, kcred);
} else if (flags & B_FREE) {
@@ -657,6 +670,9 @@ pvn_getdirty(page_t *pp, int flags)
* of VOP_PUTPAGE() who prefer freeing the
* page _only_ if no one else is accessing it.
* E.g. segmap_release()
+ * We also take this path for B_INVALCURONLY and
+ * let page_release call VN_DISPOSE if no one else is
+ * using the page.
*
* The above hat_ismod() check is useless because:
* (1) we may not be holding SE_EXCL lock;
@@ -681,7 +697,7 @@ pvn_getdirty(page_t *pp, int flags)
* We'll detect the fact that they used it when the
* i/o is done and avoid freeing the page.
*/
- if (flags & B_FREE)
+ if (flags & (B_FREE | B_INVALCURONLY))
page_downgrade(pp);
diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c
index 18e3c4c806..bbfd6013cd 100644
--- a/usr/src/uts/common/vm/vm_usage.c
+++ b/usr/src/uts/common/vm/vm_usage.c
@@ -939,7 +939,10 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
(page = page_exists(vn, off)) != NULL) {
- page_type = VMUSAGE_BOUND_INCORE;
+ if (PP_ISFREE(page))
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ else
+ page_type = VMUSAGE_BOUND_INCORE;
if (page->p_szc > 0) {
pgcnt = page_get_pagecnt(page->p_szc);
pgshft = page_get_shift(page->p_szc);
@@ -1026,7 +1029,10 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
if (vnode->v_pages != NULL &&
(page = page_exists(vnode, ptob(index))) != NULL) {
- page_type = VMUSAGE_BOUND_INCORE;
+ if (PP_ISFREE(page))
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ else
+ page_type = VMUSAGE_BOUND_INCORE;
if (page->p_szc > 0) {
pgcnt = page_get_pagecnt(page->p_szc);
pgshft = page_get_shift(page->p_szc);
@@ -1306,6 +1312,12 @@ vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
}
/*
+ * Pages on the free list aren't counted for the rss.
+ */
+ if (PP_ISFREE(page))
+ continue;
+
+ /*
* Assume anon structs with a refcnt
* of 1 are not COW shared, so there
* is no reason to track them per entity.