diff options
Diffstat (limited to 'usr/src/uts')
| -rw-r--r-- | usr/src/uts/common/disp/fss.c | 60 | ||||
| -rw-r--r-- | usr/src/uts/common/fs/nfs/nfs_server.c | 4 | ||||
| -rw-r--r-- | usr/src/uts/common/os/kstat_fr.c | 6 | ||||
| -rw-r--r-- | usr/src/uts/common/os/zone.c | 13 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/buf.h | 8 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/fss.h | 4 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/mman.h | 2 | ||||
| -rw-r--r-- | usr/src/uts/common/sys/zone.h | 21 | ||||
| -rw-r--r-- | usr/src/uts/common/syscall/memcntl.c | 9 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/hat.h | 2 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/seg_vn.c | 12 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/vm_pvn.c | 28 | ||||
| -rw-r--r-- | usr/src/uts/common/vm/vm_usage.c | 16 | ||||
| -rw-r--r-- | usr/src/uts/i86pc/vm/hat_i86.c | 44 | 
14 files changed, 199 insertions, 30 deletions
| diff --git a/usr/src/uts/common/disp/fss.c b/usr/src/uts/common/disp/fss.c index 62301d65d8..af8826780c 100644 --- a/usr/src/uts/common/disp/fss.c +++ b/usr/src/uts/common/disp/fss.c @@ -814,6 +814,7 @@ fss_decay_usage()  	fsszone_t *fsszone;  	fsspri_t maxfsspri;  	int psetid; +	struct zone *zp;  	mutex_enter(&fsspsets_lock);  	/* @@ -824,6 +825,8 @@ fss_decay_usage()  		fsspset = &fsspsets[psetid];  		mutex_enter(&fsspset->fssps_lock); +		fsspset->fssps_gen++; +  		if (fsspset->fssps_cpupart == NULL ||  		    (fssproj = fsspset->fssps_list) == NULL) {  			mutex_exit(&fsspset->fssps_lock); @@ -843,6 +846,21 @@ fss_decay_usage()  		fsspset->fssps_maxfsspri = maxfsspri;  		do { +			fsszone = fssproj->fssp_fsszone; +			zp = fsszone->fssz_zone; + +			/* +			 * Reset zone's FSS kstats if they are from a +			 * previous cycle. +			 */ +			if (fsspset->fssps_gen != zp->zone_fss_gen) { +				zp->zone_fss_gen = fsspset->fssps_gen; +				zp->zone_fss_pri_hi = 0; +				zp->zone_runq_cntr = 0; +				zp->zone_fss_shr_pct = 0; +				zp->zone_proc_cnt = 0; +			} +  			/*  			 * Decay usage for each project running on  			 * this cpu partition. @@ -850,9 +868,18 @@ fss_decay_usage()  			fssproj->fssp_usage =  			    (fssproj->fssp_usage * FSS_DECAY_USG) /  			    FSS_DECAY_BASE + fssproj->fssp_ticks; +  			fssproj->fssp_ticks = 0; -			fsszone = fssproj->fssp_fsszone; +			zp->zone_run_ticks += fssproj->fssp_zone_ticks; +			/* +			 * This is the count for this one second cycle only, +			 * and not cumulative. +			 */ +			zp->zone_runq_cntr += fssproj->fssp_runnable; + +			fssproj->fssp_zone_ticks = 0; +  			/*  			 * Readjust the project's number of shares if it has  			 * changed since we checked it last time. @@ -871,7 +898,7 @@ fss_decay_usage()  			 * Readjust the zone's number of shares if it  			 * has changed since we checked it last time.  			 */ -			zone_ext_shares = fsszone->fssz_zone->zone_shares; +			zone_ext_shares = zp->zone_shares;  			if (fsszone->fssz_rshares != zone_ext_shares) {  				if (fsszone->fssz_runnable != 0) {  					fsspset->fssps_shares -= @@ -883,6 +910,12 @@ fss_decay_usage()  			}  			zone_int_shares = fsszone->fssz_shares;  			pset_shares = fsspset->fssps_shares; + +			if (zp->zone_runq_cntr > 0 && pset_shares > 0) +				/* in tenths of a pct */ +				zp->zone_fss_shr_pct = +				    (zone_ext_shares * 1000) / pset_shares; +  			/*  			 * Calculate fssp_shusage value to be used  			 * for fsspri increments for the next second. @@ -1050,6 +1083,8 @@ fss_update_list(int i)  	fssproc_t *fssproc;  	fssproj_t *fssproj;  	fsspri_t fsspri; +	struct zone *zp; +	pri_t fss_umdpri;  	kthread_t *t;  	int updated = 0; @@ -1073,6 +1108,7 @@ fss_update_list(int i)  		fssproj = FSSPROC2FSSPROJ(fssproc);  		if (fssproj == NULL)  			goto next; +  		if (fssproj->fssp_shares != 0) {  			/*  			 * Decay fsspri value. @@ -1096,11 +1132,28 @@ fss_update_list(int i)  		fss_newpri(fssproc);  		updated = 1; +		fss_umdpri = fssproc->fss_umdpri; + +		/* +		 * Summarize a zone's process priorities for runnable +		 * procs. +		 */ +		zp = fssproj->fssp_fsszone->fssz_zone; + +		if (fss_umdpri > zp->zone_fss_pri_hi) +			zp->zone_fss_pri_hi = fss_umdpri; + +		if (zp->zone_proc_cnt++ == 0) +			zp->zone_fss_pri_avg = fss_umdpri; +		else +			zp->zone_fss_pri_avg = +			    (zp->zone_fss_pri_avg + fss_umdpri) / 2; +  		/*  		 * Only dequeue the thread if it needs to be moved; otherwise  		 * it should just round-robin here.  		 */ -		if (t->t_pri != fssproc->fss_umdpri) +		if (t->t_pri != fss_umdpri)  			fss_change_priority(t, fssproc);  next:  		thread_unlock(t); @@ -2180,6 +2233,7 @@ fss_tick(kthread_t *t)  		fsspset_t *fsspset = FSSPROJ2FSSPSET(fssproj);  		disp_lock_enter_high(&fsspset->fssps_displock);  		fssproj->fssp_ticks += fss_nice_tick[fssproc->fss_nice]; +		fssproj->fssp_zone_ticks++;  		fssproc->fss_ticks++;  		disp_lock_exit_high(&fsspset->fssps_displock);  	} diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c index ad2fed01dc..8473788d8a 100644 --- a/usr/src/uts/common/fs/nfs/nfs_server.c +++ b/usr/src/uts/common/fs/nfs/nfs_server.c @@ -21,6 +21,7 @@  /*   * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.   * Copyright (c) 2011 Bayard G. Bell. All rights reserved. + * Copyright (c) 2012 Joyent, Inc. All rights reserved.   */  /* @@ -2520,6 +2521,9 @@ nfs_srvinit(void)  {  	int error; +	if (getzoneid() != GLOBAL_ZONEID) +		return (EACCES); +  	error = nfs_exportinit();  	if (error != 0)  		return (error); diff --git a/usr/src/uts/common/os/kstat_fr.c b/usr/src/uts/common/os/kstat_fr.c index 93376a9edf..25afef3259 100644 --- a/usr/src/uts/common/os/kstat_fr.c +++ b/usr/src/uts/common/os/kstat_fr.c @@ -20,7 +20,7 @@   */  /*   * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2011, 2012, Joyent, Inc. All rights reserved.   */  /* @@ -161,6 +161,7 @@ struct {  	kstat_named_t avenrun_5min;  	kstat_named_t avenrun_15min;  	kstat_named_t boot_time; +	kstat_named_t nsec_per_tick;  } system_misc_kstat = {  	{ "ncpus",		KSTAT_DATA_UINT32 },  	{ "lbolt",		KSTAT_DATA_UINT32 }, @@ -172,6 +173,7 @@ struct {  	{ "avenrun_5min",	KSTAT_DATA_UINT32 },  	{ "avenrun_15min",	KSTAT_DATA_UINT32 },  	{ "boot_time",		KSTAT_DATA_UINT32 }, +	{ "nsec_per_tick",	KSTAT_DATA_UINT32 },  };  struct { @@ -855,6 +857,8 @@ system_misc_kstat_update(kstat_t *ksp, int rw)  	system_misc_kstat.avenrun_15min.value.ui32	= (uint32_t)loadavgp[2];  	system_misc_kstat.boot_time.value.ui32		= (uint32_t)  	    zone_boot_time; +	system_misc_kstat.nsec_per_tick.value.ui32	= (uint32_t) +	    nsec_per_tick;  	return (0);  } diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c index 3ea0d0fe95..79f61ddcb9 100644 --- a/usr/src/uts/common/os/zone.c +++ b/usr/src/uts/common/os/zone.c @@ -2222,6 +2222,12 @@ zone_misc_kstat_update(kstat_t *ksp, int rw)  	zmp->zm_avenrun5.value.ui32 = zone->zone_avenrun[1];  	zmp->zm_avenrun15.value.ui32 = zone->zone_avenrun[2]; +	zmp->zm_run_ticks.value.ui64 = zone->zone_run_ticks; +	zmp->zm_run_wait.value.ui64 = zone->zone_runq_cntr; +	zmp->zm_fss_shr_pct.value.ui64 = zone->zone_fss_shr_pct; +	zmp->zm_fss_pri_hi.value.ui64 = zone->zone_fss_pri_hi; +	zmp->zm_fss_pri_avg.value.ui64 = zone->zone_fss_pri_avg; +  	return (0);  } @@ -2255,6 +2261,13 @@ zone_misc_kstat_create(zone_t *zone)  	kstat_named_init(&zmp->zm_avenrun5, "avenrun_5min", KSTAT_DATA_UINT32);  	kstat_named_init(&zmp->zm_avenrun15, "avenrun_15min",  	    KSTAT_DATA_UINT32); +	kstat_named_init(&zmp->zm_run_ticks, "run_ticks", KSTAT_DATA_UINT64); +	kstat_named_init(&zmp->zm_run_wait, "run_queue", KSTAT_DATA_UINT64); +	kstat_named_init(&zmp->zm_fss_shr_pct, "fss_share_percent", +	    KSTAT_DATA_UINT32); +	kstat_named_init(&zmp->zm_fss_pri_hi, "fss_pri_hi", KSTAT_DATA_UINT64); +	kstat_named_init(&zmp->zm_fss_pri_avg, "fss_pri_avg", +	    KSTAT_DATA_UINT64);  	ksp->ks_update = zone_misc_kstat_update;  	ksp->ks_private = zone; diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h index a9191aed7c..cb8a6012fc 100644 --- a/usr/src/uts/common/sys/buf.h +++ b/usr/src/uts/common/sys/buf.h @@ -21,6 +21,7 @@  /*   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. + * Copyright 2012 Joyent, Inc.  All rights reserved.   */  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/ @@ -186,6 +187,7 @@ struct biostats {  #define	B_STARTED	0x2000000	/* io:::start probe called for buf */  #define	B_ABRWRITE	0x4000000	/* Application based recovery active */  #define	B_PAGE_NOWAIT	0x8000000	/* Skip the page if it is locked */ +#define	B_INVALCURONLY	0x10000000	/* invalidate only for curproc */  /*   * There is some confusion over the meaning of B_FREE and B_INVAL and what @@ -198,6 +200,12 @@ struct biostats {   * between the sole use of these two flags.  In both cases, IO will be done   * if the page is not yet committed to storage.   * + * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is + * intended to be used in conjunction with B_INVAL.  B_INVALCURONLY has no + * meaning on its own.  When both B_INVALCURONLY and B_INVAL are set, then + * the mapping for the page is only invalidated for the current process. + * In this case, the page is not destroyed unless this was the final mapping. + *   * In order to discard pages without writing them back, (B_INVAL | B_TRUNC)   * should be used.   * diff --git a/usr/src/uts/common/sys/fss.h b/usr/src/uts/common/sys/fss.h index 583586fd75..cdb47beb7f 100644 --- a/usr/src/uts/common/sys/fss.h +++ b/usr/src/uts/common/sys/fss.h @@ -22,6 +22,7 @@  /*   * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. + * Copyright 2012 Joyent, Inc.  All rights reserved.   */  #ifndef	_SYS_FSS_H @@ -86,6 +87,7 @@ typedef struct fsspset {  					/* on the list			*/  	struct fssproj	*fssps_list;	/* list of project parts	*/  	struct fsszone	*fssps_zones;	/* list of fsszone_t's in pset	*/ +	uint32_t	fssps_gen;	/* generation for zone's kstats */  } fsspset_t;  /* @@ -103,6 +105,8 @@ typedef struct fssproj {  					/* protected by fssps_displock	*/  	uint32_t	fssp_ticks;	/* total of all ticks		*/  					/* protected by fssps_displock	*/ +	uint32_t	fssp_zone_ticks; /* unscaled total of all ticks	*/ +					/* protected by fssps_displock	*/  	fssusage_t	fssp_usage;	/* this project's decayed usage */  	fssusage_t	fssp_shusage;	/* normalized usage		*/  	struct fssproj	*fssp_next;	/* next project on this pset	*/ diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h index 6c9119e56d..82344607b0 100644 --- a/usr/src/uts/common/sys/mman.h +++ b/usr/src/uts/common/sys/mman.h @@ -22,6 +22,7 @@  /*   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. + * Copyright 2012 Joyent, Inc.  All rights reserved.   */  /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/ @@ -353,6 +354,7 @@ struct memcntl_mha32 {  #define	MS_SYNC		0x4		/* wait for msync */  #define	MS_ASYNC	0x1		/* return immediately */  #define	MS_INVALIDATE	0x2		/* invalidate caches */ +#define	MS_INVALCURPROC	0x8		/* invalidate cache for curproc only */  #if	(_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__)  /* functions to mctl */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 08677a2f65..a2b7217fd4 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -20,7 +20,7 @@   */  /*   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, Joyent Inc. All rights reserved. + * Copyright (c) 2011, 2012, Joyent Inc. All rights reserved.   */  #ifndef _SYS_ZONE_H @@ -441,6 +441,11 @@ typedef struct {  	kstat_named_t	zm_avenrun1;  	kstat_named_t	zm_avenrun5;  	kstat_named_t	zm_avenrun15; +	kstat_named_t	zm_run_ticks; +	kstat_named_t	zm_run_wait; +	kstat_named_t	zm_fss_shr_pct; +	kstat_named_t	zm_fss_pri_hi; +	kstat_named_t	zm_fss_pri_avg;  } zone_misc_kstat_t;  typedef struct zone { @@ -671,6 +676,20 @@ typedef struct zone {  	struct loadavg_s zone_loadavg;		/* loadavg for this zone */  	uint64_t	zone_hp_avenrun[3];	/* high-precision avenrun */  	int		zone_avenrun[3];	/* FSCALED avg. run queue len */ + +	/* +	 * FSS stats updated once per second by fss_decay_usage. +	 * zone_runq_cntr is an instantaneous accumulation of the number of +	 * processes in the run queue per project and is not computed over the +	 * one second interval. +	 */ +	uint32_t	zone_fss_gen;		/* FSS generation cntr */ +	uint32_t	zone_proc_cnt;		/* FSS process cntr */ +	uint64_t	zone_run_ticks;		/* tot # of ticks running */ +	uint64_t	zone_runq_cntr;		/* tot # of procs in runq */ +	uint32_t	zone_fss_shr_pct;	/* fss active shr % in intvl */ +	uint64_t	zone_fss_pri_hi;	/* fss high pri this interval */ +	uint64_t	zone_fss_pri_avg;	/* fss avg pri this interval */  } zone_t;  /* diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c index 1ab3a8b65e..63c8b64ad0 100644 --- a/usr/src/uts/common/syscall/memcntl.c +++ b/usr/src/uts/common/syscall/memcntl.c @@ -21,6 +21,7 @@  /*   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. + * Copyright 2012 Joyent, Inc.  All rights reserved.   */  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/ @@ -116,13 +117,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)  		 * MS_SYNC used to be defined to be zero but is now non-zero.  		 * For binary compatibility we still accept zero  		 * (the absence of MS_ASYNC) to mean the same thing. +		 * Binary compatibility is not an issue for MS_INVALCURPROC.  		 */  		iarg = (uintptr_t)arg;  		if ((iarg & ~MS_INVALIDATE) == 0)  			iarg |= MS_SYNC; -		if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) || -			((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) { +		if (((iarg & +		    ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) || +		    ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) || +		    ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) == +		    (MS_INVALIDATE|MS_INVALCURPROC))) {  			error = set_errno(EINVAL);  		} else {  			error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0); diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h index 1d91475e38..156b810046 100644 --- a/usr/src/uts/common/vm/hat.h +++ b/usr/src/uts/common/vm/hat.h @@ -21,6 +21,7 @@  /*   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. + * Copyright 2012 Joyent, Inc.  All rights reserved.   */  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/ @@ -460,6 +461,7 @@ void	hat_setstat(struct as *, caddr_t, size_t, uint_t);   */  #define	HAT_ADV_PGUNLOAD	0x00  #define	HAT_FORCE_PGUNLOAD	0x01 +#define	HAT_CURPROC_PGUNLOAD	0x02  /*   * Attributes for hat_page_*attr, hat_setstats and diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 31c293d416..5f106f6c06 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, Joyent, Inc. All rights reserved.   */  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/ @@ -7254,7 +7255,8 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)  	vpp = svd->vpage;  	offset = svd->offset + (uintptr_t)(addr - seg->s_base);  	bflags = ((flags & MS_ASYNC) ? B_ASYNC : 0) | -	    ((flags & MS_INVALIDATE) ? B_INVAL : 0); +	    ((flags & MS_INVALIDATE) ? B_INVAL : 0) | +	    ((flags & MS_INVALCURPROC) ? (B_INVALCURONLY | B_INVAL) : 0);  	if (attr) {  		pageprot = attr & ~(SHARED|PRIVATE); @@ -7279,11 +7281,11 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)  			vpp = &svd->vpage[seg_page(seg, addr)];  	} else if (svd->vp && svd->amp == NULL && -	    (flags & MS_INVALIDATE) == 0) { +	    (flags & (MS_INVALIDATE | MS_INVALCURPROC)) == 0) {  		/* -		 * No attributes, no anonymous pages and MS_INVALIDATE flag -		 * is not on, just use one big request. +		 * No attributes, no anonymous pages and MS_INVAL* flags +		 * are not on, just use one big request.  		 */  		err = VOP_PUTPAGE(svd->vp, (offset_t)offset, len,  		    bflags, svd->cred, NULL); @@ -7335,7 +7337,7 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)  		 * might race in and lock the page after we unlock and before  		 * we do the PUTPAGE, then PUTPAGE simply does nothing.  		 */ -		if (flags & MS_INVALIDATE) { +		if (flags & (MS_INVALIDATE | MS_INVALCURPROC)) {  			if ((pp = page_lookup(vp, off, SE_SHARED)) != NULL) {  				if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {  					page_unlock(pp); diff --git a/usr/src/uts/common/vm/vm_pvn.c b/usr/src/uts/common/vm/vm_pvn.c index 7233581227..39ace0b3c2 100644 --- a/usr/src/uts/common/vm/vm_pvn.c +++ b/usr/src/uts/common/vm/vm_pvn.c @@ -20,6 +20,7 @@   */  /*   * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved.   */  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/ @@ -431,7 +432,14 @@ pvn_write_done(page_t *plist, int flags)  				page_io_unlock(pp);  				page_unlock(pp);  			} -		} else if (flags & B_INVAL) { +		} else if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) { +			/* +			 * If B_INVALCURONLY is set, then we handle that case +			 * in the next conditional if hat_page_is_mapped() +			 * indicates that there are no additional mappings +			 * to the page. +			 */ +  			/*  			 * XXX - Failed writes with B_INVAL set are  			 * not handled appropriately. @@ -572,8 +580,9 @@ pvn_write_done(page_t *plist, int flags)  }  /* - * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI, - * B_TRUNC, B_FORCE}.  B_DELWRI indicates that this page is part of a kluster + * Flags are composed of {B_ASYNC, B_INVAL, B_INVALCURONLY, B_FREE, + * B_DONTNEED, B_DELWRI, B_TRUNC, B_FORCE}. + * B_DELWRI indicates that this page is part of a kluster   * operation and is only to be considered if it doesn't involve any   * waiting here.  B_TRUNC indicates that the file is being truncated   * and so no i/o needs to be done. B_FORCE indicates that the page @@ -627,13 +636,17 @@ pvn_getdirty(page_t *pp, int flags)  	 * If we want to free or invalidate the page then  	 * we need to unload it so that anyone who wants  	 * it will have to take a minor fault to get it. +	 * If we are only invalidating the page for the +	 * current process, then pass in a different flag.  	 * Otherwise, we're just writing the page back so we  	 * need to sync up the hardwre and software mod bit to  	 * detect any future modifications.  We clear the  	 * software mod bit when we put the page on the dirty  	 * list.  	 */ -	if (flags & (B_INVAL | B_FREE)) { +	if (flags & B_INVALCURONLY) { +		(void) hat_pageunload(pp, HAT_CURPROC_PGUNLOAD); +	} else if (flags & (B_INVAL | B_FREE)) {  		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);  	} else {  		(void) hat_pagesync(pp, HAT_SYNC_ZERORM); @@ -645,7 +658,7 @@ pvn_getdirty(page_t *pp, int flags)  		 * list after all.  		 */  		page_io_unlock(pp); -		if (flags & B_INVAL) { +		if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {  			/*LINTED: constant in conditional context*/  			VN_DISPOSE(pp, B_INVAL, 0, kcred);  		} else if (flags & B_FREE) { @@ -657,6 +670,9 @@ pvn_getdirty(page_t *pp, int flags)  			 * of VOP_PUTPAGE() who prefer freeing the  			 * page _only_ if no one else is accessing it.  			 * E.g. segmap_release() +			 * We also take this path for B_INVALCURONLY and +			 * let page_release call VN_DISPOSE if no one else is +			 * using the page.  			 *  			 * The above hat_ismod() check is useless because:  			 * (1) we may not be holding SE_EXCL lock; @@ -681,7 +697,7 @@ pvn_getdirty(page_t *pp, int flags)  	 * We'll detect the fact that they used it when the  	 * i/o is done and avoid freeing the page.  	 */ -	if (flags & B_FREE) +	if (flags & (B_FREE | B_INVALCURONLY))  		page_downgrade(pp); diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c index 18e3c4c806..bbfd6013cd 100644 --- a/usr/src/uts/common/vm/vm_usage.c +++ b/usr/src/uts/common/vm/vm_usage.c @@ -939,7 +939,10 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,  			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&  			    (page = page_exists(vn, off)) != NULL) { -				page_type = VMUSAGE_BOUND_INCORE; +				if (PP_ISFREE(page)) +					page_type = VMUSAGE_BOUND_NOT_INCORE; +				else +					page_type = VMUSAGE_BOUND_INCORE;  				if (page->p_szc > 0) {  					pgcnt = page_get_pagecnt(page->p_szc);  					pgshft = page_get_shift(page->p_szc); @@ -1026,7 +1029,10 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,  			if (vnode->v_pages != NULL &&  			    (page = page_exists(vnode, ptob(index))) != NULL) { -				page_type = VMUSAGE_BOUND_INCORE; +				if (PP_ISFREE(page)) +					page_type = VMUSAGE_BOUND_NOT_INCORE; +				else +					page_type = VMUSAGE_BOUND_INCORE;  				if (page->p_szc > 0) {  					pgcnt = page_get_pagecnt(page->p_szc);  					pgshft = page_get_shift(page->p_szc); @@ -1306,6 +1312,12 @@ vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)  			}  			/* +			 * Pages on the free list aren't counted for the rss. +			 */ +			if (PP_ISFREE(page)) +				continue; + +			/*  			 * Assume anon structs with a refcnt  			 * of 1 are not COW shared, so there  			 * is no reason to track them per entity. diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c index 8da02a4c36..40b033d0e4 100644 --- a/usr/src/uts/i86pc/vm/hat_i86.c +++ b/usr/src/uts/i86pc/vm/hat_i86.c @@ -27,6 +27,7 @@   */  /*   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved. + * Copyright 2012 Joyent, Inc.  All rights reserved.   */  /* @@ -3350,15 +3351,13 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)  extern int	vpm_enable;  /* - * Unload all translations to a page. If the page is a subpage of a large + * Unload translations to a page. If the page is a subpage of a large   * page, the large page mappings are also removed. - * - * The forceflags are unused. + * If unloadflag is HAT_CURPROC_PGUNLOAD, then we only unload the translation + * for the current process, otherwise all translations are unloaded.   */ - -/*ARGSUSED*/  static int -hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag) +hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)  {  	page_t		*cur_pp = pp;  	hment_t		*hm; @@ -3366,6 +3365,8 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)  	htable_t	*ht;  	uint_t		entry;  	level_t		level; +	struct hat	*curhat; +	ulong_t		cnt;  	XPV_DISALLOW_MIGRATE(); @@ -3375,6 +3376,9 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)  	++curthread->t_hatdepth;  	ASSERT(curthread->t_hatdepth < 16); +	if (unloadflag == HAT_CURPROC_PGUNLOAD) +		curhat = curthread->t_procp->p_as->a_hat; +  #if defined(__amd64)  	/*  	 * clear the vpm ref. @@ -3387,6 +3391,8 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)  	 * The loop with next_size handles pages with multiple pagesize mappings  	 */  next_size: +	if (unloadflag == HAT_CURPROC_PGUNLOAD) +		cnt = hat_page_getshare(cur_pp);  	for (;;) {  		/* @@ -3398,6 +3404,7 @@ next_size:  			if (hm == NULL) {  				x86_hm_exit(cur_pp); +curproc_done:  				/*  				 * If not part of a larger page, we're done.  				 */ @@ -3424,8 +3431,21 @@ next_size:  			 * If this mapping size matches, remove it.  			 */  			level = ht->ht_level; -			if (level == pg_szcd) -				break; +			if (level == pg_szcd) { +				if (unloadflag != HAT_CURPROC_PGUNLOAD || +				    ht->ht_hat == curhat) +					break; +				/* +				 * unloadflag == HAT_CURPROC_PGUNLOAD but it's +				 * not the hat for the current process. Leave +				 * entry in place. Also do a safety check to +				 * ensure we don't get in an infinite loop +				 */ +				if (cnt-- == 0) { +					x86_hm_exit(cur_pp); +					goto curproc_done; +				} +			}  		}  		/* @@ -3435,14 +3455,18 @@ next_size:  		hm = hati_page_unmap(cur_pp, ht, entry);  		if (hm != NULL)  			hment_free(hm); + +		/* Perform check above for being part of a larger page. */ +		if (unloadflag == HAT_CURPROC_PGUNLOAD) +			goto curproc_done;  	}  }  int -hat_pageunload(struct page *pp, uint_t forceflag) +hat_pageunload(struct page *pp, uint_t unloadflag)  {  	ASSERT(PAGE_EXCL(pp)); -	return (hati_pageunload(pp, 0, forceflag)); +	return (hati_pageunload(pp, 0, unloadflag));  }  /* | 
