Diffstat (limited to 'usr/src/uts/common/vm/vm_page.c')
-rw-r--r--   usr/src/uts/common/vm/vm_page.c   850
1 file changed, 132 insertions(+), 718 deletions(-)
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index 5b3db34db1..27b2702d28 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -87,90 +87,6 @@ static pgcnt_t max_page_get; /* max page_get request size in pages */
pgcnt_t total_pages = 0; /* total number of pages (used by /proc) */
/*
- * vnode for all pages which are retired from the VM system;
- * such as pages with Uncorrectable Errors.
- */
-struct vnode retired_ppages;
-
-static void page_retired_init(void);
-static void retired_dispose(vnode_t *vp, page_t *pp, int flag,
- int dn, cred_t *cr);
-static void retired_inactive(vnode_t *vp, cred_t *cr);
-static void page_retired(page_t *pp);
-static void retired_page_removed(page_t *pp);
-void page_unretire_pages(void);
-
-/*
- * The maximum number of pages that will be unretired in one iteration.
- * This number is totally arbitrary.
- */
-#define UNRETIRE_PAGES 256
-
-/*
- * We limit the number of pages that may be retired to
- * a percentage of the total physical memory. Note that
- * the percentage values are stored as 'basis points',
- * ie, 100 basis points is 1%.
- */
-#define MAX_PAGES_RETIRED_BPS_DEFAULT 10 /* .1% */
-
-uint64_t max_pages_retired_bps = MAX_PAGES_RETIRED_BPS_DEFAULT;
-
-static int pages_retired_limit_exceeded(void);
-
-/*
- * operations vector for vnode with retired pages. Only VOP_DISPOSE
- * and VOP_INACTIVE are intercepted.
- */
-struct vnodeops retired_vnodeops = {
- "retired_vnodeops",
- fs_nosys, /* open */
- fs_nosys, /* close */
- fs_nosys, /* read */
- fs_nosys, /* write */
- fs_nosys, /* ioctl */
- fs_nosys, /* setfl */
- fs_nosys, /* getattr */
- fs_nosys, /* setattr */
- fs_nosys, /* access */
- fs_nosys, /* lookup */
- fs_nosys, /* create */
- fs_nosys, /* remove */
- fs_nosys, /* link */
- fs_nosys, /* rename */
- fs_nosys, /* mkdir */
- fs_nosys, /* rmdir */
- fs_nosys, /* readdir */
- fs_nosys, /* symlink */
- fs_nosys, /* readlink */
- fs_nosys, /* fsync */
- retired_inactive,
- fs_nosys, /* fid */
- fs_rwlock, /* rwlock */
- fs_rwunlock, /* rwunlock */
- fs_nosys, /* seek */
- fs_nosys, /* cmp */
- fs_nosys, /* frlock */
- fs_nosys, /* space */
- fs_nosys, /* realvp */
- fs_nosys, /* getpage */
- fs_nosys, /* putpage */
- fs_nosys_map,
- fs_nosys_addmap,
- fs_nosys, /* delmap */
- fs_nosys_poll,
- fs_nosys, /* dump */
- fs_nosys, /* l_pathconf */
- fs_nosys, /* pageio */
- fs_nosys, /* dumpctl */
- retired_dispose,
- fs_nosys, /* setsecattr */
- fs_nosys, /* getsecatt */
- fs_nosys, /* shrlock */
- fs_vnevent_nosupport /* vnevent */
-};
-
-/*
* freemem_lock protects all freemem variables:
* availrmem. Also this lock protects the globals which track the
* availrmem changes for accurate kernel footprint calculation.
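
For reference, the block deleted above capped page retirement at a fraction of physical memory expressed in basis points (100 bps == 1%). The following is a minimal standalone C sketch of that arithmetic; the physmem value is a made-up example and the variable names are hypothetical, but the formula matches the removed pages_retired_limit_exceeded():

	#include <stdio.h>
	#include <inttypes.h>

	#define MAX_PAGES_RETIRED_BPS_DEFAULT	10	/* 10 bps == 0.1% */

	int
	main(void)
	{
		uint64_t physmem = 1048576;	/* example: 4 GB of 4 KB pages */
		uint64_t bps = MAX_PAGES_RETIRED_BPS_DEFAULT;

		/* same formula the removed pages_retired_limit_exceeded() used */
		uint64_t retired_max = (physmem * bps) / 10000;

		printf("at most %" PRIu64 " pages may be retired\n", retired_max);
		return (0);
	}

With those example numbers the cap works out to 1048 pages, roughly 4 MB.
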
@@ -289,15 +205,6 @@ static kcondvar_t pcgs_cv; /* cv for delay in pcgs */
#define PAGE_LOCK_MAXIMUM \
((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1)
-/*
- * Control over the verbosity of page retirement. When set to zero, no messages
- * will be printed. A value of one will trigger messages for retirement
- * operations, and is intended for processors which don't yet support FMA
- * (spitfire). Two will cause verbose messages to be printed when retirements
- * complete, and is intended only for debugging purposes.
- */
-int page_retire_messages = 0;
-
#ifdef VM_STATS
/*
@@ -440,11 +347,7 @@ vm_init(void)
(void) callb_add(callb_vm_cpr, 0, CB_CL_CPR_VM, "vm");
page_init_mem_config();
-
- /*
- * initialise the vnode for retired pages
- */
- page_retired_init();
+ page_retire_init();
}
/*
@@ -2799,153 +2702,6 @@ page_free(page_t *pp, int dontneed)
ASSERT((PAGE_EXCL(pp) &&
!page_iolock_assert(pp)) || panicstr);
- if (page_deteriorating(pp)) {
- volatile int i = 0;
- char *kaddr;
- volatile int rb, wb;
- uint64_t pa;
- volatile int ue = 0;
- on_trap_data_t otd;
-
- if (pp->p_vnode != NULL) {
- /*
- * Let page_destroy() do its bean counting and
- * hash out the page; it will then call back
- * into page_free() with pp->p_vnode == NULL.
- */
- page_destroy(pp, 0);
- return;
- }
-
- if (page_isfailing(pp)) {
- /*
- * If we have already exceeded the limit for
- * pages retired, we will treat this page as
- * 'toxic' rather than failing. That will ensure
- * that the page is at least cleaned, and if
- * a UE is detected, the page will be retired
- * anyway.
- */
- if (pages_retired_limit_exceeded()) {
- /*
- * clear the flag and reset to toxic
- */
- page_clrtoxic(pp);
- page_settoxic(pp, PAGE_IS_TOXIC);
- } else {
- pa = ptob((uint64_t)page_pptonum(pp));
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Page 0x%08x.%08x "
- "removed from service",
- (uint32_t)(pa >> 32), (uint32_t)pa);
- }
- goto page_failed;
- }
- }
-
- pagescrub(pp, 0, PAGESIZE);
-
- /*
- * We want to determine whether the error that occurred on
- * this page is transient or persistent, so we get a mapping
- * to the page and try every possible bit pattern to compare
- * what we write with what we read back. A smaller number
- * of bit patterns might suffice, but there's no point in
- * getting fancy. If this is the hot path on your system,
- * you've got bigger problems.
- */
- kaddr = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
- for (wb = 0xff; wb >= 0; wb--) {
- if (on_trap(&otd, OT_DATA_EC)) {
- pa = ptob((uint64_t)page_pptonum(pp)) + i;
- page_settoxic(pp, PAGE_IS_FAILING);
-
- if (page_retire_messages) {
- cmn_err(CE_WARN, "Uncorrectable Error "
- "occurred at PA 0x%08x.%08x while "
- "attempting to clear previously "
- "reported error; page removed from "
- "service", (uint32_t)(pa >> 32),
- (uint32_t)pa);
- }
-
- ue++;
- break;
- }
-
- /*
- * Write out the bit pattern, flush it to memory, and
- * read it back while under on_trap() protection.
- */
- for (i = 0; i < PAGESIZE; i++)
- kaddr[i] = wb;
-
- sync_data_memory(kaddr, PAGESIZE);
-
- for (i = 0; i < PAGESIZE; i++) {
- if ((rb = (uchar_t)kaddr[i]) != wb) {
- page_settoxic(pp, PAGE_IS_FAILING);
- goto out;
- }
- }
- }
-out:
- no_trap();
- ppmapout(kaddr);
-
- if (wb >= 0 && !ue) {
- pa = ptob((uint64_t)page_pptonum(pp)) + i;
- if (page_retire_messages) {
- cmn_err(CE_WARN, "Data Mismatch occurred at PA "
- "0x%08x.%08x [ 0x%x != 0x%x ] while "
- "attempting to clear previously reported "
- "error; page removed from service",
- (uint32_t)(pa >> 32), (uint32_t)pa, rb, wb);
- }
- }
-page_failed:
- /*
- * DR operations change the association between a page_t
- * and the physical page it represents. Check if the
- * page is still bad. If it is, then retire it.
- */
- if (page_isfaulty(pp) && page_isfailing(pp)) {
- /*
- * In the future, it might be useful to have a platform
- * callback here to tell the hardware to fence off this
- * page during the next reboot.
- *
- * We move the page to the retired_vnode here
- */
- (void) page_hashin(pp, &retired_ppages,
- (u_offset_t)ptob((uint64_t)page_pptonum(pp)), NULL);
- mutex_enter(&freemem_lock);
- availrmem--;
- mutex_exit(&freemem_lock);
- page_retired(pp);
- page_downgrade(pp);
-
- /*
- * If DR raced with the above page retirement code,
- * we might have retired a good page. If so, unretire
- * the page.
- */
- if (!page_isfaulty(pp))
- page_unretire_pages();
- return;
- }
-
- pa = ptob((uint64_t)page_pptonum(pp));
-
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Previously reported error on page "
- "0x%08x.%08x cleared", (uint32_t)(pa >> 32),
- (uint32_t)pa);
- }
-
- page_clrtoxic(pp);
- }
-
if (PP_ISFREE(pp)) {
panic("page_free: page %p is free", (void *)pp);
}
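
For context, the large block removed from page_free() above tried to tell transient errors from persistent ones by writing every byte pattern to the page and reading it back under on_trap() protection. Below is a simplified userland sketch of just the write/verify loop over an ordinary buffer; on_trap() and sync_data_memory() have no userland equivalents and are only noted in comments:

	#include <stddef.h>

	/*
	 * Write every byte pattern 0xff..0x00 to `buf' and read it back.
	 * Returns 1 on the first mismatch (a persistent error in the removed
	 * kernel code), 0 if all 256 patterns read back intact.  The kernel
	 * version flushed with sync_data_memory() and ran the loop under
	 * on_trap(OT_DATA_EC) so a UE would be caught rather than panic.
	 */
	int
	verify_patterns(volatile unsigned char *buf, size_t len)
	{
		for (int wb = 0xff; wb >= 0; wb--) {
			for (size_t i = 0; i < len; i++)
				buf[i] = (unsigned char)wb;
			/* sync_data_memory(buf, len) would go here in the kernel */
			for (size_t i = 0; i < len; i++) {
				if (buf[i] != (unsigned char)wb)
					return (1);	/* persistent data mismatch */
			}
		}
		return (0);	/* all 256 patterns read back correctly */
	}
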
@@ -3089,7 +2845,6 @@ page_free_pages(page_t *pp)
pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc);
pgcnt_t i;
uint_t szc = pp->p_szc;
- int toxic = 0;
VM_STAT_ADD(pagecnt.pc_free_pages);
TRACE_1(TR_FAC_VM, TR_PAGE_FREE_FREE,
@@ -3118,9 +2873,6 @@ page_free_pages(page_t *pp)
ASSERT(tpp->p_vnode == NULL);
ASSERT(tpp->p_szc == szc);
- if (page_deteriorating(tpp))
- toxic = 1;
-
PP_SETFREE(tpp);
page_clr_all_props(tpp);
PP_SETAGED(tpp);
@@ -3131,10 +2883,6 @@ page_free_pages(page_t *pp)
}
ASSERT(rootpp == pp);
- if (toxic) {
- page_free_toxic_pages(rootpp);
- return;
- }
page_list_add_pages(rootpp, 0);
page_create_putback(pgcnt);
}
@@ -3219,12 +2967,13 @@ page_reclaim(page_t *pp, kmutex_t *lock)
struct pcf *p;
uint_t pcf_index;
struct cpu *cpup;
- int enough;
uint_t i;
+ pgcnt_t npgs, need, collected;
ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);
ASSERT(PAGE_EXCL(pp) && PP_ISFREE(pp));
- ASSERT(pp->p_szc == 0);
+
+ npgs = page_get_pagecnt(pp->p_szc);
/*
* If `freemem' is 0, we cannot reclaim this page from the
@@ -3254,18 +3003,19 @@ page_reclaim(page_t *pp, kmutex_t *lock)
goto page_reclaim_nomem;
}
- enough = 0;
+ collected = 0;
pcf_index = PCF_INDEX();
p = &pcf[pcf_index];
p->pcf_touch = 1;
mutex_enter(&p->pcf_lock);
- if (p->pcf_count >= 1) {
- enough = 1;
- p->pcf_count--;
+ if (p->pcf_count >= npgs) {
+ collected = npgs;
+ p->pcf_count -= npgs;
}
mutex_exit(&p->pcf_lock);
+ need = npgs - collected;
- if (!enough) {
+ if (need > 0) {
VM_STAT_ADD(page_reclaim_zero);
/*
* Check again. It's possible that some other thread
@@ -3277,15 +3027,22 @@ page_reclaim(page_t *pp, kmutex_t *lock)
for (i = 0; i < PCF_FANOUT; i++) {
p->pcf_touch = 1;
mutex_enter(&p->pcf_lock);
- if (p->pcf_count >= 1) {
- p->pcf_count -= 1;
- enough = 1;
- break;
+ if (p->pcf_count) {
+ if (p->pcf_count >= need) {
+ p->pcf_count -= need;
+ collected += need;
+ need = 0;
+ break;
+ } else if (p->pcf_count) {
+ collected += p->pcf_count;
+ need -= p->pcf_count;
+ p->pcf_count = 0;
+ }
}
p++;
}
- if (!enough) {
+ if (need > 0) {
page_reclaim_nomem:
/*
* We really can't have page `pp'.
@@ -3309,6 +3066,7 @@ page_reclaim_nomem:
mutex_enter(&new_freemem_lock);
p = pcf;
+ p->pcf_count += collected;
for (i = 0; i < PCF_FANOUT; i++) {
p->pcf_wait++;
mutex_exit(&p->pcf_lock);
@@ -3328,11 +3086,13 @@ page_reclaim_nomem:
}
/*
- * There was a page to be found.
+ * We beat the PCF bins over the head until
+ * we got the memory that we wanted.
* The pcf accounting has been done,
* though none of the pcf_wait flags have been set,
* drop the locks and continue on.
*/
+ ASSERT(collected == npgs);
while (p >= pcf) {
mutex_exit(&p->pcf_lock);
p--;
@@ -3343,14 +3103,19 @@ page_reclaim_nomem:
* freemem is not protected by any lock. Thus, we cannot
* have any assertion containing freemem here.
*/
- freemem -= 1;
+ freemem -= npgs;
VM_STAT_ADD(pagecnt.pc_reclaim);
if (PP_ISAGED(pp)) {
- page_list_sub(pp, PG_FREE_LIST);
+ if (npgs > 1) {
+ page_list_sub_pages(pp, pp->p_szc);
+ } else {
+ page_list_sub(pp, PG_FREE_LIST);
+ }
TRACE_1(TR_FAC_VM, TR_PAGE_UNFREE_FREE,
"page_reclaim_free:pp %p", pp);
} else {
+ ASSERT(npgs == 1);
page_list_sub(pp, PG_CACHE_LIST);
TRACE_1(TR_FAC_VM, TR_PAGE_UNFREE_CACHE,
"page_reclaim_cache:pp %p", pp);
@@ -3363,9 +3128,11 @@ page_reclaim_nomem:
*
* Set the reference bit to protect against immediate pageout.
*/
- PP_CLRFREE(pp);
- PP_CLRAGED(pp);
- page_set_props(pp, P_REF);
+ for (i = 0; i < npgs; i++, pp = page_next(pp)) {
+ PP_CLRFREE(pp);
+ PP_CLRAGED(pp);
+ page_set_props(pp, P_REF);
+ }
CPU_STATS_ENTER_K();
cpup = CPU; /* get cpup now that CPU cannot change */
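
The reworked page_reclaim() above must now collect credit for npgs pages rather than one: first from the current CPU's pcf bucket, then, if that falls short, by sweeping every bucket and draining partial counts. A minimal sketch of that collection pass, with the per-bucket pcf_lock and the give-back-on-failure path omitted and the names simplified:

	#include <stddef.h>

	#define PCF_FANOUT	8

	/* simplified stand-in for the kernel's pcf[] free-page counters */
	static size_t pcf_count[PCF_FANOUT];

	/*
	 * Try to collect credit for `npgs' pages, draining partial counts
	 * from each bucket as needed.  Returns how many were actually
	 * collected; a caller that comes up short must return the partial
	 * credit and wait, as page_reclaim() does in its nomem path.
	 */
	size_t
	pcf_collect(size_t npgs)
	{
		size_t need = npgs;
		size_t collected = 0;

		for (int i = 0; i < PCF_FANOUT && need > 0; i++) {
			if (pcf_count[i] >= need) {
				pcf_count[i] -= need;
				collected += need;
				need = 0;
			} else if (pcf_count[i] > 0) {
				collected += pcf_count[i];
				need -= pcf_count[i];
				pcf_count[i] = 0;
			}
		}
		return (collected);
	}
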
@@ -3441,7 +3208,6 @@ page_destroy_pages(page_t *pp)
pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc);
pgcnt_t i, pglcks = 0;
uint_t szc = pp->p_szc;
- int toxic = 0;
ASSERT(pp->p_szc != 0 && pp->p_szc < page_num_pagesizes());
@@ -3471,9 +3237,6 @@ page_destroy_pages(page_t *pp)
ASSERT(tpp->p_vnode == NULL);
ASSERT(tpp->p_szc == szc);
- if (page_deteriorating(tpp))
- toxic = 1;
-
PP_SETFREE(tpp);
page_clr_all_props(tpp);
PP_SETAGED(tpp);
@@ -3489,10 +3252,6 @@ page_destroy_pages(page_t *pp)
mutex_exit(&freemem_lock);
}
- if (toxic) {
- page_free_toxic_pages(rootpp);
- return;
- }
page_list_add_pages(rootpp, 0);
page_create_putback(pgcnt);
}
@@ -3916,14 +3675,6 @@ page_hashout(page_t *pp, kmutex_t *phm)
mutex_exit(nphm);
/*
- * If the page was retired, update the pages_retired
- * total and clear the page flag
- */
- if (page_isretired(pp)) {
- retired_page_removed(pp);
- }
-
- /*
* Wake up processes waiting for this page. The page's
* identity has been changed, and is probably not the
* desired page any longer.
@@ -5397,6 +5148,63 @@ page_release(page_t *pp, int checkmod)
return (status);
}
+/*
+ * Given a constituent page, try to demote the large page on the freelist.
+ *
+ * Returns nonzero if the page could be demoted successfully. Returns with
+ * the constituent page still locked.
+ */
+int
+page_try_demote_free_pages(page_t *pp)
+{
+ page_t *rootpp = pp;
+ pfn_t pfn = page_pptonum(pp);
+ spgcnt_t npgs;
+ uint_t szc = pp->p_szc;
+
+ ASSERT(PP_ISFREE(pp));
+ ASSERT(PAGE_EXCL(pp));
+
+ /*
+ * Adjust rootpp and lock it, if `pp' is not the base
+ * constituent page.
+ */
+ npgs = page_get_pagecnt(pp->p_szc);
+ if (npgs == 1) {
+ return (0);
+ }
+
+ if (!IS_P2ALIGNED(pfn, npgs)) {
+ pfn = P2ALIGN(pfn, npgs);
+ rootpp = page_numtopp_nolock(pfn);
+ }
+
+ if (pp != rootpp && !page_trylock(rootpp, SE_EXCL)) {
+ return (0);
+ }
+
+ if (rootpp->p_szc != szc) {
+ if (pp != rootpp)
+ page_unlock(rootpp);
+ return (0);
+ }
+
+ page_demote_free_pages(rootpp);
+
+ if (pp != rootpp)
+ page_unlock(rootpp);
+
+ ASSERT(PP_ISFREE(pp));
+ ASSERT(PAGE_EXCL(pp));
+ return (1);
+}
+
+/*
+ * Given a constituent page, try to demote the large page.
+ *
+ * Returns nonzero if the page could be demoted successfully. Returns with
+ * the constituent page still locked.
+ */
int
page_try_demote_pages(page_t *pp)
{
@@ -5406,27 +5214,27 @@ page_try_demote_pages(page_t *pp)
uint_t szc = pp->p_szc;
vnode_t *vp = pp->p_vnode;
- ASSERT(PAGE_EXCL(rootpp));
+ ASSERT(PAGE_EXCL(pp));
VM_STAT_ADD(pagecnt.pc_try_demote_pages[0]);
- if (rootpp->p_szc == 0) {
+ if (pp->p_szc == 0) {
VM_STAT_ADD(pagecnt.pc_try_demote_pages[1]);
return (1);
}
if (vp != NULL && !IS_SWAPFSVP(vp) && vp != &kvp) {
VM_STAT_ADD(pagecnt.pc_try_demote_pages[2]);
- page_demote_vp_pages(rootpp);
+ page_demote_vp_pages(pp);
ASSERT(pp->p_szc == 0);
return (1);
}
/*
- * Adjust rootpp if passed in is not the base
+ * Adjust rootpp if passed in is not the base
* constituent page.
*/
- npgs = page_get_pagecnt(rootpp->p_szc);
+ npgs = page_get_pagecnt(pp->p_szc);
ASSERT(npgs > 1);
if (!IS_P2ALIGNED(pfn, npgs)) {
pfn = P2ALIGN(pfn, npgs);
@@ -5455,12 +5263,11 @@ page_try_demote_pages(page_t *pp)
break;
ASSERT(tpp->p_szc == rootpp->p_szc);
ASSERT(page_pptonum(tpp) == page_pptonum(rootpp) + i);
- (void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
}
/*
- * If we failed to lock them all then unlock what we have locked
- * so far and bail.
+ * If we failed to lock them all then unlock what we have
+ * locked so far and bail.
*/
if (i < npgs) {
tpp = rootpp;
@@ -5473,12 +5280,9 @@ page_try_demote_pages(page_t *pp)
return (0);
}
- /*
- * XXX probably p_szc clearing and page unlocking can be done within
- * one loop but since this is rare code we can play very safe.
- */
for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) {
ASSERT(PAGE_EXCL(tpp));
+ (void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
tpp->p_szc = 0;
}
@@ -5490,6 +5294,7 @@ page_try_demote_pages(page_t *pp)
if (tpp != pp)
page_unlock(tpp);
}
+
VM_STAT_ADD(pagecnt.pc_try_demote_pages[5]);
return (1);
}
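
Both demote routines above locate the base (root) constituent page by rounding the pfn down to the large-page boundary with P2ALIGN(). A small standalone illustration of that rounding; the pfn and page count are made-up values, and the macro matches the kernel's definition for power-of-two alignments:

	#include <stdio.h>
	#include <inttypes.h>

	/* power-of-two round-down, as the kernel's P2ALIGN() macro does */
	#define P2ALIGN(x, align)	((x) & -(uint64_t)(align))

	int
	main(void)
	{
		uint64_t pfn = 0x12345;	/* hypothetical constituent page */
		uint64_t npgs = 512;	/* e.g. a 2 MB page built of 4 KB pages */
		uint64_t rootpfn = P2ALIGN(pfn, npgs);

		/* prints: pfn 0x12345 lies in the large page rooted at 0x12200 */
		printf("pfn 0x%" PRIx64 " lies in the large page rooted at 0x%" PRIx64 "\n",
		    pfn, rootpfn);
		return (0);
	}
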
@@ -5579,221 +5384,6 @@ page_demote_vp_pages(page_t *pp)
}
/*
- * Page retire operation.
- *
- * page_retire()
- * Attempt to retire (throw away) page pp. We cannot do this if
- * the page is dirty; if the page is clean, we can try. We return 0 on
- * success, -1 on failure. This routine should be invoked by the platform's
- * memory error detection code.
- *
- * pages_retired_limit_exceeded()
- * We set a limit on the number of pages which may be retired. This
- * is set to a percentage of total physical memory. This limit is
- * enforced here.
- */
-
-static pgcnt_t retired_pgcnt = 0;
-
-/*
- * routines to update the count of retired pages
- */
-static void
-page_retired(page_t *pp)
-{
- ASSERT(pp);
-
- page_settoxic(pp, PAGE_IS_RETIRED);
- atomic_add_long(&retired_pgcnt, 1);
-}
-
-static void
-retired_page_removed(page_t *pp)
-{
- ASSERT(pp);
- ASSERT(page_isretired(pp));
- ASSERT(retired_pgcnt > 0);
-
- page_clrtoxic(pp);
- atomic_add_long(&retired_pgcnt, -1);
-}
-
-
-static int
-pages_retired_limit_exceeded()
-{
- pgcnt_t retired_max;
-
- /*
- * If the percentage is zero or is not set correctly,
- * return TRUE so that pages are not retired.
- */
- if (max_pages_retired_bps <= 0 ||
- max_pages_retired_bps >= 10000)
- return (1);
-
- /*
- * Calculate the maximum number of pages allowed to
- * be retired as a percentage of total physical memory
- * (Remember that we are using basis points, hence the 10000.)
- */
- retired_max = (physmem * max_pages_retired_bps) / 10000;
-
- /*
- * return 'TRUE' if we have already retired more
- * than the legal limit
- */
- return (retired_pgcnt >= retired_max);
-}
-
-#define PAGE_RETIRE_SELOCK 0
-#define PAGE_RETIRE_NORECLAIM 1
-#define PAGE_RETIRE_LOCKED 2
-#define PAGE_RETIRE_COW 3
-#define PAGE_RETIRE_DIRTY 4
-#define PAGE_RETIRE_LPAGE 5
-#define PAGE_RETIRE_SUCCESS 6
-#define PAGE_RETIRE_LIMIT 7
-#define PAGE_RETIRE_NCODES 8
-
-typedef struct page_retire_op {
- int pr_count;
- short pr_unlock;
- short pr_retval;
- char *pr_message;
-} page_retire_op_t;
-
-page_retire_op_t page_retire_ops[PAGE_RETIRE_NCODES] = {
- { 0, 0, -1, "cannot lock page" },
- { 0, 0, -1, "cannot reclaim cached page" },
- { 0, 1, -1, "page is locked" },
- { 0, 1, -1, "copy-on-write page" },
- { 0, 1, -1, "page is dirty" },
- { 0, 1, -1, "cannot demote large page" },
- { 0, 0, 0, "page successfully retired" },
- { 0, 0, -1, "excess pages retired already" },
-};
-
-static int
-page_retire_done(page_t *pp, int code)
-{
- page_retire_op_t *prop = &page_retire_ops[code];
-
- prop->pr_count++;
-
- if (prop->pr_unlock)
- page_unlock(pp);
-
- if (page_retire_messages > 1) {
- printf("page_retire(%p) pfn 0x%lx %s: %s\n",
- (void *)pp, page_pptonum(pp),
- prop->pr_retval == -1 ? "failed" : "succeeded",
- prop->pr_message);
- }
-
- return (prop->pr_retval);
-}
-
-int
-page_retire(page_t *pp, uchar_t flag)
-{
- uint64_t pa = ptob((uint64_t)page_pptonum(pp));
-
- ASSERT(flag == PAGE_IS_FAILING || flag == PAGE_IS_TOXIC);
-
- /*
- * DR operations change the association between a page_t
- * and the physical page it represents. Check if the
- * page is still bad.
- */
- if (!page_isfaulty(pp)) {
- page_clrtoxic(pp);
- return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
- }
-
- /*
- * We set the flag here so that even if we fail due
- * to exceeding the limit for retired pages, the
- * page will still be checked and either cleared
- * or retired in page_free().
- */
- page_settoxic(pp, flag);
-
- if (flag == PAGE_IS_TOXIC) {
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Scheduling clearing of error on"
- " page 0x%08x.%08x",
- (uint32_t)(pa >> 32), (uint32_t)pa);
- }
-
- } else { /* PAGE_IS_FAILING */
- if (pages_retired_limit_exceeded()) {
- /*
- * Return as we have already exceeded the
- * maximum number of pages allowed to be
- * retired
- */
- return (page_retire_done(pp, PAGE_RETIRE_LIMIT));
- }
-
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Scheduling removal of "
- "page 0x%08x.%08x",
- (uint32_t)(pa >> 32), (uint32_t)pa);
- }
- }
-
- if (PAGE_LOCKED(pp) || !page_trylock(pp, SE_EXCL))
- return (page_retire_done(pp, PAGE_RETIRE_SELOCK));
-
- /*
- * If this is a large page we first try and demote it
- * to PAGESIZE pages and then dispose of the toxic page.
- * On failure we will let the page free/destroy
- * code handle it later since this is a mapped page.
- * Note that free large pages can always be demoted.
- *
- */
- if (pp->p_szc != 0) {
- if (PP_ISFREE(pp))
- (void) page_demote_free_pages(pp);
- else
- (void) page_try_demote_pages(pp);
-
- if (pp->p_szc != 0)
- return (page_retire_done(pp, PAGE_RETIRE_LPAGE));
- }
-
- if (PP_ISFREE(pp)) {
- if (!page_reclaim(pp, NULL))
- return (page_retire_done(pp, PAGE_RETIRE_NORECLAIM));
- /*LINTED: constant in conditional context*/
- VN_DISPOSE(pp, pp->p_vnode ? B_INVAL : B_FREE, 0, kcred)
- return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
- }
-
- if (pp->p_lckcnt != 0)
- return (page_retire_done(pp, PAGE_RETIRE_LOCKED));
-
- if (pp->p_cowcnt != 0)
- return (page_retire_done(pp, PAGE_RETIRE_COW));
-
- /*
- * Unload all translations to this page. No new translations
- * can be created while we hold the exclusive lock on the page.
- */
- (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
-
- if (hat_ismod(pp))
- return (page_retire_done(pp, PAGE_RETIRE_DIRTY));
-
- /*LINTED: constant in conditional context*/
- VN_DISPOSE(pp, B_INVAL, 0, kcred);
-
- return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
-}
-
-/*
* Mark any existing pages for migration in the given range
*/
void
@@ -6128,140 +5718,6 @@ next:
}
}
-/*
- * initialize the vnode for retired pages
- */
-static void
-page_retired_init(void)
-{
- vn_setops(&retired_ppages, &retired_vnodeops);
-}
-
-/* ARGSUSED */
-static void
-retired_dispose(vnode_t *vp, page_t *pp, int flag, int dn, cred_t *cr)
-{
- panic("retired_dispose invoked");
-}
-
-/* ARGSUSED */
-static void
-retired_inactive(vnode_t *vp, cred_t *cr)
-{}
-
-void
-page_unretire_pages(void)
-{
- page_t *pp;
- kmutex_t *vphm;
- vnode_t *vp;
- page_t *rpages[UNRETIRE_PAGES];
- pgcnt_t i, npages, rmem;
- uint64_t pa;
-
- rmem = 0;
-
- for (;;) {
- /*
- * We do this in 2 steps:
- *
- * 1. We walk the retired pages list and collect a list of
- * pages that have the toxic field cleared.
- *
- * 2. We iterate through the page list and unretire each one.
- *
- * We have to do it in two steps on account of the mutexes that
- * we need to acquire.
- */
-
- vp = &retired_ppages;
- vphm = page_vnode_mutex(vp);
- mutex_enter(vphm);
-
- if ((pp = vp->v_pages) == NULL) {
- mutex_exit(vphm);
- break;
- }
-
- i = 0;
- do {
- ASSERT(pp != NULL);
- ASSERT(pp->p_vnode == vp);
-
- /*
- * DR operations change the association between a page_t
- * and the physical page it represents. Check if the
- * page is still bad. If not, unretire it.
- */
- if (!page_isfaulty(pp))
- rpages[i++] = pp;
-
- pp = pp->p_vpnext;
- } while ((pp != vp->v_pages) && (i < UNRETIRE_PAGES));
-
- mutex_exit(vphm);
-
- npages = i;
- for (i = 0; i < npages; i++) {
- pp = rpages[i];
- pa = ptob((uint64_t)page_pptonum(pp));
-
- /*
- * Need to upgrade the shared lock to an exclusive
- * lock in order to hash out the page.
- *
- * The page could have been retired but the page lock
- * may not have been downgraded yet. If so, skip this
- * page. page_free() will call this function after the
- * lock is downgraded.
- */
-
- if (!PAGE_SHARED(pp) || !page_tryupgrade(pp))
- continue;
-
- /*
- * Both page_free() and DR call this function. They
- * can potentially call this function at the same
- * time and race with each other.
- */
- if (!page_isretired(pp) || page_isfaulty(pp)) {
- page_downgrade(pp);
- continue;
- }
-
- cmn_err(CE_NOTE,
- "unretiring retired page 0x%08x.%08x",
- (uint32_t)(pa >> 32), (uint32_t)pa);
-
- /*
- * When a page is removed from the retired pages vnode,
- * its toxic field is also cleared. So, we do not have
- * to do that separately here.
- */
- page_hashout(pp, (kmutex_t *)NULL);
-
- /*
- * This is a good page. So, free it.
- */
- pp->p_vnode = NULL;
- page_free(pp, 1);
- rmem++;
- }
-
- /*
- * If the rpages array was filled up, then there could be more
- * retired pages that are not faulty. We need to iterate
- * again and unretire them. Otherwise, we are done.
- */
- if (npages < UNRETIRE_PAGES)
- break;
- }
-
- mutex_enter(&freemem_lock);
- availrmem += rmem;
- mutex_exit(&freemem_lock);
-}
-
ulong_t mem_waiters = 0;
ulong_t max_count = 20;
#define MAX_DELAY 0x1ff
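
The removed page_unretire_pages() above worked in two passes: walk the retired vnode's circular v_pages list under the vnode's page mutex, collecting up to UNRETIRE_PAGES candidates whose faulty bit has cleared, then process the batch with the mutex dropped. A stripped-down sketch of the collection pass, using a hypothetical struct in place of page_t:

	#include <stddef.h>

	#define BATCH	256	/* the removed UNRETIRE_PAGES */

	/* minimal stand-in for the page_t fields the walk needs */
	struct page {
		struct page *p_vpnext;	/* circular list linkage on the vnode */
		int p_faulty;		/* stand-in for page_isfaulty() */
	};

	/*
	 * Collect up to BATCH no-longer-faulty pages from the circular list
	 * headed at `head'.  The kernel code held the vnode's page mutex for
	 * this walk and dropped it before unretiring the collected pages.
	 */
	size_t
	collect_unretire_candidates(struct page *head, struct page *out[BATCH])
	{
		struct page *pp = head;
		size_t n = 0;

		if (pp == NULL)
			return (0);
		do {
			if (!pp->p_faulty)
				out[n++] = pp;
			pp = pp->p_vpnext;
		} while (pp != head && n < BATCH);
		return (n);
	}
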
@@ -6621,90 +6077,48 @@ page_clr_all_props(page_t *pp)
}
/*
- * The following functions is called from free_vp_pages()
- * for an inexact estimate of a newly free'd page...
+ * Clear p_lckcnt and p_cowcnt, adjusting freemem if required.
*/
-ulong_t
-page_share_cnt(page_t *pp)
-{
- return (hat_page_getshare(pp));
-}
-
-/*
- * The following functions are used in handling memory
- * errors.
- */
-
-int
-page_istoxic(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_TOXIC) == PAGE_IS_TOXIC);
-}
-
-int
-page_isfailing(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_FAILING) == PAGE_IS_FAILING);
-}
-
-int
-page_isretired(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_RETIRED) == PAGE_IS_RETIRED);
-}
-
int
-page_deteriorating(page_t *pp)
+page_clear_lck_cow(page_t *pp, int adjust)
{
- return ((pp->p_toxic & (PAGE_IS_TOXIC | PAGE_IS_FAILING)) != 0);
-}
+ int f_amount;
-void
-page_settoxic(page_t *pp, uchar_t flag)
-{
- uchar_t new_flag = 0;
- while ((new_flag & flag) != flag) {
- uchar_t old_flag = pp->p_toxic;
- new_flag = old_flag | flag;
- (void) cas8(&pp->p_toxic, old_flag, new_flag);
- new_flag = ((volatile page_t *)pp)->p_toxic;
- }
-}
+ ASSERT(PAGE_EXCL(pp));
-void
-page_clrtoxic(page_t *pp)
-{
/*
- * We don't need to worry about atomicity on the
- * p_toxic flag here as this is only called from
- * page_free() while holding an exclusive lock on
- * the page
+ * The page_struct_lock need not be acquired here since
+ * we require the caller hold the page exclusively locked.
*/
- pp->p_toxic = PAGE_IS_OK;
-}
+ f_amount = 0;
+ if (pp->p_lckcnt) {
+ f_amount = 1;
+ pp->p_lckcnt = 0;
+ }
+ if (pp->p_cowcnt) {
+ f_amount += pp->p_cowcnt;
+ pp->p_cowcnt = 0;
+ }
-void
-page_clrtoxic_flag(page_t *pp, uchar_t flag)
-{
- uchar_t new_flag = ((volatile page_t *)pp)->p_toxic;
- while ((new_flag & flag) == flag) {
- uchar_t old_flag = new_flag;
- new_flag = old_flag & ~flag;
- (void) cas8(&pp->p_toxic, old_flag, new_flag);
- new_flag = ((volatile page_t *)pp)->p_toxic;
+ if (adjust && f_amount) {
+ mutex_enter(&freemem_lock);
+ availrmem += f_amount;
+ mutex_exit(&freemem_lock);
}
-}
-int
-page_isfaulty(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_FAULTY) == PAGE_IS_FAULTY);
+ return (f_amount);
}
/*
- * The following four functions are called from /proc code
- * for the /proc/<pid>/xmap interface.
+ * The following function is called from free_vp_pages()
+ * for an inexact estimate of a newly free'd page...
*/
+ulong_t
+page_share_cnt(page_t *pp)
+{
+ return (hat_page_getshare(pp));
+}
+
int
page_isshared(page_t *pp)
{