Diffstat (limited to 'usr/src/uts/common/vm/vm_page.c')
-rw-r--r--   usr/src/uts/common/vm/vm_page.c   850
1 file changed, 132 insertions(+), 718 deletions(-)
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index 5b3db34db1..27b2702d28 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -87,90 +87,6 @@ static pgcnt_t max_page_get; /* max page_get request size in pages */
pgcnt_t total_pages = 0; /* total number of pages (used by /proc) */
/*
- * vnode for all pages which are retired from the VM system;
- * such as pages with Uncorrectable Errors.
- */
-struct vnode retired_ppages;
-
-static void page_retired_init(void);
-static void retired_dispose(vnode_t *vp, page_t *pp, int flag,
- int dn, cred_t *cr);
-static void retired_inactive(vnode_t *vp, cred_t *cr);
-static void page_retired(page_t *pp);
-static void retired_page_removed(page_t *pp);
-void page_unretire_pages(void);
-
-/*
- * The maximum number of pages that will be unretired in one iteration.
- * This number is totally arbitrary.
- */
-#define UNRETIRE_PAGES 256
-
-/*
- * We limit the number of pages that may be retired to
- * a percentage of the total physical memory. Note that
- * the percentage values are stored as 'basis points',
- * ie, 100 basis points is 1%.
- */
-#define MAX_PAGES_RETIRED_BPS_DEFAULT 10 /* .1% */
-
-uint64_t max_pages_retired_bps = MAX_PAGES_RETIRED_BPS_DEFAULT;
-
-static int pages_retired_limit_exceeded(void);
-
-/*
- * operations vector for vnode with retired pages. Only VOP_DISPOSE
- * and VOP_INACTIVE are intercepted.
- */
-struct vnodeops retired_vnodeops = {
- "retired_vnodeops",
- fs_nosys, /* open */
- fs_nosys, /* close */
- fs_nosys, /* read */
- fs_nosys, /* write */
- fs_nosys, /* ioctl */
- fs_nosys, /* setfl */
- fs_nosys, /* getattr */
- fs_nosys, /* setattr */
- fs_nosys, /* access */
- fs_nosys, /* lookup */
- fs_nosys, /* create */
- fs_nosys, /* remove */
- fs_nosys, /* link */
- fs_nosys, /* rename */
- fs_nosys, /* mkdir */
- fs_nosys, /* rmdir */
- fs_nosys, /* readdir */
- fs_nosys, /* symlink */
- fs_nosys, /* readlink */
- fs_nosys, /* fsync */
- retired_inactive,
- fs_nosys, /* fid */
- fs_rwlock, /* rwlock */
- fs_rwunlock, /* rwunlock */
- fs_nosys, /* seek */
- fs_nosys, /* cmp */
- fs_nosys, /* frlock */
- fs_nosys, /* space */
- fs_nosys, /* realvp */
- fs_nosys, /* getpage */
- fs_nosys, /* putpage */
- fs_nosys_map,
- fs_nosys_addmap,
- fs_nosys, /* delmap */
- fs_nosys_poll,
- fs_nosys, /* dump */
- fs_nosys, /* l_pathconf */
- fs_nosys, /* pageio */
- fs_nosys, /* dumpctl */
- retired_dispose,
- fs_nosys, /* setsecattr */
- fs_nosys, /* getsecatt */
- fs_nosys, /* shrlock */
- fs_vnevent_nosupport /* vnevent */
-};
-
-/*
* freemem_lock protects all freemem variables:
* availrmem. Also this lock protects the globals which track the
* availrmem changes for accurate kernel footprint calculation.
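
For reference, the block deleted above capped page retirement at a fraction of physical memory expressed in basis points (100 bps == 1%). The following is a minimal standalone C sketch of that arithmetic; the physmem value is a made-up example and the variable names are hypothetical, but the formula matches the removed pages_retired_limit_exceeded():

	#include <stdio.h>
	#include <inttypes.h>

	#define MAX_PAGES_RETIRED_BPS_DEFAULT	10	/* 10 bps == 0.1% */

	int
	main(void)
	{
		uint64_t physmem = 1048576;	/* example: 4 GB of 4 KB pages */
		uint64_t bps = MAX_PAGES_RETIRED_BPS_DEFAULT;

		/* same formula the removed pages_retired_limit_exceeded() used */
		uint64_t retired_max = (physmem * bps) / 10000;

		printf("at most %" PRIu64 " pages may be retired\n", retired_max);
		return (0);
	}

With those example numbers the cap works out to 1048 pages, roughly 4 MB.
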
@@ -289,15 +205,6 @@ static kcondvar_t pcgs_cv; /* cv for delay in pcgs */
#define PAGE_LOCK_MAXIMUM \
((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1)
-/*
- * Control over the verbosity of page retirement. When set to zero, no messages
- * will be printed. A value of one will trigger messages for retirement
- * operations, and is intended for processors which don't yet support FMA
- * (spitfire). Two will cause verbose messages to be printed when retirements
- * complete, and is intended only for debugging purposes.
- */
-int page_retire_messages = 0;
-
#ifdef VM_STATS
/*
@@ -440,11 +347,7 @@ vm_init(void)
(void) callb_add(callb_vm_cpr, 0, CB_CL_CPR_VM, "vm");
page_init_mem_config();
-
- /*
- * initialise the vnode for retired pages
- */
- page_retired_init();
+ page_retire_init();
}
/*
@@ -2799,153 +2702,6 @@ page_free(page_t *pp, int dontneed)
ASSERT((PAGE_EXCL(pp) &&
!page_iolock_assert(pp)) || panicstr);
- if (page_deteriorating(pp)) {
- volatile int i = 0;
- char *kaddr;
- volatile int rb, wb;
- uint64_t pa;
- volatile int ue = 0;
- on_trap_data_t otd;
-
- if (pp->p_vnode != NULL) {
- /*
- * Let page_destroy() do its bean counting and
- * hash out the page; it will then call back
- * into page_free() with pp->p_vnode == NULL.
- */
- page_destroy(pp, 0);
- return;
- }
-
- if (page_isfailing(pp)) {
- /*
- * If we have already exceeded the limit for
- * pages retired, we will treat this page as
- * 'toxic' rather than failing. That will ensure
- * that the page is at least cleaned, and if
- * a UE is detected, the page will be retired
- * anyway.
- */
- if (pages_retired_limit_exceeded()) {
- /*
- * clear the flag and reset to toxic
- */
- page_clrtoxic(pp);
- page_settoxic(pp, PAGE_IS_TOXIC);
- } else {
- pa = ptob((uint64_t)page_pptonum(pp));
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Page 0x%08x.%08x "
- "removed from service",
- (uint32_t)(pa >> 32), (uint32_t)pa);
- }
- goto page_failed;
- }
- }
-
- pagescrub(pp, 0, PAGESIZE);
-
- /*
- * We want to determine whether the error that occurred on
- * this page is transient or persistent, so we get a mapping
- * to the page and try every possible bit pattern to compare
- * what we write with what we read back. A smaller number
- * of bit patterns might suffice, but there's no point in
- * getting fancy. If this is the hot path on your system,
- * you've got bigger problems.
- */
- kaddr = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
- for (wb = 0xff; wb >= 0; wb--) {
- if (on_trap(&otd, OT_DATA_EC)) {
- pa = ptob((uint64_t)page_pptonum(pp)) + i;
- page_settoxic(pp, PAGE_IS_FAILING);
-
- if (page_retire_messages) {
- cmn_err(CE_WARN, "Uncorrectable Error "
- "occurred at PA 0x%08x.%08x while "
- "attempting to clear previously "
- "reported error; page removed from "
- "service", (uint32_t)(pa >> 32),
- (uint32_t)pa);
- }
-
- ue++;
- break;
- }
-
- /*
- * Write out the bit pattern, flush it to memory, and
- * read it back while under on_trap() protection.
- */
- for (i = 0; i < PAGESIZE; i++)
- kaddr[i] = wb;
-
- sync_data_memory(kaddr, PAGESIZE);
-
- for (i = 0; i < PAGESIZE; i++) {
- if ((rb = (uchar_t)kaddr[i]) != wb) {
- page_settoxic(pp, PAGE_IS_FAILING);
- goto out;
- }
- }
- }
-out:
- no_trap();
- ppmapout(kaddr);
-
- if (wb >= 0 && !ue) {
- pa = ptob((uint64_t)page_pptonum(pp)) + i;
- if (page_retire_messages) {
- cmn_err(CE_WARN, "Data Mismatch occurred at PA "
- "0x%08x.%08x [ 0x%x != 0x%x ] while "
- "attempting to clear previously reported "
- "error; page removed from service",
- (uint32_t)(pa >> 32), (uint32_t)pa, rb, wb);
- }
- }
-page_failed:
- /*
- * DR operations change the association between a page_t
- * and the physical page it represents. Check if the
- * page is still bad. If it is, then retire it.
- */
- if (page_isfaulty(pp) && page_isfailing(pp)) {
- /*
- * In the future, it might be useful to have a platform
- * callback here to tell the hardware to fence off this
- * page during the next reboot.
- *
- * We move the page to the retired_vnode here
- */
- (void) page_hashin(pp, &retired_ppages,
- (u_offset_t)ptob((uint64_t)page_pptonum(pp)), NULL);
- mutex_enter(&freemem_lock);
- availrmem--;
- mutex_exit(&freemem_lock);
- page_retired(pp);
- page_downgrade(pp);
-
- /*
- * If DR raced with the above page retirement code,
- * we might have retired a good page. If so, unretire
- * the page.
- */
- if (!page_isfaulty(pp))
- page_unretire_pages();
- return;
- }
-
- pa = ptob((uint64_t)page_pptonum(pp));
-
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Previously reported error on page "
- "0x%08x.%08x cleared", (uint32_t)(pa >> 32),
- (uint32_t)pa);
- }
-
- page_clrtoxic(pp);
- }
-
if (PP_ISFREE(pp)) {
panic("page_free: page %p is free", (void *)pp);
}
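
For context, the large block removed from page_free() above tried to tell transient errors from persistent ones by writing every byte pattern to the page and reading it back under on_trap() protection. Below is a simplified userland sketch of just the write/verify loop over an ordinary buffer; on_trap() and sync_data_memory() have no userland equivalents and are only noted in comments:

	#include <stddef.h>

	/*
	 * Write every byte pattern 0xff..0x00 to `buf' and read it back.
	 * Returns 1 on the first mismatch (a persistent error in the removed
	 * kernel code), 0 if all 256 patterns read back intact.  The kernel
	 * version flushed with sync_data_memory() and ran the loop under
	 * on_trap(OT_DATA_EC) so a UE would be caught rather than panic.
	 */
	int
	verify_patterns(volatile unsigned char *buf, size_t len)
	{
		for (int wb = 0xff; wb >= 0; wb--) {
			for (size_t i = 0; i < len; i++)
				buf[i] = (unsigned char)wb;
			/* sync_data_memory(buf, len) would go here in the kernel */
			for (size_t i = 0; i < len; i++) {
				if (buf[i] != (unsigned char)wb)
					return (1);	/* persistent data mismatch */
			}
		}
		return (0);	/* all 256 patterns read back correctly */
	}
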
@@ -3089,7 +2845,6 @@ page_free_pages(page_t *pp)
pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc);
pgcnt_t i;
uint_t szc = pp->p_szc;
- int toxic = 0;
VM_STAT_ADD(pagecnt.pc_free_pages);
TRACE_1(TR_FAC_VM, TR_PAGE_FREE_FREE,
@@ -3118,9 +2873,6 @@ page_free_pages(page_t *pp)
ASSERT(tpp->p_vnode == NULL);
ASSERT(tpp->p_szc == szc);
- if (page_deteriorating(tpp))
- toxic = 1;
-
PP_SETFREE(tpp);
page_clr_all_props(tpp);
PP_SETAGED(tpp);
@@ -3131,10 +2883,6 @@ page_free_pages(page_t *pp)
}
ASSERT(rootpp == pp);
- if (toxic) {
- page_free_toxic_pages(rootpp);
- return;
- }
page_list_add_pages(rootpp, 0);
page_create_putback(pgcnt);
}
@@ -3219,12 +2967,13 @@ page_reclaim(page_t *pp, kmutex_t *lock)
struct pcf *p;
uint_t pcf_index;
struct cpu *cpup;
- int enough;
uint_t i;
+ pgcnt_t npgs, need, collected;
ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);
ASSERT(PAGE_EXCL(pp) && PP_ISFREE(pp));
- ASSERT(pp->p_szc == 0);
+
+ npgs = page_get_pagecnt(pp->p_szc);
/*
* If `freemem' is 0, we cannot reclaim this page from the
@@ -3254,18 +3003,19 @@ page_reclaim(page_t *pp, kmutex_t *lock)
goto page_reclaim_nomem;
}
- enough = 0;
+ collected = 0;
pcf_index = PCF_INDEX();
p = &pcf[pcf_index];
p->pcf_touch = 1;
mutex_enter(&p->pcf_lock);
- if (p->pcf_count >= 1) {
- enough = 1;
- p->pcf_count--;
+ if (p->pcf_count >= npgs) {
+ collected = npgs;
+ p->pcf_count -= npgs;
}
mutex_exit(&p->pcf_lock);
+ need = npgs - collected;
- if (!enough) {
+ if (need > 0) {
VM_STAT_ADD(page_reclaim_zero);
/*
* Check again. It's possible that some other thread
@@ -3277,15 +3027,22 @@ page_reclaim(page_t *pp, kmutex_t *lock)
for (i = 0; i < PCF_FANOUT; i++) {
p->pcf_touch = 1;
mutex_enter(&p->pcf_lock);
- if (p->pcf_count >= 1) {
- p->pcf_count -= 1;
- enough = 1;
- break;
+ if (p->pcf_count) {
+ if (p->pcf_count >= need) {
+ p->pcf_count -= need;
+ collected += need;
+ need = 0;
+ break;
+ } else if (p->pcf_count) {
+ collected += p->pcf_count;
+ need -= p->pcf_count;
+ p->pcf_count = 0;
+ }
}
p++;
}
- if (!enough) {
+ if (need > 0) {
page_reclaim_nomem:
/*
* We really can't have page `pp'.
@@ -3309,6 +3066,7 @@ page_reclaim_nomem:
mutex_enter(&new_freemem_lock);
p = pcf;
+ p->pcf_count += collected;
for (i = 0; i < PCF_FANOUT; i++) {
p->pcf_wait++;
mutex_exit(&p->pcf_lock);
@@ -3328,11 +3086,13 @@ page_reclaim_nomem:
}
/*
- * There was a page to be found.
+ * We beat the PCF bins over the head until
+ * we got the memory that we wanted.
* The pcf accounting has been done,
* though none of the pcf_wait flags have been set,
* drop the locks and continue on.
*/
+ ASSERT(collected == npgs);
while (p >= pcf) {
mutex_exit(&p->pcf_lock);
p--;
@@ -3343,14 +3103,19 @@ page_reclaim_nomem:
* freemem is not protected by any lock. Thus, we cannot
* have any assertion containing freemem here.
*/
- freemem -= 1;
+ freemem -= npgs;
VM_STAT_ADD(pagecnt.pc_reclaim);
if (PP_ISAGED(pp)) {
- page_list_sub(pp, PG_FREE_LIST);
+ if (npgs > 1) {
+ page_list_sub_pages(pp, pp->p_szc);
+ } else {
+ page_list_sub(pp, PG_FREE_LIST);
+ }
TRACE_1(TR_FAC_VM, TR_PAGE_UNFREE_FREE,
"page_reclaim_free:pp %p", pp);
} else {
+ ASSERT(npgs == 1);
page_list_sub(pp, PG_CACHE_LIST);
TRACE_1(TR_FAC_VM, TR_PAGE_UNFREE_CACHE,
"page_reclaim_cache:pp %p", pp);
@@ -3363,9 +3128,11 @@ page_reclaim_nomem:
*
* Set the reference bit to protect against immediate pageout.
*/
- PP_CLRFREE(pp);
- PP_CLRAGED(pp);
- page_set_props(pp, P_REF);
+ for (i = 0; i < npgs; i++, pp = page_next(pp)) {
+ PP_CLRFREE(pp);
+ PP_CLRAGED(pp);
+ page_set_props(pp, P_REF);
+ }
CPU_STATS_ENTER_K();
cpup = CPU; /* get cpup now that CPU cannot change */
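
The reworked page_reclaim() above must now collect credit for npgs pages rather than one: first from the current CPU's pcf bucket, then, if that falls short, by sweeping every bucket and draining partial counts. A minimal sketch of that collection pass, with the per-bucket pcf_lock and the give-back-on-failure path omitted and the names simplified:

	#include <stddef.h>

	#define PCF_FANOUT	8

	/* simplified stand-in for the kernel's pcf[] free-page counters */
	static size_t pcf_count[PCF_FANOUT];

	/*
	 * Try to collect credit for `npgs' pages, draining partial counts
	 * from each bucket as needed.  Returns how many were actually
	 * collected; a caller that comes up short must return the partial
	 * credit and wait, as page_reclaim() does in its nomem path.
	 */
	size_t
	pcf_collect(size_t npgs)
	{
		size_t need = npgs;
		size_t collected = 0;

		for (int i = 0; i < PCF_FANOUT && need > 0; i++) {
			if (pcf_count[i] >= need) {
				pcf_count[i] -= need;
				collected += need;
				need = 0;
			} else if (pcf_count[i] > 0) {
				collected += pcf_count[i];
				need -= pcf_count[i];
				pcf_count[i] = 0;
			}
		}
		return (collected);
	}
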
@@ -3441,7 +3208,6 @@ page_destroy_pages(page_t *pp)
pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc);
pgcnt_t i, pglcks = 0;
uint_t szc = pp->p_szc;
- int toxic = 0;
ASSERT(pp->p_szc != 0 && pp->p_szc < page_num_pagesizes());
@@ -3471,9 +3237,6 @@ page_destroy_pages(page_t *pp)
ASSERT(tpp->p_vnode == NULL);
ASSERT(tpp->p_szc == szc);
- if (page_deteriorating(tpp))
- toxic = 1;
-
PP_SETFREE(tpp);
page_clr_all_props(tpp);
PP_SETAGED(tpp);
@@ -3489,10 +3252,6 @@ page_destroy_pages(page_t *pp)
mutex_exit(&freemem_lock);
}
- if (toxic) {
- page_free_toxic_pages(rootpp);
- return;
- }
page_list_add_pages(rootpp, 0);
page_create_putback(pgcnt);
}
@@ -3916,14 +3675,6 @@ page_hashout(page_t *pp, kmutex_t *phm)
mutex_exit(nphm);
/*
- * If the page was retired, update the pages_retired
- * total and clear the page flag
- */
- if (page_isretired(pp)) {
- retired_page_removed(pp);
- }
-
- /*
* Wake up processes waiting for this page. The page's
* identity has been changed, and is probably not the
* desired page any longer.
@@ -5397,6 +5148,63 @@ page_release(page_t *pp, int checkmod)
return (status);
}
+/*
+ * Given a constituent page, try to demote the large page on the freelist.
+ *
+ * Returns nonzero if the page could be demoted successfully. Returns with
+ * the constituent page still locked.
+ */
+int
+page_try_demote_free_pages(page_t *pp)
+{
+ page_t *rootpp = pp;
+ pfn_t pfn = page_pptonum(pp);
+ spgcnt_t npgs;
+ uint_t szc = pp->p_szc;
+
+ ASSERT(PP_ISFREE(pp));
+ ASSERT(PAGE_EXCL(pp));
+
+ /*
+ * Adjust rootpp and lock it, if `pp' is not the base
+ * constituent page.
+ */
+ npgs = page_get_pagecnt(pp->p_szc);
+ if (npgs == 1) {
+ return (0);
+ }
+
+ if (!IS_P2ALIGNED(pfn, npgs)) {
+ pfn = P2ALIGN(pfn, npgs);
+ rootpp = page_numtopp_nolock(pfn);
+ }
+
+ if (pp != rootpp && !page_trylock(rootpp, SE_EXCL)) {
+ return (0);
+ }
+
+ if (rootpp->p_szc != szc) {
+ if (pp != rootpp)
+ page_unlock(rootpp);
+ return (0);
+ }
+
+ page_demote_free_pages(rootpp);
+
+ if (pp != rootpp)
+ page_unlock(rootpp);
+
+ ASSERT(PP_ISFREE(pp));
+ ASSERT(PAGE_EXCL(pp));
+ return (1);
+}
+
+/*
+ * Given a constituent page, try to demote the large page.
+ *
+ * Returns nonzero if the page could be demoted successfully. Returns with
+ * the constituent page still locked.
+ */
int
page_try_demote_pages(page_t *pp)
{
@@ -5406,27 +5214,27 @@ page_try_demote_pages(page_t *pp)
uint_t szc = pp->p_szc;
vnode_t *vp = pp->p_vnode;
- ASSERT(PAGE_EXCL(rootpp));
+ ASSERT(PAGE_EXCL(pp));
VM_STAT_ADD(pagecnt.pc_try_demote_pages[0]);
- if (rootpp->p_szc == 0) {
+ if (pp->p_szc == 0) {
VM_STAT_ADD(pagecnt.pc_try_demote_pages[1]);
return (1);
}
if (vp != NULL && !IS_SWAPFSVP(vp) && vp != &kvp) {
VM_STAT_ADD(pagecnt.pc_try_demote_pages[2]);
- page_demote_vp_pages(rootpp);
+ page_demote_vp_pages(pp);
ASSERT(pp->p_szc == 0);
return (1);
}
/*
- * Adjust rootpp if passed in is not the base
+ * Adjust rootpp if passed in is not the base
* constituent page.
*/
- npgs = page_get_pagecnt(rootpp->p_szc);
+ npgs = page_get_pagecnt(pp->p_szc);
ASSERT(npgs > 1);
if (!IS_P2ALIGNED(pfn, npgs)) {
pfn = P2ALIGN(pfn, npgs);
@@ -5455,12 +5263,11 @@ page_try_demote_pages(page_t *pp)
break;
ASSERT(tpp->p_szc == rootpp->p_szc);
ASSERT(page_pptonum(tpp) == page_pptonum(rootpp) + i);
- (void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
}
/*
- * If we failed to lock them all then unlock what we have locked
- * so far and bail.
+ * If we failed to lock them all then unlock what we have
+ * locked so far and bail.
*/
if (i < npgs) {
tpp = rootpp;
@@ -5473,12 +5280,9 @@ page_try_demote_pages(page_t *pp)
return (0);
}
- /*
- * XXX probably p_szc clearing and page unlocking can be done within
- * one loop but since this is rare code we can play very safe.
- */
for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) {
ASSERT(PAGE_EXCL(tpp));
+ (void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
tpp->p_szc = 0;
}
@@ -5490,6 +5294,7 @@ page_try_demote_pages(page_t *pp)
if (tpp != pp)
page_unlock(tpp);
}
+
VM_STAT_ADD(pagecnt.pc_try_demote_pages[5]);
return (1);
}
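
Both demote routines above locate the base (root) constituent page by rounding the pfn down to the large-page boundary with P2ALIGN(). A small standalone illustration of that rounding; the pfn and page count are made-up values, and the macro matches the kernel's definition for power-of-two alignments:

	#include <stdio.h>
	#include <inttypes.h>

	/* power-of-two round-down, as the kernel's P2ALIGN() macro does */
	#define P2ALIGN(x, align)	((x) & -(uint64_t)(align))

	int
	main(void)
	{
		uint64_t pfn = 0x12345;	/* hypothetical constituent page */
		uint64_t npgs = 512;	/* e.g. a 2 MB page built of 4 KB pages */
		uint64_t rootpfn = P2ALIGN(pfn, npgs);

		/* prints: pfn 0x12345 lies in the large page rooted at 0x12200 */
		printf("pfn 0x%" PRIx64 " lies in the large page rooted at 0x%" PRIx64 "\n",
		    pfn, rootpfn);
		return (0);
	}
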
@@ -5579,221 +5384,6 @@ page_demote_vp_pages(page_t *pp)
}
/*
- * Page retire operation.
- *
- * page_retire()
- * Attempt to retire (throw away) page pp. We cannot do this if
- * the page is dirty; if the page is clean, we can try. We return 0 on
- * success, -1 on failure. This routine should be invoked by the platform's
- * memory error detection code.
- *
- * pages_retired_limit_exceeded()
- * We set a limit on the number of pages which may be retired. This
- * is set to a percentage of total physical memory. This limit is
- * enforced here.
- */
-
-static pgcnt_t retired_pgcnt = 0;
-
-/*
- * routines to update the count of retired pages
- */
-static void
-page_retired(page_t *pp)
-{
- ASSERT(pp);
-
- page_settoxic(pp, PAGE_IS_RETIRED);
- atomic_add_long(&retired_pgcnt, 1);
-}
-
-static void
-retired_page_removed(page_t *pp)
-{
- ASSERT(pp);
- ASSERT(page_isretired(pp));
- ASSERT(retired_pgcnt > 0);
-
- page_clrtoxic(pp);
- atomic_add_long(&retired_pgcnt, -1);
-}
-
-
-static int
-pages_retired_limit_exceeded()
-{
- pgcnt_t retired_max;
-
- /*
- * If the percentage is zero or is not set correctly,
- * return TRUE so that pages are not retired.
- */
- if (max_pages_retired_bps <= 0 ||
- max_pages_retired_bps >= 10000)
- return (1);
-
- /*
- * Calculate the maximum number of pages allowed to
- * be retired as a percentage of total physical memory
- * (Remember that we are using basis points, hence the 10000.)
- */
- retired_max = (physmem * max_pages_retired_bps) / 10000;
-
- /*
- * return 'TRUE' if we have already retired more
- * than the legal limit
- */
- return (retired_pgcnt >= retired_max);
-}
-
-#define PAGE_RETIRE_SELOCK 0
-#define PAGE_RETIRE_NORECLAIM 1
-#define PAGE_RETIRE_LOCKED 2
-#define PAGE_RETIRE_COW 3
-#define PAGE_RETIRE_DIRTY 4
-#define PAGE_RETIRE_LPAGE 5
-#define PAGE_RETIRE_SUCCESS 6
-#define PAGE_RETIRE_LIMIT 7
-#define PAGE_RETIRE_NCODES 8
-
-typedef struct page_retire_op {
- int pr_count;
- short pr_unlock;
- short pr_retval;
- char *pr_message;
-} page_retire_op_t;
-
-page_retire_op_t page_retire_ops[PAGE_RETIRE_NCODES] = {
- { 0, 0, -1, "cannot lock page" },
- { 0, 0, -1, "cannot reclaim cached page" },
- { 0, 1, -1, "page is locked" },
- { 0, 1, -1, "copy-on-write page" },
- { 0, 1, -1, "page is dirty" },
- { 0, 1, -1, "cannot demote large page" },
- { 0, 0, 0, "page successfully retired" },
- { 0, 0, -1, "excess pages retired already" },
-};
-
-static int
-page_retire_done(page_t *pp, int code)
-{
- page_retire_op_t *prop = &page_retire_ops[code];
-
- prop->pr_count++;
-
- if (prop->pr_unlock)
- page_unlock(pp);
-
- if (page_retire_messages > 1) {
- printf("page_retire(%p) pfn 0x%lx %s: %s\n",
- (void *)pp, page_pptonum(pp),
- prop->pr_retval == -1 ? "failed" : "succeeded",
- prop->pr_message);
- }
-
- return (prop->pr_retval);
-}
-
-int
-page_retire(page_t *pp, uchar_t flag)
-{
- uint64_t pa = ptob((uint64_t)page_pptonum(pp));
-
- ASSERT(flag == PAGE_IS_FAILING || flag == PAGE_IS_TOXIC);
-
- /*
- * DR operations change the association between a page_t
- * and the physical page it represents. Check if the
- * page is still bad.
- */
- if (!page_isfaulty(pp)) {
- page_clrtoxic(pp);
- return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
- }
-
- /*
- * We set the flag here so that even if we fail due
- * to exceeding the limit for retired pages, the
- * page will still be checked and either cleared
- * or retired in page_free().
- */
- page_settoxic(pp, flag);
-
- if (flag == PAGE_IS_TOXIC) {
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Scheduling clearing of error on"
- " page 0x%08x.%08x",
- (uint32_t)(pa >> 32), (uint32_t)pa);
- }
-
- } else { /* PAGE_IS_FAILING */
- if (pages_retired_limit_exceeded()) {
- /*
- * Return as we have already exceeded the
- * maximum number of pages allowed to be
- * retired
- */
- return (page_retire_done(pp, PAGE_RETIRE_LIMIT));
- }
-
- if (page_retire_messages) {
- cmn_err(CE_NOTE, "Scheduling removal of "
- "page 0x%08x.%08x",
- (uint32_t)(pa >> 32), (uint32_t)pa);
- }
- }
-
- if (PAGE_LOCKED(pp) || !page_trylock(pp, SE_EXCL))
- return (page_retire_done(pp, PAGE_RETIRE_SELOCK));
-
- /*
- * If this is a large page we first try and demote it
- * to PAGESIZE pages and then dispose of the toxic page.
- * On failure we will let the page free/destroy
- * code handle it later since this is a mapped page.
- * Note that free large pages can always be demoted.
- *
- */
- if (pp->p_szc != 0) {
- if (PP_ISFREE(pp))
- (void) page_demote_free_pages(pp);
- else
- (void) page_try_demote_pages(pp);
-
- if (pp->p_szc != 0)
- return (page_retire_done(pp, PAGE_RETIRE_LPAGE));
- }
-
- if (PP_ISFREE(pp)) {
- if (!page_reclaim(pp, NULL))
- return (page_retire_done(pp, PAGE_RETIRE_NORECLAIM));
- /*LINTED: constant in conditional context*/
- VN_DISPOSE(pp, pp->p_vnode ? B_INVAL : B_FREE, 0, kcred)
- return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
- }
-
- if (pp->p_lckcnt != 0)
- return (page_retire_done(pp, PAGE_RETIRE_LOCKED));
-
- if (pp->p_cowcnt != 0)
- return (page_retire_done(pp, PAGE_RETIRE_COW));
-
- /*
- * Unload all translations to this page. No new translations
- * can be created while we hold the exclusive lock on the page.
- */
- (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
-
- if (hat_ismod(pp))
- return (page_retire_done(pp, PAGE_RETIRE_DIRTY));
-
- /*LINTED: constant in conditional context*/
- VN_DISPOSE(pp, B_INVAL, 0, kcred);
-
- return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
-}
-
-/*
* Mark any existing pages for migration in the given range
*/
void
@@ -6128,140 +5718,6 @@ next:
}
}
-/*
- * initialize the vnode for retired pages
- */
-static void
-page_retired_init(void)
-{
- vn_setops(&retired_ppages, &retired_vnodeops);
-}
-
-/* ARGSUSED */
-static void
-retired_dispose(vnode_t *vp, page_t *pp, int flag, int dn, cred_t *cr)
-{
- panic("retired_dispose invoked");
-}
-
-/* ARGSUSED */
-static void
-retired_inactive(vnode_t *vp, cred_t *cr)
-{}
-
-void
-page_unretire_pages(void)
-{
- page_t *pp;
- kmutex_t *vphm;
- vnode_t *vp;
- page_t *rpages[UNRETIRE_PAGES];
- pgcnt_t i, npages, rmem;
- uint64_t pa;
-
- rmem = 0;
-
- for (;;) {
- /*
- * We do this in 2 steps:
- *
- * 1. We walk the retired pages list and collect a list of
- * pages that have the toxic field cleared.
- *
- * 2. We iterate through the page list and unretire each one.
- *
- * We have to do it in two steps on account of the mutexes that
- * we need to acquire.
- */
-
- vp = &retired_ppages;
- vphm = page_vnode_mutex(vp);
- mutex_enter(vphm);
-
- if ((pp = vp->v_pages) == NULL) {
- mutex_exit(vphm);
- break;
- }
-
- i = 0;
- do {
- ASSERT(pp != NULL);
- ASSERT(pp->p_vnode == vp);
-
- /*
- * DR operations change the association between a page_t
- * and the physical page it represents. Check if the
- * page is still bad. If not, unretire it.
- */
- if (!page_isfaulty(pp))
- rpages[i++] = pp;
-
- pp = pp->p_vpnext;
- } while ((pp != vp->v_pages) && (i < UNRETIRE_PAGES));
-
- mutex_exit(vphm);
-
- npages = i;
- for (i = 0; i < npages; i++) {
- pp = rpages[i];
- pa = ptob((uint64_t)page_pptonum(pp));
-
- /*
- * Need to upgrade the shared lock to an exclusive
- * lock in order to hash out the page.
- *
- * The page could have been retired but the page lock
- * may not have been downgraded yet. If so, skip this
- * page. page_free() will call this function after the
- * lock is downgraded.
- */
-
- if (!PAGE_SHARED(pp) || !page_tryupgrade(pp))
- continue;
-
- /*
- * Both page_free() and DR call this function. They
- * can potentially call this function at the same
- * time and race with each other.
- */
- if (!page_isretired(pp) || page_isfaulty(pp)) {
- page_downgrade(pp);
- continue;
- }
-
- cmn_err(CE_NOTE,
- "unretiring retired page 0x%08x.%08x",
- (uint32_t)(pa >> 32), (uint32_t)pa);
-
- /*
- * When a page is removed from the retired pages vnode,
- * its toxic field is also cleared. So, we do not have
- * to do that separately here.
- */
- page_hashout(pp, (kmutex_t *)NULL);
-
- /*
- * This is a good page. So, free it.
- */
- pp->p_vnode = NULL;
- page_free(pp, 1);
- rmem++;
- }
-
- /*
- * If the rpages array was filled up, then there could be more
- * retired pages that are not faulty. We need to iterate
- * again and unretire them. Otherwise, we are done.
- */
- if (npages < UNRETIRE_PAGES)
- break;
- }
-
- mutex_enter(&freemem_lock);
- availrmem += rmem;
- mutex_exit(&freemem_lock);
-}
-
ulong_t mem_waiters = 0;
ulong_t max_count = 20;
#define MAX_DELAY 0x1ff
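
The removed page_unretire_pages() above worked in two passes: walk the retired vnode's circular v_pages list under the vnode's page mutex, collecting up to UNRETIRE_PAGES candidates whose faulty bit has cleared, then process the batch with the mutex dropped. A stripped-down sketch of the collection pass, using a hypothetical struct in place of page_t:

	#include <stddef.h>

	#define BATCH	256	/* the removed UNRETIRE_PAGES */

	/* minimal stand-in for the page_t fields the walk needs */
	struct page {
		struct page *p_vpnext;	/* circular list linkage on the vnode */
		int p_faulty;		/* stand-in for page_isfaulty() */
	};

	/*
	 * Collect up to BATCH no-longer-faulty pages from the circular list
	 * headed at `head'.  The kernel code held the vnode's page mutex for
	 * this walk and dropped it before unretiring the collected pages.
	 */
	size_t
	collect_unretire_candidates(struct page *head, struct page *out[BATCH])
	{
		struct page *pp = head;
		size_t n = 0;

		if (pp == NULL)
			return (0);
		do {
			if (!pp->p_faulty)
				out[n++] = pp;
			pp = pp->p_vpnext;
		} while (pp != head && n < BATCH);
		return (n);
	}
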
@@ -6621,90 +6077,48 @@ page_clr_all_props(page_t *pp)
}
/*
- * The following functions is called from free_vp_pages()
- * for an inexact estimate of a newly free'd page...
+ * Clear p_lckcnt and p_cowcnt, adjusting freemem if required.
*/
-ulong_t
-page_share_cnt(page_t *pp)
-{
- return (hat_page_getshare(pp));
-}
-
-/*
- * The following functions are used in handling memory
- * errors.
- */
-
-int
-page_istoxic(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_TOXIC) == PAGE_IS_TOXIC);
-}
-
-int
-page_isfailing(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_FAILING) == PAGE_IS_FAILING);
-}
-
-int
-page_isretired(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_RETIRED) == PAGE_IS_RETIRED);
-}
-
int
-page_deteriorating(page_t *pp)
+page_clear_lck_cow(page_t *pp, int adjust)
{
- return ((pp->p_toxic & (PAGE_IS_TOXIC | PAGE_IS_FAILING)) != 0);
-}
+ int f_amount;
-void
-page_settoxic(page_t *pp, uchar_t flag)
-{
- uchar_t new_flag = 0;
- while ((new_flag & flag) != flag) {
- uchar_t old_flag = pp->p_toxic;
- new_flag = old_flag | flag;
- (void) cas8(&pp->p_toxic, old_flag, new_flag);
- new_flag = ((volatile page_t *)pp)->p_toxic;
- }
-}
+ ASSERT(PAGE_EXCL(pp));
-void
-page_clrtoxic(page_t *pp)
-{
/*
- * We don't need to worry about atomicity on the
- * p_toxic flag here as this is only called from
- * page_free() while holding an exclusive lock on
- * the page
+ * The page_struct_lock need not be acquired here since
+ * we require the caller hold the page exclusively locked.
*/
- pp->p_toxic = PAGE_IS_OK;
-}
+ f_amount = 0;
+ if (pp->p_lckcnt) {
+ f_amount = 1;
+ pp->p_lckcnt = 0;
+ }
+ if (pp->p_cowcnt) {
+ f_amount += pp->p_cowcnt;
+ pp->p_cowcnt = 0;
+ }
-void
-page_clrtoxic_flag(page_t *pp, uchar_t flag)
-{
- uchar_t new_flag = ((volatile page_t *)pp)->p_toxic;
- while ((new_flag & flag) == flag) {
- uchar_t old_flag = new_flag;
- new_flag = old_flag & ~flag;
- (void) cas8(&pp->p_toxic, old_flag, new_flag);
- new_flag = ((volatile page_t *)pp)->p_toxic;
+ if (adjust && f_amount) {
+ mutex_enter(&freemem_lock);
+ availrmem += f_amount;
+ mutex_exit(&freemem_lock);
}
-}
-int
-page_isfaulty(page_t *pp)
-{
- return ((pp->p_toxic & PAGE_IS_FAULTY) == PAGE_IS_FAULTY);
+ return (f_amount);
}
/*
- * The following four functions are called from /proc code
- * for the /proc/<pid>/xmap interface.
+ * The following function is called from free_vp_pages()
+ * for an inexact estimate of a newly free'd page...
*/
+ulong_t
+page_share_cnt(page_t *pp)
+{
+ return (hat_page_getshare(pp));
+}
+
int
page_isshared(page_t *pp)
{