author    jmcp <James.McPherson@Sun.COM> 2010-05-03 04:17:29 -0700
committer jmcp <James.McPherson@Sun.COM> 2010-05-03 04:17:29 -0700
commit    d94ffb286aba68edc813c6eda61754891db7f7a1 (patch)
tree      13574e02d1ccafdd36362ac5aa2569093e5df40e /usr/src
parent    ea46d7619be99679c4c99ed47508abe31d5e0979 (diff)
download  illumos-joyent-d94ffb286aba68edc813c6eda61754891db7f7a1.tar.gz
backout 6535949: needs more work
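This backs out the kernel page freelist ("kflt") work: usr/src/uts/common/sys/kflt_mem.h, usr/src/uts/common/os/kflt_mem_stubs.c and usr/src/uts/i86pc/vm/kflt_mem.c are deleted; the PG_KFLT/P_KFLT page flags and the KERNEL_THROTTLE* macros are dropped; and the former PAGE_GET_FREELISTS() call sites revert to direct page_get_freelist() calls with kcage-only throttling.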
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/os/clock.c             |   10
-rw-r--r--  usr/src/uts/common/os/kflt_mem_stubs.c    |   63
-rw-r--r--  usr/src/uts/common/sys/kflt_mem.h         |  107
-rw-r--r--  usr/src/uts/common/sys/mem_cage.h         |   27
-rw-r--r--  usr/src/uts/common/vm/page.h              |   10
-rw-r--r--  usr/src/uts/common/vm/vm_page.c           |  141
-rw-r--r--  usr/src/uts/common/vm/vm_pagelist.c       |  918
-rw-r--r--  usr/src/uts/i86pc/Makefile.files          |    1
-rw-r--r--  usr/src/uts/i86pc/os/startup.c            |   16
-rw-r--r--  usr/src/uts/i86pc/vm/kflt_mem.c           |  990
-rw-r--r--  usr/src/uts/i86pc/vm/vm_dep.h             |  247
-rw-r--r--  usr/src/uts/i86pc/vm/vm_machdep.c         |  158
-rw-r--r--  usr/src/uts/i86xpv/Makefile.files         |    4
-rw-r--r--  usr/src/uts/sun4/vm/vm_dep.c              |   61
-rw-r--r--  usr/src/uts/sun4/vm/vm_dep.h              |  136
-rw-r--r--  usr/src/uts/sun4u/Makefile.files          |    4
-rw-r--r--  usr/src/uts/sun4u/starfire/os/starfire.c  |   10
-rw-r--r--  usr/src/uts/sun4v/Makefile.files          |    4
18 files changed, 417 insertions, 2490 deletions
diff --git a/usr/src/uts/common/os/clock.c b/usr/src/uts/common/os/clock.c
index 4f78fc586c..4783542d77 100644
--- a/usr/src/uts/common/os/clock.c
+++ b/usr/src/uts/common/os/clock.c
@@ -54,7 +54,6 @@
#include <sys/disp.h>
#include <sys/msacct.h>
#include <sys/mem_cage.h>
-#include <sys/kflt_mem.h>
#include <vm/page.h>
#include <vm/anon.h>
@@ -630,13 +629,8 @@ clock(void)
/*
* Wakeup the cageout thread waiters once per second.
*/
- if (one_sec) {
- if (kcage_on) {
- kcage_tick();
- } else if (kflt_on) {
- kflt_tick();
- }
- }
+ if (one_sec)
+ kcage_tick();
if (one_sec) {
diff --git a/usr/src/uts/common/os/kflt_mem_stubs.c b/usr/src/uts/common/os/kflt_mem_stubs.c
deleted file mode 100644
index 66905d1475..0000000000
--- a/usr/src/uts/common/os/kflt_mem_stubs.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2010, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include <sys/types.h>
-#include <sys/cmn_err.h>
-#include <sys/errno.h>
-#include <sys/debug.h>
-#include <vm/page.h>
-#include <sys/mem_config.h>
-#include <sys/kflt_mem.h>
-
-/* These should be in a platform stubs file. */
-
-int kflt_on;
-pgcnt_t kflt_freemem;
-pgcnt_t kflt_throttlefree;
-pgcnt_t kflt_minfree;
-pgcnt_t kflt_desfree;
-pgcnt_t kflt_needfree;
-pgcnt_t kflt_lotsfree;
-
-/*ARGSUSED*/
-int
-kflt_create_throttle(pgcnt_t npages, int flags)
-{
- return (0);
-}
-
-void
-kflt_init(void)
-{
-}
-
-void
-kflt_evict_wakeup(void)
-{
-}
-
-void
-kflt_tick(void)
-{
-}
diff --git a/usr/src/uts/common/sys/kflt_mem.h b/usr/src/uts/common/sys/kflt_mem.h
deleted file mode 100644
index 6ecff34ce7..0000000000
--- a/usr/src/uts/common/sys/kflt_mem.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2010, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#ifndef _KFLT_MEM_H
-#define _KFLT_MEM_H
-
-#include <sys/types.h>
-#include <sys/memlist.h>
-
-/*
- * Kernel memory freelist interfaces.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _KERNEL
-
-#define KFT_FAILURE 0
-#define KFT_CRIT 1
-#define KFT_NONCRIT 2
-
-#define KFLT_EXPAND_RETRIES 10
-#define KFLT_PAGESIZE 1
-
-extern pgcnt_t kflt_freemem;
-extern pgcnt_t kflt_desfree;
-extern pgcnt_t kflt_minfree;
-extern pgcnt_t kflt_lotsfree;
-extern pgcnt_t kflt_needfree;
-extern pgcnt_t kflt_user_alloc;
-extern pgcnt_t kflt_user_threshhold;
-extern pgcnt_t kflt_throttlefree;
-extern pgcnt_t kflt_reserve;
-extern kthread_id_t kflt_evict_thread;
-extern int kflt_on;
-
-extern void kflt_evict_wakeup(void);
-extern void kflt_freemem_add(pgcnt_t);
-extern void kflt_freemem_sub(pgcnt_t);
-extern int kflt_create_throttle(pgcnt_t, int);
-extern void kflt_expand(void);
-extern void kflt_init(void);
-extern void kflt_tick(void);
-#pragma weak kflt_expand
-
-#if defined(__amd64) && !defined(__xpv)
-/* Macros to throttle memory allocations from the kernel page freelist. */
-
-#define KERNEL_THROTTLE_NONCRIT(npages, flags) \
- (kflt_create_throttle(npages, flags) == KFT_NONCRIT)
-
-#define KERNEL_THROTTLE(npages, flags) \
- if (((flags) & PG_KFLT) && \
- (kflt_freemem < (kflt_throttlefree + (npages)))) { \
- (void) kflt_create_throttle(npages, flags); \
- }
-
-#define KERNEL_THROTTLE_PGCREATE(npages, flags, cond) \
- ((((flags) & (PG_KFLT|(cond)) == (PG_KFLT|(cond))) && \
- (kflt_freemem < (kflt_throttlefree + (npages))) && \
- (kflt_create_throttle(npages, flags) == KFT_FAILURE)) ? \
- 1 : 0)
-
-#define KERNEL_NOT_THROTTLED(flags) (!kflt_on || !(flags & PG_KFLT))
-
-#elif !defined(__sparc)
-
-#define KERNEL_THROTTLE_NONCRIT(npages, flags) 0
-
-#define KERNEL_THROTTLE(npages, flags)
-
-#define KERNEL_THROTTLE_PGCREATE(npages, flags, cond) 0
-
-#define KERNEL_NOT_THROTTLED(flags) 1
-
-#endif
-
-#endif /* _KERNEL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _KFLT_MEM_H */
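A note while reading the throttle macros being removed above: in C, `==' binds more tightly than `&', so the test `(flags) & (PG_KFLT|(cond)) == (PG_KFLT|(cond))' in KERNEL_THROTTLE_PGCREATE parses as `(flags) & 1' rather than the intended mask comparison. The KERNEL_THROTTLE_PGCREATE variant removed from mem_cage.h below has the same shape, and the /* LINTED */ annotations at its call sites suggest lint flagged it. A self-contained sketch of the trap follows; PG_KFLT's value (0x0200) comes from the vm/page.h hunk in this diff, while PG_WAIT's value here is assumed purely for the demonstration.

#include <stdio.h>

#define	PG_KFLT	0x0200		/* value from the vm/page.h hunk below */
#define	PG_WAIT	0x0002		/* assumed value, for illustration only */

int
main(void)
{
	unsigned int flags = PG_KFLT | PG_WAIT;

	/*
	 * As written in the macro: == evaluates first, so this is
	 * flags & ((PG_KFLT|PG_WAIT) == (PG_KFLT|PG_WAIT)), i.e. flags & 1.
	 */
	unsigned int as_written =
	    flags & (PG_KFLT | PG_WAIT) == (PG_KFLT | PG_WAIT);

	/* The intended mask test, with explicit parentheses. */
	unsigned int intended =
	    (flags & (PG_KFLT | PG_WAIT)) == (PG_KFLT | PG_WAIT);

	printf("as written: %u\nintended:   %u\n", as_written, intended);
	return (0);
}

With flags = PG_KFLT|PG_WAIT (0x0202), the as-written form prints 0 because bit 0 of flags is clear, while the intended form prints 1.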
diff --git a/usr/src/uts/common/sys/mem_cage.h b/usr/src/uts/common/sys/mem_cage.h
index 84065c3e4f..8ad15d7acc 100644
--- a/usr/src/uts/common/sys/mem_cage.h
+++ b/usr/src/uts/common/sys/mem_cage.h
@@ -19,12 +19,15 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
#ifndef _SYS_MEM_CAGE_H
#define _SYS_MEM_CAGE_H
+#pragma ident "%Z%%M% %I% %E% SMI"
+
#include <sys/types.h>
#include <sys/memlist.h>
@@ -82,28 +85,6 @@ extern int kcage_next_range(int incage,
extern kcage_dir_t kcage_startup_dir;
-#if defined(__sparc)
-/* Macros to throttle memory allocations from the kernel cage. */
-
-#define KERNEL_THROTTLE_NONCRIT(npages, flags) \
- (kcage_create_throttle(1, flags) == KCT_NONCRIT)
-
-#define KERNEL_THROTTLE(npages, flags) \
- if (((flags) & PG_NORELOC) && \
- (kcage_freemem < (kcage_throttlefree + (npages)))) { \
- (void) kcage_create_throttle(npages, flags); \
- }
-
-
-#define KERNEL_THROTTLE_PGCREATE(npages, flags, cond) \
- ((((flags) & (PG_NORELOC|(cond)) == (PG_NORELOC|(cond))) && \
- (kcage_freemem < (kcage_throttlefree + (npages))) && \
- (kcage_create_throttle(npages, flags) == KCT_FAILURE)) ? \
- 1 : 0)
-
-#define KERNEL_NOT_THROTTLED(flags) (!kcage_on || !((flags) & PG_NORELOC))
-#endif /* __sparc */
-
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 166005d3cc..8299d4ca1b 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -659,8 +659,6 @@ extern pad_mutex_t ph_mutex[];
#define PG_LOCAL 0x0080 /* alloc from given lgrp only */
#define PG_NORMALPRI 0x0100 /* PG_WAIT like priority, but */
/* non-blocking */
-#define PG_KFLT 0x0200 /* alloc from kernel page freelist */
-
/*
* When p_selock has the SE_EWANTED bit set, threads waiting for SE_EXCL
* access are given priority over all other waiting threads.
@@ -942,8 +940,6 @@ int page_szc_user_filtered(size_t);
#define P_SWAP 0x10 /* belongs to vnode that is V_ISSWAP */
#define P_BOOTPAGES 0x08 /* member of bootpages list */
#define P_RAF 0x04 /* page retired at free */
-#define P_KFLT 0x02 /* allocated from kernel free list */
-#define P_USERKFLT 0x01 /* user pages from kernel free list */
#define PP_ISFREE(pp) ((pp)->p_state & P_FREE)
#define PP_ISAGED(pp) (((pp)->p_state & P_FREE) && \
@@ -955,8 +951,6 @@ int page_szc_user_filtered(size_t);
#define PP_ISSWAP(pp) ((pp)->p_state & P_SWAP)
#define PP_ISBOOTPAGES(pp) ((pp)->p_state & P_BOOTPAGES)
#define PP_ISRAF(pp) ((pp)->p_state & P_RAF)
-#define PP_ISKFLT(pp) ((pp)->p_state & P_KFLT)
-#define PP_ISUSERKFLT(pp) ((pp)->p_state & P_USERKFLT)
#define PP_SETFREE(pp) ((pp)->p_state = ((pp)->p_state & ~P_MIGRATE) \
| P_FREE)
@@ -966,8 +960,6 @@ int page_szc_user_filtered(size_t);
#define PP_SETSWAP(pp) ((pp)->p_state |= P_SWAP)
#define PP_SETBOOTPAGES(pp) ((pp)->p_state |= P_BOOTPAGES)
#define PP_SETRAF(pp) ((pp)->p_state |= P_RAF)
-#define PP_SETKFLT(pp) ((pp)->p_state |= P_KFLT)
-#define PP_SETUSERKFLT(pp) ((pp)->p_state |= P_USERKFLT)
#define PP_CLRFREE(pp) ((pp)->p_state &= ~P_FREE)
#define PP_CLRAGED(pp) ASSERT(!PP_ISAGED(pp))
@@ -976,8 +968,6 @@ int page_szc_user_filtered(size_t);
#define PP_CLRSWAP(pp) ((pp)->p_state &= ~P_SWAP)
#define PP_CLRBOOTPAGES(pp) ((pp)->p_state &= ~P_BOOTPAGES)
#define PP_CLRRAF(pp) ((pp)->p_state &= ~P_RAF)
-#define PP_CLRKFLT(pp) ((pp)->p_state &= ~P_KFLT)
-#define PP_CLRUSERKFLT(pp) ((pp)->p_state &= ~P_USERKFLT)
/*
* Flags for page_t p_toxic, for tracking memory hardware errors.
diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c
index eec36f84ee..169b9c84e7 100644
--- a/usr/src/uts/common/vm/vm_page.c
+++ b/usr/src/uts/common/vm/vm_page.c
@@ -59,7 +59,6 @@
#include <sys/condvar_impl.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
-#include <sys/kflt_mem.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/strlog.h>
@@ -130,7 +129,7 @@ static kcondvar_t freemem_cv;
* The free list contains those pages that should be reused first.
*
* The implementation of the lists is machine dependent.
- * PAGE_GET_FREELISTS(), page_get_cachelist(),
+ * page_get_freelist(), page_get_cachelist(),
* page_list_sub(), and page_list_add()
* form the interface to the machine dependent implementation.
*
@@ -508,13 +507,10 @@ static kphysm_setup_vector_t page_mem_config_vec = {
static void
page_init_mem_config(void)
{
-#ifdef DEBUG
- ASSERT(kphysm_setup_func_register(&page_mem_config_vec,
- (void *)NULL) == 0);
-#else /* !DEBUG */
- (void) kphysm_setup_func_register(&page_mem_config_vec, (void *)NULL);
-#endif /* !DEBUG */
+ int ret;
+ ret = kphysm_setup_func_register(&page_mem_config_vec, (void *)NULL);
+ ASSERT(ret == 0);
}
/*
@@ -1559,9 +1555,9 @@ page_create_wait(pgcnt_t npages, uint_t flags)
ASSERT(!kcage_on ? !(flags & PG_NORELOC) : 1);
checkagain:
-
- /* Throttle kernel memory allocations if necessary */
- KERNEL_THROTTLE(npages, flags);
+ if ((flags & PG_NORELOC) &&
+ kcage_freemem < kcage_throttlefree + npages)
+ (void) kcage_create_throttle(npages, flags);
if (freemem < npages + throttlefree)
if (!page_create_throttle(npages, flags))
@@ -1786,7 +1782,7 @@ page_create_get_something(vnode_t *vp, u_offset_t off, struct seg *seg,
*/
flags |= PG_PANIC;
- if ((flags & (PG_NORELOC|PG_KFLT)) != 0) {
+ if ((flags & PG_NORELOC) != 0) {
VM_STAT_ADD(pcgs_entered_noreloc);
/*
* Requests for free pages from critical threads
@@ -1805,8 +1801,7 @@ page_create_get_something(vnode_t *vp, u_offset_t off, struct seg *seg,
* kcage_freemem won't fall below minfree prior to grabbing
* pages from the freelists.
*/
- /* LINTED */
- if (KERNEL_THROTTLE_NONCRIT(1, flags)) {
+ if (kcage_create_throttle(1, flags) == KCT_NONCRIT) {
mutex_enter(&pcgs_cagelock);
cagelocked = 1;
VM_STAT_ADD(pcgs_cagelocked);
@@ -1863,8 +1858,8 @@ page_create_get_something(vnode_t *vp, u_offset_t off, struct seg *seg,
lgrp = lgrp_mem_choose(seg, vaddr, PAGESIZE);
- for (count = 0; kcage_on || kflt_on || count < MAX_PCGS; count++) {
- PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, PAGESIZE,
+ for (count = 0; kcage_on || count < MAX_PCGS; count++) {
+ pp = page_get_freelist(vp, off, seg, vaddr, PAGESIZE,
flags, lgrp);
if (pp == NULL) {
pp = page_get_cachelist(vp, off, seg, vaddr,
@@ -1874,7 +1869,7 @@ page_create_get_something(vnode_t *vp, u_offset_t off, struct seg *seg,
/*
* Serialize. Don't fight with other pcgs().
*/
- if (!locked && KERNEL_NOT_THROTTLED(flags)) {
+ if (!locked && (!kcage_on || !(flags & PG_NORELOC))) {
mutex_enter(&pcgs_lock);
VM_STAT_ADD(pcgs_locked);
locked = 1;
@@ -2053,16 +2048,14 @@ page_alloc_pages(struct vnode *vp, struct seg *seg, caddr_t addr,
while (npgs && szc) {
lgrp = lgrp_mem_choose(seg, addr, pgsz);
if (pgflags == PG_LOCAL) {
- PAGE_GET_FREELISTS(pp, vp, 0, seg, addr, pgsz,
+ pp = page_get_freelist(vp, 0, seg, addr, pgsz,
pgflags, lgrp);
if (pp == NULL) {
- /* LINTED */
- PAGE_GET_FREELISTS(pp, vp, 0, seg, addr, pgsz,
+ pp = page_get_freelist(vp, 0, seg, addr, pgsz,
0, lgrp);
}
} else {
- /* LINTED */
- PAGE_GET_FREELISTS(pp, vp, 0, seg, addr, pgsz,
+ pp = page_get_freelist(vp, 0, seg, addr, pgsz,
0, lgrp);
}
if (pp != NULL) {
@@ -2163,24 +2156,10 @@ page_create_va_large(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags,
npages = btop(bytes);
- if (kflt_on && ((flags & PG_NORELOC) || VN_ISKAS(vp)) &&
- !panicstr) {
- /*
- * If the kernel freelist is active, and this is a
- * kernel page or one that is non-relocatable because it
- * is locked then set the PG_KFLT flag so that this page
- * will be allocated from the kernel freelist and therefore
- * will not fragment memory
- */
- flags |= PG_KFLT;
- }
-
if (!kcage_on || panicstr) {
/*
- * If the cage is off, we turn off the PG_NORELOC flag
- * however if the kernel freelist is active we will use
- * this to prevent memory fragmentation instead.
- * In panic do not use the cage or the kernel freelist.
+ * Cage is OFF, or we are single threaded in
+ * panic, so make everything a RELOC request.
*/
flags &= ~PG_NORELOC;
}
@@ -2195,13 +2174,22 @@ page_create_va_large(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags,
}
/*
- * If cage or kernel freelist is on, dampen draw from cage when
- * available cage space is low.
+ * If cage is on, dampen draw from cage when available
+ * cage space is low.
*/
- /* LINTED */
- if (KERNEL_THROTTLE_PGCREATE(npages, flags, PG_WAIT)) {
- VM_STAT_ADD(page_create_large_cnt[2]);
- return (NULL);
+ if ((flags & (PG_NORELOC | PG_WAIT)) == (PG_NORELOC | PG_WAIT) &&
+ kcage_freemem < kcage_throttlefree + npages) {
+
+ /*
+ * The cage is on, the caller wants PG_NORELOC
+ * pages and available cage memory is very low.
+ * Call kcage_create_throttle() to attempt to
+ * control demand on the cage.
+ */
+ if (kcage_create_throttle(npages, flags) == KCT_FAILURE) {
+ VM_STAT_ADD(page_create_large_cnt[2]);
+ return (NULL);
+ }
}
if (!pcf_decrement_bucket(npages) &&
@@ -2222,9 +2210,8 @@ page_create_va_large(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags,
else
lgrp = lgrp_mem_choose(seg, vaddr, bytes);
- PAGE_GET_FREELISTS(rootpp, &kvp, off, seg, vaddr,
- bytes, flags & ~PG_MATCH_COLOR, lgrp);
- if (rootpp == NULL) {
+ if ((rootpp = page_get_freelist(&kvp, off, seg, vaddr,
+ bytes, flags & ~PG_MATCH_COLOR, lgrp)) == NULL) {
page_create_putback(npages);
VM_STAT_ADD(page_create_large_cnt[5]);
return (NULL);
@@ -2322,41 +2309,33 @@ page_create_va(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags,
}
}
- if (kflt_on && ((flags & PG_NORELOC) || VN_ISKAS(vp)) &&
- !panicstr) {
- /*
- * If the kernel freelist is active, and this is a
- * kernel page or one that is non-relocatable because it
- * is locked then set the PG_KFLT flag so that this page
- * will be allocated from the kernel freelist and therefore
- * will not fragment memory
- */
- flags |= PG_KFLT;
- }
-
if (!kcage_on || panicstr) {
/*
- * If the cage is off, we turn off the PG_NORELOC flag
- * however if the kernel freelist is active we will use
- * this to prevent memory fragmentation instead.
- * In panic do not use the cage or the kernel freelist.
+ * Cage is OFF, or we are single threaded in
+ * panic, so make everything a RELOC request.
*/
flags &= ~PG_NORELOC;
}
- if ((freemem <= throttlefree + npages) &&
- (!page_create_throttle(npages, flags))) {
+ if (freemem <= throttlefree + npages)
+ if (!page_create_throttle(npages, flags))
return (NULL);
- }
/*
- * If cage or kernel freelist is on, dampen draw from cage when
- * available cage space is low.
+ * If cage is on, dampen draw from cage when available
+ * cage space is low.
*/
+ if ((flags & PG_NORELOC) &&
+ kcage_freemem < kcage_throttlefree + npages) {
- /* LINTED */
- if (KERNEL_THROTTLE_PGCREATE(npages, flags, 0)) {
- return (NULL);
+ /*
+ * The cage is on, the caller wants PG_NORELOC
+ * pages and available cage memory is very low.
+ * Call kcage_create_throttle() to attempt to
+ * control demand on the cage.
+ */
+ if (kcage_create_throttle(npages, flags) == KCT_FAILURE)
+ return (NULL);
}
VM_STAT_ADD(page_create_cnt[0]);
@@ -2431,7 +2410,7 @@ top:
* the physical memory
*/
lgrp = lgrp_mem_choose(seg, vaddr, PAGESIZE);
- PAGE_GET_FREELISTS(npp, vp, off, seg, vaddr, PAGESIZE,
+ npp = page_get_freelist(vp, off, seg, vaddr, PAGESIZE,
flags | PG_MATCH_COLOR, lgrp);
if (npp == NULL) {
npp = page_get_cachelist(vp, off, seg,
@@ -2578,6 +2557,7 @@ fail:
npp->p_offset = (u_offset_t)-1;
page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL);
page_unlock(npp);
+
}
ASSERT(pages_req >= found_on_free);
@@ -2626,9 +2606,7 @@ page_free_toxic_pages(page_t *rootpp)
{
page_t *tpp;
pgcnt_t i, pgcnt = page_get_pagecnt(rootpp->p_szc);
-#ifdef DEBUG
uint_t szc = rootpp->p_szc;
-#endif
for (i = 0, tpp = rootpp; i < pgcnt; i++, tpp = tpp->p_next) {
ASSERT(tpp->p_szc == szc);
@@ -2802,9 +2780,7 @@ page_free_pages(page_t *pp)
page_t *tpp, *rootpp = NULL;
pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc);
pgcnt_t i;
-#ifdef DEBUG
uint_t szc = pp->p_szc;
-#endif
VM_STAT_ADD(pagecnt.pc_free_pages);
TRACE_1(TR_FAC_VM, TR_PAGE_FREE_FREE,
@@ -3148,9 +3124,7 @@ page_destroy_pages(page_t *pp)
page_t *tpp, *rootpp = NULL;
pgcnt_t pgcnt = page_get_pagecnt(pp->p_szc);
pgcnt_t i, pglcks = 0;
-#ifdef DEBUG
uint_t szc = pp->p_szc;
-#endif
ASSERT(pp->p_szc != 0 && pp->p_szc < page_num_pagesizes());
@@ -3277,9 +3251,7 @@ page_rename(page_t *opp, vnode_t *vp, u_offset_t off)
* large pages left lying around.
*/
if (opp->p_szc != 0) {
-#ifdef DEBUG
vnode_t *ovp = opp->p_vnode;
-#endif
ASSERT(ovp != NULL);
ASSERT(!IS_SWAPFSVP(ovp));
ASSERT(!VN_ISKAS(ovp));
@@ -3517,11 +3489,8 @@ page_hashin(page_t *pp, vnode_t *vp, u_offset_t offset, kmutex_t *hold)
mutex_exit(vphm);
if (hold == NULL)
mutex_exit(phm);
-#ifdef VM_STATS
- if (rc == 0) {
+ if (rc == 0)
VM_STAT_ADD(hashin_already);
- }
-#endif
return (rc);
}
@@ -5229,10 +5198,8 @@ page_try_demote_pages(page_t *pp)
page_t *tpp, *rootpp = pp;
pfn_t pfn = page_pptonum(pp);
spgcnt_t i, npgs;
- vnode_t *vp = pp->p_vnode;
-#ifdef DEBUG
uint_t szc = pp->p_szc;
-#endif
+ vnode_t *vp = pp->p_vnode;
ASSERT(PAGE_EXCL(pp));
diff --git a/usr/src/uts/common/vm/vm_pagelist.c b/usr/src/uts/common/vm/vm_pagelist.c
index 54f2681e00..eda3552c03 100644
--- a/usr/src/uts/common/vm/vm_pagelist.c
+++ b/usr/src/uts/common/vm/vm_pagelist.c
@@ -57,7 +57,6 @@
#include <sys/mem_config.h>
#include <sys/callb.h>
#include <sys/mem_cage.h>
-#include <sys/kflt_mem.h>
#include <sys/sdt.h>
#include <sys/dumphdr.h>
#include <sys/swap.h>
@@ -122,18 +121,9 @@ int pgcplimitsearch = 1;
if (++pgcpfailcnt[szc] >= PGCPFAILMAX) \
pgcpfailcnt[szc] = PGCPFAILMAX / 2;
-/*
- * There are two page freelist types that are supported, flt_user, the user
- * page freelist type and flt_kern, the kernel page freelist type.
- */
-
-page_freelist_type_t flt_user;
-page_freelist_type_t flt_kern;
-page_freelist_type_t *ufltp = &flt_user;
-page_freelist_type_t *kfltp = &flt_kern;
-
#ifdef VM_STATS
struct vmm_vmstats_str vmm_vmstats;
+
#endif /* VM_STATS */
#if defined(__sparc)
@@ -245,9 +235,6 @@ page_t *page_demote(int, pfn_t, pfn_t, uchar_t, uchar_t, int, int);
page_t *page_freelist_split(uchar_t,
uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *);
page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
-static page_t *page_get_flist(page_freelist_type_t *, uint_t, int,
- uchar_t, uint_t, struct lgrp *);
-
static int page_trylock_cons(page_t *pp, se_t se);
/*
@@ -365,6 +352,7 @@ static int mnode_maxmrange[MAX_MEM_NODES];
*/
krwlock_t page_ctrs_rwlock[MAX_MEM_NODES];
+
/*
* initialize cpu_vm_data to point at cache aligned vm_cpu_data_t.
*/
@@ -1422,8 +1410,7 @@ page_list_add(page_t *pp, int flags)
* threaded), add a page to the free list and add to the
* the free region counters w/o any locking
*/
- ASSERT(!PP_ISKFLT(pp));
- ppp = PAGE_FREELISTP(PFLT_USER, mnode, 0, bin, mtype);
+ ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);
/* inline version of page_add() */
if (*ppp != NULL) {
@@ -1437,13 +1424,13 @@ page_list_add(page_t *pp, int flags)
page_ctr_add_internal(mnode, mtype, pp, flags);
VM_STAT_ADD(vmm_vmstats.pladd_free[0]);
} else {
- pcm = PC_BIN_MUTEX(PP_ISKFLT(pp), mnode, bin, flags);
+ pcm = PC_BIN_MUTEX(mnode, bin, flags);
if (flags & PG_FREE_LIST) {
VM_STAT_ADD(vmm_vmstats.pladd_free[0]);
ASSERT(PP_ISAGED(pp));
- ppp = PAGE_FREELISTP(PP_ISKFLT(pp), mnode, 0,
- bin, mtype);
+ ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);
+
} else {
VM_STAT_ADD(vmm_vmstats.pladd_cache);
ASSERT(pp->p_vnode);
@@ -1468,16 +1455,7 @@ page_list_add(page_t *pp, int flags)
if (PP_ISNORELOC(pp)) {
kcage_freemem_add(1);
}
-#elif defined(__amd64) && !defined(__xpv)
- if (PP_ISKFLT(pp)) {
- kflt_freemem_add(1);
- if (PP_ISUSERKFLT(pp)) {
- ASSERT(kflt_user_alloc > 0);
- atomic_add_long(&kflt_user_alloc, -1);
- PP_CLRUSERKFLT(pp);
- }
- }
-#endif /* __sparc */
+#endif
/*
* It is up to the caller to unlock the page!
*/
@@ -1517,8 +1495,7 @@ page_list_noreloc_startup(page_t *pp)
ASSERT(pp->p_szc == 0);
if (PP_ISAGED(pp)) {
- ASSERT(!PP_ISKFLT(pp));
- ppp = PAGE_FREELISTP(PFLT_USER, mnode, 0, bin, mtype);
+ ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);
flags |= PG_FREE_LIST;
} else {
ppp = &PAGE_CACHELISTS(mnode, bin, mtype);
@@ -1556,8 +1533,7 @@ page_list_noreloc_startup(page_t *pp)
* Get new list for page.
*/
if (PP_ISAGED(pp)) {
- ASSERT(!PP_ISKFLT(pp));
- ppp = PAGE_FREELISTP(PFLT_USER, mnode, 0, bin, mtype);
+ ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);
} else {
ppp = &PAGE_CACHELISTS(mnode, bin, mtype);
}
@@ -1615,31 +1591,25 @@ page_list_add_pages(page_t *pp, int flags)
if (flags & PG_LIST_ISINIT) {
ASSERT(pp->p_szc == mmu_page_sizes - 1);
- page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc,
- bin, mtype), pp);
+ page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
ASSERT(!PP_ISNORELOC(pp));
PLCNT_INCR(pp, mnode, mtype, pp->p_szc, flags);
} else {
ASSERT(pp->p_szc != 0 && pp->p_szc < mmu_page_sizes);
- pcm = PC_BIN_MUTEX(PFLT_USER, mnode, bin, PG_FREE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
mutex_enter(pcm);
- ASSERT(!PP_ISKFLT(pp));
- page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc,
- bin, mtype), pp);
+ page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
page_ctr_add(mnode, mtype, pp, PG_FREE_LIST);
mutex_exit(pcm);
pgcnt = page_get_pagecnt(pp->p_szc);
#if defined(__sparc)
- if (PP_ISNORELOC(pp)) {
+ if (PP_ISNORELOC(pp))
kcage_freemem_add(pgcnt);
- }
-#elif defined(__amd64) && !defined(__xpv)
- ASSERT(!PP_ISKFLT(pp));
-#endif /* __sparc */
+#endif
for (i = 0; i < pgcnt; i++, pp++)
page_unlock_nocapture(pp);
}
@@ -1697,7 +1667,7 @@ page_list_sub(page_t *pp, int flags)
try_again:
bin = PP_2_BIN(pp);
mnode = PP_2_MEM_NODE(pp);
- pcm = PC_BIN_MUTEX(PP_ISKFLT(pp), mnode, bin, flags);
+ pcm = PC_BIN_MUTEX(mnode, bin, flags);
mutex_enter(pcm);
if (PP_2_BIN(pp) != bin) {
mutex_exit(pcm);
@@ -1708,8 +1678,7 @@ try_again:
if (flags & PG_FREE_LIST) {
VM_STAT_ADD(vmm_vmstats.plsub_free[0]);
ASSERT(PP_ISAGED(pp));
- ppp = PAGE_FREELISTP(PP_ISKFLT(pp), mnode, pp->p_szc,
- bin, mtype);
+ ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype);
} else {
VM_STAT_ADD(vmm_vmstats.plsub_cache);
ASSERT(!PP_ISAGED(pp));
@@ -1736,11 +1705,7 @@ try_again:
if (PP_ISNORELOC(pp)) {
kcage_freemem_sub(1);
}
-#elif defined(__amd64) && !defined(__xpv)
- if (PP_ISKFLT(pp)) {
- kflt_freemem_sub(1);
- }
-#endif /* __sparc */
+#endif
return;
}
@@ -1775,16 +1740,14 @@ try_again:
ASSERT(PP_ISAGED(pp));
ASSERT(pp->p_szc == 0);
- /* Large pages on the kernel freelist are not supported. */
- ASSERT(!PP_ISKFLT(pp));
-
/*
* Subtract counters before releasing pcm mutex
* to avoid race with page_freelist_coalesce.
*/
bin = PP_2_BIN(pp);
mtype = PP_2_MTYPE(pp);
- ppp = PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc, bin, mtype);
+ ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype);
+
page_sub(ppp, pp);
page_ctr_sub(mnode, mtype, pp, flags);
page_freelist_unlock(mnode);
@@ -1793,7 +1756,7 @@ try_again:
if (PP_ISNORELOC(pp)) {
kcage_freemem_sub(1);
}
-#endif /* __sparc */
+#endif
}
void
@@ -1813,7 +1776,7 @@ page_list_sub_pages(page_t *pp, uint_t szc)
try_again:
bin = PP_2_BIN(pp);
mnode = PP_2_MEM_NODE(pp);
- pcm = PC_BIN_MUTEX(PP_ISKFLT(pp), mnode, bin, PG_FREE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
mutex_enter(pcm);
if (PP_2_BIN(pp) != bin) {
mutex_exit(pcm);
@@ -1842,19 +1805,16 @@ try_again:
ASSERT(PP_ISAGED(pp));
ASSERT(pp->p_szc <= szc);
ASSERT(pp == PP_PAGEROOT(pp));
- ASSERT(!PP_ISKFLT(pp));
VM_STAT_ADD(vmm_vmstats.plsub_free[pp->p_szc]);
mtype = PP_2_MTYPE(pp);
if (pp->p_szc != 0) {
- page_vpsub(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc,
- bin, mtype), pp);
+ page_vpsub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
CHK_LPG(pp, pp->p_szc);
} else {
VM_STAT_ADD(vmm_vmstats.plsubpages_szc0);
- page_sub(PAGE_FREELISTP(PFLT_USER, mnode, pp->p_szc,
- bin, mtype), pp);
+ page_sub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
}
page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST);
@@ -1871,7 +1831,7 @@ try_again:
pgcnt = page_get_pagecnt(pp->p_szc);
kcage_freemem_sub(pgcnt);
}
-#endif /* __sparc */
+#endif
}
/*
@@ -1945,7 +1905,7 @@ page_promote_size(page_t *pp, uint_t cur_szc)
static uint_t page_promote_err;
static uint_t page_promote_noreloc_err;
-static uint_t page_promote_kflt_err;
+
/*
* Create a single larger page (of szc new_szc) from smaller contiguous pages
* for the given mnode starting at pfnum. Pages involved are on the freelist
@@ -1957,9 +1917,6 @@ static uint_t page_promote_kflt_err;
* caller and put the large page on the freelist instead.
* If flags is PC_FREE, then the large page will be placed on the freelist,
* and NULL will be returned.
- * If the PC_KFLT_EXPORT flag is set, the large page will be returned to the
- * caller unlocked, as the caller is going to put it on the user page
- * freelist
* The caller is responsible for locking the freelist as well as any other
* accounting which needs to be done for a returned page.
*
@@ -2047,17 +2004,6 @@ page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags, int mtype)
page_promote_err++;
return (NULL);
}
-
- /*
- * page promote() can only legitimately be called for
- * pages from the kernel freelist from the kflt_export()
- * routine which sets the PC_KFLT_EXPORT flag.
- */
- if (PP_ISKFLT(pp) && !(flags & PC_KFLT_EXPORT)) {
- page_promote_kflt_err++;
- page_promote_err++;
- return (NULL);
- }
}
pages_left = new_npgs;
@@ -2079,13 +2025,11 @@ page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags, int mtype)
* PG_FREE_LIST
*/
if (pp->p_szc) {
- page_vpsub(PAGE_FREELISTP(PFLT_USER, mnode,
+ page_vpsub(&PAGE_FREELISTS(mnode,
pp->p_szc, bin, mtype), pp);
} else {
- ASSERT(!PP_ISKFLT(pp) ||
- (flags & PC_KFLT_EXPORT));
- mach_page_sub(PAGE_FREELISTP(PP_ISKFLT(pp),
- mnode, 0, bin, mtype), pp);
+ mach_page_sub(&PAGE_FREELISTS(mnode, 0,
+ bin, mtype), pp);
}
which_list = PG_FREE_LIST;
} else {
@@ -2148,16 +2092,7 @@ page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags, int mtype)
* return the page to the user if requested
* in the properly locked state.
*/
- if ((flags & PC_ALLOC) && (page_trylock_cons(pplist, SE_EXCL))) {
- return (pplist);
- }
-
- /*
- * If the PC_KFLT_EXPORT flag is set, kflt_export() is just going to
- * return this large page to the user page freelist, so there is no
- * need to lock it.
- */
- if (flags & PC_KFLT_EXPORT) {
+ if (flags == PC_ALLOC && (page_trylock_cons(pplist, SE_EXCL))) {
return (pplist);
}
@@ -2167,8 +2102,7 @@ page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags, int mtype)
bin = PP_2_BIN(pplist);
mnode = PP_2_MEM_NODE(pplist);
mtype = PP_2_MTYPE(pplist);
- page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode, new_szc,
- bin, mtype), pplist);
+ page_vpadd(&PAGE_FREELISTS(mnode, new_szc, bin, mtype), pplist);
page_ctr_add(mnode, mtype, pplist, PG_FREE_LIST);
return (NULL);
@@ -2189,9 +2123,7 @@ fail_promote:
pp->p_szc = 0;
bin = PP_2_BIN(pp);
mtype = PP_2_MTYPE(pp);
- ASSERT(!PP_ISKFLT(pp));
- mach_page_add(PAGE_FREELISTP(PFLT_USER, mnode,
- 0, bin, mtype), pp);
+ mach_page_add(&PAGE_FREELISTS(mnode, 0, bin, mtype), pp);
page_ctr_add(mnode, mtype, pp, PG_FREE_LIST);
}
return (NULL);
@@ -2227,13 +2159,11 @@ page_demote(int mnode, pfn_t pfnum, pfn_t pfnmax, uchar_t cur_szc,
ASSERT(pplist != NULL);
ASSERT(pplist->p_szc == cur_szc);
- ASSERT(!PP_ISKFLT(pplist));
bin = PP_2_BIN(pplist);
ASSERT(mnode == PP_2_MEM_NODE(pplist));
mtype = PP_2_MTYPE(pplist);
- page_vpsub(PAGE_FREELISTP(PFLT_USER, mnode, cur_szc,
- bin, mtype), pplist);
+ page_vpsub(&PAGE_FREELISTS(mnode, cur_szc, bin, mtype), pplist);
CHK_LPG(pplist, cur_szc);
page_ctr_sub(mnode, mtype, pplist, PG_FREE_LIST);
@@ -2266,9 +2196,8 @@ page_demote(int mnode, pfn_t pfnum, pfn_t pfnmax, uchar_t cur_szc,
ret_pp = pp;
} else {
mtype = PP_2_MTYPE(pp);
- ASSERT(!PP_ISKFLT(pp));
- mach_page_add(PAGE_FREELISTP(PFLT_USER, mnode,
- 0, bin, mtype), pp);
+ mach_page_add(&PAGE_FREELISTS(mnode, 0, bin,
+ mtype), pp);
page_ctr_add(mnode, mtype, pp, PG_FREE_LIST);
}
} else {
@@ -2313,8 +2242,8 @@ page_demote(int mnode, pfn_t pfnum, pfn_t pfnmax, uchar_t cur_szc,
ret_pp = try_to_return_this_page;
} else {
mtype = PP_2_MTYPE(pp);
- page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode,
- new_szc, bin, mtype), pplist);
+ page_vpadd(&PAGE_FREELISTS(mnode, new_szc,
+ bin, mtype), pplist);
page_ctr_add(mnode, mtype, pplist,
PG_FREE_LIST);
@@ -2348,6 +2277,7 @@ page_freelist_coalesce(int mnode, uchar_t szc, uint_t color, uint_t ceq_mask,
#if defined(__sparc)
pfn_t pfnum0, nlo, nhi;
#endif
+
if (mpss_coalesce_disable) {
ASSERT(szc < MMU_PAGE_SIZES);
VM_STAT_ADD(vmm_vmstats.page_ctrs_coalesce[szc][0]);
@@ -2505,21 +2435,11 @@ page_freelist_coalesce(int mnode, uchar_t szc, uint_t color, uint_t ceq_mask,
npgs = page_get_pagecnt(ret_pp->p_szc);
kcage_freemem_sub(npgs);
}
-#elif defined(__amd64) && !defined(__xpv)
- /*
- * Only a single page size is supported on
- * the kernel freelist. This will need to
- * be changed to increase the availability
- * of more than one large page size.
- */
- ASSERT(!PP_ISKFLT(ret_pp));
-#endif /* __sparc */
+#endif
return (ret_pp);
}
-#ifdef VM_STATS
} else {
VM_STAT_ADD(vmm_vmstats.page_ctrs_changed[r][mrange]);
-#endif
}
page_freelist_unlock(mnode);
@@ -2685,10 +2605,9 @@ page_freelist_split(uchar_t szc, uint_t color, int mnode, int mtype,
/*
* If page found then demote it.
*/
- if (PAGE_FREELISTS(PFLT_USER, mnode, nszc, bin, mtype)) {
+ if (PAGE_FREELISTS(mnode, nszc, bin, mtype)) {
page_freelist_lock(mnode);
- firstpp = pp = PAGE_FREELISTS(PFLT_USER, mnode,
- nszc, bin, mtype);
+ firstpp = pp = PAGE_FREELISTS(mnode, nszc, bin, mtype);
/*
* If pfnhi is not PFNNULL, look for large page below
@@ -2732,9 +2651,7 @@ page_freelist_split(uchar_t szc, uint_t color, int mnode, int mtype,
ret_pp->p_szc);
kcage_freemem_sub(npgs);
}
-#elif defined(__amd64) && !defined(__xpv)
- ASSERT(!PP_ISKFLT(pp));
-#endif /* __sparc */
+#endif
return (ret_pp);
}
}
@@ -2892,42 +2809,6 @@ page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin, int can_split,
plw->plw_bins[1] = 0;
plw->plw_ceq_mask[1] = INVALID_MASK;
}
- ASSERT(bin < plw->plw_colors);
-}
-
-/*
- * Walker variables for the kernel freelist are initialized so that all
- * kernel page colors are treated as equivalent. This minimizes the amount
- * of memory used by the kernel freelist.
- */
-/* ARGSUSED */
-void
-page_kflt_walk_init(uchar_t szc, uint_t flags, uint_t bin, int can_split,
- int use_ceq, page_list_walker_t *plw)
-{
- /*
- * Note that the following values are only valid for pages with
- * szc == 0.
- */
- ASSERT(szc == 0);
-
- /* The number of colors for kernel pages */
- plw->plw_colors = KFLT_PAGE_COLORS;
- plw->plw_color_mask = KFLT_PAGE_COLORS - 1;
-
- /* The marker indicates when all the bins have been processed */
- plw->plw_bin_marker = plw->plw_bin0 = bin;
- plw->plw_bin_split_prev = bin;
-
- /* Add plw_bin_step to get the next bin to process */
- plw->plw_bin_step = vac_colors;
-
- /* There is only 1 color group i.e. all colors are equivalent */
- plw->plw_ceq_dif = 1;
- plw->plw_ceq_mask[0] = 0;
- plw->plw_do_split = 0;
-
- ASSERT(bin < plw->plw_colors);
}
/*
@@ -3030,8 +2911,8 @@ page_list_walk_next_bin(uchar_t szc, uint_t bin, page_list_walker_t *plw)
}
page_t *
-page_get_mnode_freelist(page_freelist_type_t *fp, int mnode, uint_t bin,
- int mtype, uchar_t szc, uint_t flags)
+page_get_mnode_freelist(int mnode, uint_t bin, int mtype, uchar_t szc,
+ uint_t flags)
{
kmutex_t *pcm;
page_t *pp, *first_pp;
@@ -3049,6 +2930,7 @@ page_get_mnode_freelist(page_freelist_type_t *fp, int mnode, uint_t bin,
return (NULL);
}
try_again:
+
plw_initialized = 0;
plw.plw_ceq_dif = 1;
@@ -3061,19 +2943,14 @@ try_again:
plw.plw_count < plw.plw_ceq_dif; plw.plw_count++) {
sbin = bin;
do {
- if (!PAGE_FREELISTS(PC_ISKFLT(fp), mnode, szc,
- bin, mtype)) {
+ if (!PAGE_FREELISTS(mnode, szc, bin, mtype))
goto bin_empty_1;
- }
- pcm = PC_BIN_MUTEX(PC_ISKFLT(fp), mnode, bin,
- PG_FREE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
mutex_enter(pcm);
- pp = PAGE_FREELISTS(PC_ISKFLT(fp), mnode, szc,
- bin, mtype);
- if (pp == NULL) {
+ pp = PAGE_FREELISTS(mnode, szc, bin, mtype);
+ if (pp == NULL)
goto bin_empty_0;
- }
/*
* These were set before the page
@@ -3125,10 +3002,10 @@ try_again:
ASSERT(mtype == PP_2_MTYPE(pp));
ASSERT(pp->p_szc == szc);
if (szc == 0) {
- page_sub(PAGE_FREELISTP(PC_ISKFLT(fp), mnode,
+ page_sub(&PAGE_FREELISTS(mnode,
szc, bin, mtype), pp);
} else {
- page_vpsub(PAGE_FREELISTP(PC_ISKFLT(fp), mnode,
+ page_vpsub(&PAGE_FREELISTS(mnode,
szc, bin, mtype), pp);
CHK_LPG(pp, szc);
}
@@ -3144,12 +3021,7 @@ try_again:
if (PP_ISNORELOC(pp))
kcage_freemem_sub(page_get_pagecnt(szc));
-#elif defined(__amd64) && !defined(__xpv)
- if (PP_ISKFLT(pp)) {
- ASSERT(szc == 0);
- kflt_freemem_sub(1);
- }
-#endif /* __sparc */
+#endif
VM_STAT_ADD(vmm_vmstats.pgmf_allocok[szc]);
return (pp);
@@ -3157,7 +3029,7 @@ bin_empty_0:
mutex_exit(pcm);
bin_empty_1:
if (plw_initialized == 0) {
- PAGE_LIST_WALK_INIT(fp, szc, flags, bin, 1, 1,
+ page_list_walk_init(szc, flags, bin, 1, 1,
&plw);
plw_initialized = 1;
ASSERT(plw.plw_colors <=
@@ -3171,7 +3043,6 @@ bin_empty_1:
/* calculate the next bin with equivalent color */
bin = ADD_MASKED(bin, plw.plw_bin_step,
plw.plw_ceq_mask[szc], plw.plw_color_mask);
-
} while (sbin != bin);
/*
@@ -3193,7 +3064,7 @@ bin_empty_1:
return (pp);
if (plw.plw_ceq_dif > 1)
- bin = PAGE_LIST_WALK_NEXT(fp, szc, bin, &plw);
+ bin = page_list_walk_next_bin(szc, bin, &plw);
}
/* if allowed, cycle through additional mtypes */
@@ -3320,17 +3191,6 @@ skipptcpcheck:
}
return (0);
}
- if (PP_ISKFLT(pp)) {
- VM_STAT_ADD(vmm_vmstats.ptcpfailkflt[szc]);
- ASSERT(i == 0);
- while (i != (pgcnt_t)-1) {
- pp = &spp[i];
- ASSERT(PAGE_EXCL(pp));
- page_unlock_nocapture(pp);
- i--;
- }
- return (0);
- }
}
VM_STAT_ADD(vmm_vmstats.ptcpok[szc]);
return (1);
@@ -3355,7 +3215,6 @@ page_claim_contig_pages(page_t *pp, uchar_t szc, int flags)
while (pgcnt) {
ASSERT(PAGE_EXCL(pp));
ASSERT(!PP_ISNORELOC(pp));
- ASSERT(!PP_ISKFLT(pp));
if (PP_ISFREE(pp)) {
/*
* If this is a PG_FREE_LIST page then its
@@ -3457,7 +3316,6 @@ page_claim_contig_pages(page_t *pp, uchar_t szc, int flags)
ASSERT(PAGE_EXCL(targpp));
ASSERT(!PP_ISFREE(targpp));
ASSERT(!PP_ISNORELOC(targpp));
- ASSERT(!PP_ISKFLT(targpp));
PP_SETFREE(targpp);
ASSERT(PP_ISAGED(targpp));
ASSERT(targpp->p_szc < szc || (szc == 0 &&
@@ -3484,7 +3342,6 @@ page_claim_contig_pages(page_t *pp, uchar_t szc, int flags)
* Trim kernel cage from pfnlo-pfnhi and store result in lo-hi. Return code
* of 0 means nothing left after trim.
*/
-/* LINTED */
int
trimkcage(struct memseg *mseg, pfn_t *lo, pfn_t *hi, pfn_t pfnlo, pfn_t pfnhi)
{
@@ -3547,12 +3404,14 @@ trimkcage(struct memseg *mseg, pfn_t *lo, pfn_t *hi, pfn_t pfnlo, pfn_t pfnhi)
*
* 'pfnflag' specifies the subset of the pfn range to search.
*/
+
static page_t *
page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
pfn_t pfnlo, pfn_t pfnhi, pgcnt_t pfnflag)
{
struct memseg *mseg;
pgcnt_t szcpgcnt = page_get_pagecnt(szc);
+ pgcnt_t szcpgmask = szcpgcnt - 1;
pfn_t randpfn;
page_t *pp, *randpp, *endpp;
uint_t colors, ceq_mask;
@@ -3561,16 +3420,13 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
pfn_t hi, lo;
uint_t skip;
MEM_NODE_ITERATOR_DECL(it);
-#ifdef DEBUG
- pgcnt_t szcpgmask = szcpgcnt - 1;
-#endif
ASSERT(szc != 0 || (flags & PGI_PGCPSZC0));
+
pfnlo = P2ROUNDUP(pfnlo, szcpgcnt);
- if ((pfnhi - pfnlo) + 1 < szcpgcnt || pfnlo >= pfnhi) {
+ if ((pfnhi - pfnlo) + 1 < szcpgcnt || pfnlo >= pfnhi)
return (NULL);
- }
ASSERT(szc < mmu_page_sizes);
@@ -3611,9 +3467,8 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
szcpages = ((pfnhi - pfnlo) + 1) / szcpgcnt;
slotlen = howmany(szcpages, slots);
/* skip if 'slotid' slot is empty */
- if (slotid * slotlen >= szcpages) {
+ if (slotid * slotlen >= szcpages)
return (NULL);
- }
pfnlo = pfnlo + (((slotid * slotlen) % szcpages) * szcpgcnt);
ASSERT(pfnlo < pfnhi);
if (pfnhi > pfnlo + (slotlen * szcpgcnt))
@@ -3716,12 +3571,6 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
ASSERT(!(pp->p_pagenum & szcpgmask));
ASSERT(((PP_2_BIN(pp) ^ bin) & ceq_mask) == 0);
- /* Skip over pages on the kernel freelist */
- if (PP_ISKFLT(pp)) {
- pp += skip;
- goto skip_contig;
- }
-
if (page_trylock_contig_pages(mnode, pp, szc, flags)) {
/* pages unlocked by page_claim on failure */
if (page_claim_contig_pages(pp, szc, flags)) {
@@ -3744,7 +3593,6 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
(pfn - mseg->pages_base);
}
}
-skip_contig:
if (pp >= endpp) {
/* start from the beginning */
MEM_NODE_ITERATOR_INIT(lo, mnode, szc, &it);
@@ -3758,6 +3606,7 @@ skip_contig:
return (NULL);
}
+
/*
* controlling routine that searches through physical memory in an attempt to
* claim a large page based on the input parameters.
@@ -3773,10 +3622,9 @@ skip_contig:
* for PGI_PGCPSZC0 requests, page_get_contig_pages will relocate a base
* pagesize page that satisfies mtype.
*/
-/* ARGSUSED */
page_t *
-page_get_contig_pages(page_freelist_type_t *fp, int mnode, uint_t bin,
- int mtype, uchar_t szc, uint_t flags)
+page_get_contig_pages(int mnode, uint_t bin, int mtype, uchar_t szc,
+ uint_t flags)
{
pfn_t pfnlo, pfnhi; /* contig pages pfn range */
page_t *pp;
@@ -3809,6 +3657,7 @@ page_get_contig_pages(page_freelist_type_t *fp, int mnode, uint_t bin,
do {
/* get pfn range based on mnode and mtype */
MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi);
+
ASSERT(pfnhi >= pfnlo);
pp = page_geti_contig_pages(mnode, bin, szc, flags,
@@ -3872,10 +3721,137 @@ page_t *
page_get_freelist(struct vnode *vp, u_offset_t off, struct seg *seg,
caddr_t vaddr, size_t size, uint_t flags, struct lgrp *lgrp)
{
- page_t *pp;
+ struct as *as = seg->s_as;
+ page_t *pp = NULL;
+ ulong_t bin;
+ uchar_t szc;
+ int mnode;
+ int mtype;
+ page_t *(*page_get_func)(int, uint_t, int, uchar_t, uint_t);
+ lgrp_mnode_cookie_t lgrp_cookie;
+
+ page_get_func = page_get_mnode_freelist;
+
+ /*
+ * If we aren't passed a specific lgroup, or passed a freed lgrp
+ * assume we wish to allocate near to the current thread's home.
+ */
+ if (!LGRP_EXISTS(lgrp))
+ lgrp = lgrp_home_lgrp();
+
+ if (kcage_on) {
+ if ((flags & (PG_NORELOC | PG_PANIC)) == PG_NORELOC &&
+ kcage_freemem < kcage_throttlefree + btop(size) &&
+ curthread != kcage_cageout_thread) {
+ /*
+ * Set a "reserve" of kcage_throttlefree pages for
+ * PG_PANIC and cageout thread allocations.
+ *
+ * Everybody else has to serialize in
+ * page_create_get_something() to get a cage page, so
+ * that we don't deadlock cageout!
+ */
+ return (NULL);
+ }
+ } else {
+ flags &= ~PG_NORELOC;
+ flags |= PGI_NOCAGE;
+ }
+
+ /* LINTED */
+ MTYPE_INIT(mtype, vp, vaddr, flags, size);
+
+ /*
+ * Convert size to page size code.
+ */
+ if ((szc = page_szc(size)) == (uchar_t)-1)
+ panic("page_get_freelist: illegal page size request");
+ ASSERT(szc < mmu_page_sizes);
+
+ VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc]);
+
+ /* LINTED */
+ AS_2_BIN(as, seg, vp, vaddr, bin, szc);
+
+ ASSERT(bin < PAGE_GET_PAGECOLORS(szc));
+
+ /*
+ * Try to get a local page first, but try remote if we can't
+ * get a page of the right color.
+ */
+pgretry:
+ LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL);
+ while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
+ pp = page_get_func(mnode, bin, mtype, szc, flags);
+ if (pp != NULL) {
+ VM_STAT_ADD(vmm_vmstats.pgf_allocok[szc]);
+ DTRACE_PROBE4(page__get,
+ lgrp_t *, lgrp,
+ int, mnode,
+ ulong_t, bin,
+ uint_t, flags);
+ return (pp);
+ }
+ }
+ ASSERT(pp == NULL);
+
+ /*
+ * for non-SZC0 PAGESIZE requests, check cachelist before checking
+ * remote free lists. Caller expected to call page_get_cachelist which
+ * will check local cache lists and remote free lists.
+ */
+ if (szc == 0 && ((flags & PGI_PGCPSZC0) == 0)) {
+ VM_STAT_ADD(vmm_vmstats.pgf_allocdeferred);
+ return (NULL);
+ }
+
+ ASSERT(szc > 0 || (flags & PGI_PGCPSZC0));
- PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp);
- return (pp);
+ lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
+
+ if (!(flags & PG_LOCAL)) {
+ /*
+ * Try to get a non-local freelist page.
+ */
+ LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
+ while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
+ pp = page_get_func(mnode, bin, mtype, szc, flags);
+ if (pp != NULL) {
+ DTRACE_PROBE4(page__get,
+ lgrp_t *, lgrp,
+ int, mnode,
+ ulong_t, bin,
+ uint_t, flags);
+ VM_STAT_ADD(vmm_vmstats.pgf_allocokrem[szc]);
+ return (pp);
+ }
+ }
+ ASSERT(pp == NULL);
+ }
+
+ /*
+ * when the cage is off chances are page_get_contig_pages() will fail
+ * to lock a large page chunk therefore when the cage is off it's not
+ * called by default. this can be changed via /etc/system.
+ *
+ * page_get_contig_pages() also called to acquire a base pagesize page
+ * for page_create_get_something().
+ */
+ if (!(flags & PG_NORELOC) && (pg_contig_disable == 0) &&
+ (kcage_on || pg_lpgcreate_nocage || szc == 0) &&
+ (page_get_func != page_get_contig_pages)) {
+
+ VM_STAT_ADD(vmm_vmstats.pgf_allocretry[szc]);
+ page_get_func = page_get_contig_pages;
+ goto pgretry;
+ }
+
+ if (!(flags & PG_LOCAL) && pgcplimitsearch &&
+ page_get_func == page_get_contig_pages)
+ SETPGCPFAILCNT(szc);
+
+ VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc]);
+ return (NULL);
}
/*
@@ -3929,7 +3905,7 @@ page_get_cachelist(struct vnode *vp, u_offset_t off, struct seg *seg,
}
/* LINTED */
- AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, 0);
+ AS_2_BIN(as, seg, vp, vaddr, bin, 0);
ASSERT(bin < PAGE_GET_PAGECOLORS(0));
@@ -3964,7 +3940,7 @@ page_get_cachelist(struct vnode *vp, u_offset_t off, struct seg *seg,
*/
LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
- pp = page_get_mnode_freelist(ufltp, mnode, bin, mtype,
+ pp = page_get_mnode_freelist(mnode, bin, mtype,
0, flags);
if (pp != NULL) {
VM_STAT_ADD(vmm_vmstats.pgc_allocokdeferred);
@@ -4027,16 +4003,7 @@ try_again:
if (!PAGE_CACHELISTS(mnode, bin, mtype))
goto bin_empty_1;
- /*
- * The first parameter is irrelevant here as the flags
- * parameter to this macro decides which mutex to lock.
- * With the PG_CACHE_LIST flag, we lock the cpc_mutex[].
- *
- * User pages from the kernel page freelist may be
- * on the cachelist.
- */
- pcm = PC_BIN_MUTEX(PFLT_USER, mnode, bin,
- PG_CACHE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST);
mutex_enter(pcm);
pp = PAGE_CACHELISTS(mnode, bin, mtype);
if (pp == NULL)
@@ -4096,11 +4063,7 @@ try_again:
if (PP_ISNORELOC(pp)) {
kcage_freemem_sub(1);
}
-#elif defined(__amd64) && !defined(__xpv)
- if (PP_ISKFLT(pp)) {
- kflt_freemem_sub(1);
- }
-#endif /* __sparc */
+#endif
VM_STAT_ADD(vmm_vmstats. pgmc_allocok);
return (pp);
}
@@ -4239,8 +4202,8 @@ page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target,
(mnode = lgrp_memnode_choose(&lgrp_cookie))
!= -1) {
pplist =
- page_get_mnode_freelist(ufltp,
- mnode, bin, mtype, szc, flags);
+ page_get_mnode_freelist(mnode, bin,
+ mtype, szc, flags);
}
/*
@@ -4280,7 +4243,7 @@ page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target,
* First try the local freelist...
*/
mnode = PP_2_MEM_NODE(like_pp);
- pplist = page_get_mnode_freelist(ufltp, mnode, bin,
+ pplist = page_get_mnode_freelist(mnode, bin,
mtype, szc, flags);
if (pplist != NULL)
break;
@@ -4319,13 +4282,14 @@ page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target,
(mem_node_config[mnode].exists == 0))
continue;
- pplist = page_get_mnode_freelist(ufltp, mnode,
+ pplist = page_get_mnode_freelist(mnode,
bin, mtype, szc, flags);
}
if (pplist != NULL)
break;
+
/* Now try remote cachelists */
LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp,
LGRP_SRCH_HIER);
@@ -4373,7 +4337,7 @@ page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target,
lgrp_memnode_choose(&lgrp_cookie))
!= -1) {
pplist = page_get_contig_pages(
- ufltp, mnode, bin, mtype, szc,
+ mnode, bin, mtype, szc,
flags | PGI_PGCPHIPRI);
}
break;
@@ -4482,481 +4446,3 @@ page_set_colorequiv_arr(void)
}
}
}
-
-/*
- * The freelist type data structures allow freelist type specific allocation
- * and policy routines to be configured. There are two freelist types currently
- * defined, one for kernel memory allocation and the other for user memory.
- * The page_get_uflt() routine is called by the PAGE_GET_FREELISTS() macro to
- * allocate memory from the user freelist type.
- */
-
-/* ARGSUSED */
-page_t *
-page_get_uflt(struct vnode *vp, u_offset_t off, struct seg *seg, caddr_t vaddr,
- size_t size, uint_t flags, struct lgrp *lgrp)
-{
- struct as *as = seg->s_as;
- ulong_t bin;
- uchar_t szc;
- int mtype;
-
- /*
- * If we aren't passed a specific lgroup, or passed a freed lgrp
- * assume we wish to allocate near the current thread's home.
- */
- if (!LGRP_EXISTS(lgrp))
- lgrp = lgrp_home_lgrp();
-
- if (kcage_on) {
- if ((flags & (PG_NORELOC | PG_PANIC)) == PG_NORELOC &&
- kcage_freemem < kcage_throttlefree + btop(size) &&
- curthread != kcage_cageout_thread) {
- /*
- * Set a "reserve" of kcage_throttlefree pages for
- * PG_PANIC and cageout thread allocations.
- *
- * Everybody else has to serialize in
- * page_create_get_something() to get a cage page, so
- * that we don't deadlock cageout!
- */
- return (NULL);
- }
- } else {
- flags &= ~PG_NORELOC;
- flags |= PGI_NOCAGE;
- }
-
- /* LINTED */
- MTYPE_INIT(mtype, vp, vaddr, flags, size);
-
- /*
- * Convert size to page size code.
- */
- if ((szc = page_szc(size)) == (uchar_t)-1)
- panic("page_get_uflt: illegal page size request");
- ASSERT(szc < mmu_page_sizes);
-
- VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc][ufltp->pflt_type]);
-
- /* LINTED */
- AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, szc);
-
- ASSERT(bin < PAGE_GET_PAGECOLORS(szc));
-
- return (page_get_flist(ufltp, bin, mtype, szc, flags, lgrp));
-}
-
-/*
- * This routine is passed a page color and initial mtype, and calls the page
- * freelist type policy routines which actually do the allocations, first
- * trying the local and then remote lgroups. The policy routines for user
- * page allocations are currently configured to be:
- *
- * x64 systems support two freelist types, user and kernel.
- *
- * The user freelist has 3 policy routines.
- *
- * 1. page_get_mnode_freelist to allocate a page from the user freelists.
- * 2. page_user_alloc_kflt to allocate a page from the kernel freelists
- * 3. page_get_contig_pages to search for a large page in physical memory.
- *
- * The kernel freelist has only 1 policy routine.
- *
- * 1. page_get_mnode_freelist to allocate a page from the kernel freelists.
- *
- * Sparc, x32 and Xen systems support only the user freelist type.
- *
- * The user freelist has 2 policy routines.
- *
- * 1. page_get_mnode_freelist to allocate a page from the user freelists.
- * 2. page_get_contig_pages to search for a large page in physical memory.
- *
- */
-page_t *
-page_get_flist(page_freelist_type_t *fltp, uint_t bin, int mtype,
- uchar_t szc, uint_t flags, struct lgrp *lgrp)
-{
- page_t *pp = NULL;
- page_t *(*page_get_func)(page_freelist_type_t *,
- int, uint_t, int, uchar_t, uint_t);
- lgrp_mnode_cookie_t lgrp_cookie;
- int i;
- int mnode;
-
- for (i = 0; i < fltp->pflt_num_policies; i++) {
- page_get_func = PAGE_GET_FREELISTS_POLICY(fltp, i);
-
- /*
- * when the cage and the kernel freelist are off chances are
- * that page_get_contig_pages() will fail to lock a large
- * page chunk therefore in this case it's not called by
- * default. This can be changed via /etc/system.
- *
- * page_get_contig_pages() also called to acquire a base
- * pagesize page for page_create_get_something().
- */
- if (page_get_func == page_get_contig_pages) {
- if ((flags & PG_NORELOC) ||
- (pg_contig_disable != 0) ||
- (!kcage_on && !kflt_on &&
- !pg_lpgcreate_nocage && szc != 0)) {
- continue;
-#ifdef VM_STATS
- } else {
- VM_STAT_ADD(
- vmm_vmstats.
- pgf_allocretry[szc][fltp->pflt_type]);
-#endif
- }
- }
-
- /*
- * Try to get a local page first, but try remote if we can't
- * get a page of the right color.
- */
- LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL);
- while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
-
- pp = page_get_func(fltp, mnode, bin, mtype, szc,
- flags);
- if (pp != NULL) {
-#ifdef VM_STATS
- VM_STAT_ADD(
- vmm_vmstats.
- pgf_allocok[szc][fltp->pflt_type]);
-#endif
- DTRACE_PROBE4(page__get__page,
- lgrp_t *, lgrp,
- int, mnode,
- ulong_t, bin,
- uint_t, flags);
- return (pp);
- }
- }
- ASSERT(pp == NULL);
-
- /*
- * for non-PGI_PGCPSZC0 PAGESIZE requests, check cachelist
- * before checking remote free lists. Caller expected to call
- * page_get_cachelist which will check local cache lists
- * and remote free lists.
- */
- if (!PC_ISKFLT(fltp) && szc == 0 &&
- ((flags & PGI_PGCPSZC0) == 0)) {
- VM_STAT_ADD(vmm_vmstats.pgf_allocdeferred);
- return (NULL);
- }
-
- ASSERT(PC_ISKFLT(fltp) || szc > 0 || (flags & PGI_PGCPSZC0));
-
- lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
-
- if (!(flags & PG_LOCAL)) {
- /*
- * Try to get a non-local freelist page.
- */
- LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
- while ((mnode =
- lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
- pp = page_get_func(fltp, mnode, bin, mtype,
- szc, flags);
- if (pp != NULL) {
- DTRACE_PROBE4(page__get,
- lgrp_t *, lgrp,
- int, mnode,
- ulong_t, bin,
- uint_t, flags);
-#ifdef VM_STATS
- VM_STAT_ADD(vmm_vmstats.
- pgf_allocokrem[szc]
- [fltp->pflt_type]);
-#endif
- return (pp);
- }
- }
- ASSERT(pp == NULL);
- }
-
- if (!(flags & PG_LOCAL) && pgcplimitsearch &&
- page_get_func == page_get_contig_pages)
- SETPGCPFAILCNT(szc);
- }
-
-#ifdef VM_STATS
- VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc][fltp->pflt_type]);
-#endif
-
- return (NULL);
-}
-#if defined(__amd64) && !defined(__xpv)
-/*
- * The page_get_kflt() routine is called by the PAGE_GET_FREELISTS() macro to
- * allocate memory from the kernel freelist type.
- */
-/* ARGSUSED */
-page_t *
-page_get_kflt(struct vnode *vp, u_offset_t off, struct seg *seg, caddr_t vaddr,
- size_t size, uint_t flags, struct lgrp *lgrp)
-{
- struct as *as = seg->s_as;
- page_t *pp = NULL;
- ulong_t bin;
- uchar_t szc;
- int mtype;
-
- ASSERT(!kcage_on);
- ASSERT(kflt_on);
- ASSERT((flags & PG_KFLT) == PG_KFLT);
-
- flags &= ~PG_NORELOC;
- flags |= PGI_NOCAGE;
-
- if ((flags & PG_PANIC) == 0 &&
- kflt_freemem < kflt_throttlefree + btop(size) &&
- curthread != kflt_evict_thread) {
- return (NULL);
- }
-
- /* LINTED */
- MTYPE_INIT(mtype, vp, vaddr, flags, size);
-
- /*
- * If we aren't passed a specific lgroup, or passed a freed lgrp
- * assume we wish to allocate near to the current thread's home.
- */
- if (!LGRP_EXISTS(lgrp))
- lgrp = lgrp_home_lgrp();
-
- /*
- * Convert size to page size code.
- */
- if ((szc = page_szc(size)) == (uchar_t)-1)
- panic("page_get_kflt: illegal page size request");
- ASSERT(szc == 0);
- ASSERT(!(flags & PG_LOCAL));
-
- VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc][kfltp->pflt_type]);
-
- /* LINTED */
- AS_2_BIN(PFLT_KMEM, as, seg, vp, vaddr, bin, szc);
-
- ASSERT(bin < PAGE_GET_PAGECOLORS(szc));
- ASSERT(bin < KFLT_PAGE_COLORS);
-
-retry:
- pp = page_get_flist(kfltp, bin, mtype, szc, flags, lgrp);
-
- if (pp != NULL) {
- return (pp);
- }
-
-#if defined(__amd64)
- if (kernel_page_update_flags_x86(&flags)) {
- goto retry;
- }
-#endif
- /*
- * Import memory from user page freelists.
- */
-
- /* LINTED: constant in conditional context */
- AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, KFLT_PAGESIZE);
-
- ASSERT(bin < PAGE_GET_PAGECOLORS(KFLT_PAGESIZE));
-
- if ((pp = page_import_kflt(kfltp, bin, mtype, szc,
- flags | PGI_NOPGALLOC | PGI_PGCPHIPRI, NULL)) != NULL) {
- VM_STAT_ADD(vmm_vmstats.pgf_allocok[szc][kfltp->pflt_type]);
- return (pp);
- }
-
- VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc][kfltp->pflt_type]);
- return (NULL);
-}
-
-/*
- * This is the policy routine used to allocate user memory on the kernel
- * freelist.
- */
-/* ARGSUSED */
-page_t *
-page_user_alloc_kflt(page_freelist_type_t *fp, int mnode, uint_t bin, int mtype,
- uchar_t szc, uint_t flags)
-{
- page_t *pp;
-
- if (szc != 0)
- return (NULL);
-
- if (kflt_freemem < kflt_desfree) {
- kflt_evict_wakeup();
- }
- flags &= ~PG_MATCH_COLOR;
-
- bin = USER_2_KMEM_BIN(bin);
-
- if ((pp = page_get_mnode_freelist(kfltp, mnode,
- bin, mtype, szc, flags)) != NULL) {
- VM_STAT_ADD(vmm_vmstats.puak_allocok);
- atomic_add_long(&kflt_user_alloc, 1);
- PP_SETUSERKFLT(pp);
- return (pp);
- }
-
- VM_STAT_ADD(vmm_vmstats.puak_allocfailed);
- return (NULL);
-}
-
-/*
- * This routine is called in order to allocate a large page from the user page
- * freelist and split this into small pages which are then placed on the kernel
- * freelist. If it is called from the kflt_expand() routine, the PGI_NOPGALLOC
- * flag is set to indicate that all pages should be placed on the freelist,
- * otherwise a page of the requested type and color will be returned.
- */
-/* ARGSUSED */
-page_t *
-page_import_kflt(page_freelist_type_t *fp, uint_t bin, int mtype,
- uchar_t szc, uint_t flags, int *np)
-{
- page_t *pp, *pplist;
- uint_t alloc_szc = KFLT_PAGESIZE;
- kmutex_t *pcm;
- page_t *ret_pp = NULL;
- uint_t req_bin = bin;
- int req_mtype = mtype;
- int pgcnt = 0;
- int pgalloc;
- int mnode;
- struct lgrp *lgrp;
-
- ASSERT(szc == 0);
-
- flags &= ~(PG_LOCAL|PG_MATCH_COLOR);
- lgrp = lgrp_home_lgrp();
-
- pgalloc = ((flags & PGI_NOPGALLOC) == 0);
-
- /* Allocate a large page from the user pagelist */
- if ((pplist = page_get_flist(ufltp, bin, mtype, alloc_szc,
- flags, lgrp)) != NULL) {
-
- VM_STAT_ADD(vmm_vmstats.pgik_allocok);
- CHK_LPG(pplist, alloc_szc);
- mnode = PP_2_MEM_NODE(pplist);
- /*
- * Split up the large page and put the constituent pages
- * on the kernel freelist.
- */
- while (pplist) {
- pgcnt++;
- pp = pplist;
- ASSERT(pp->p_szc == alloc_szc);
- ASSERT(PP_ISFREE(pp));
- mach_page_sub(&pplist, pp);
-
- pp->p_szc = 0;
- PP_SETKFLT(pp);
- mtype = PP_2_MTYPE(pp);
- bin = PP_2_BIN(pp);
- if (pgalloc && (ret_pp == NULL) &&
- ((bin == req_bin && mtype == req_mtype))) {
- ret_pp = pp;
- } else {
- pcm = PC_BIN_MUTEX(PFLT_KMEM, mnode, bin,
- PG_FREE_LIST);
- ASSERT(mtype == PP_2_MTYPE(pp));
- mutex_enter(pcm);
- mach_page_add(PAGE_FREELISTP(PFLT_KMEM, mnode,
- 0, bin, mtype), pp);
- page_ctr_add(mnode, mtype, pp, PG_FREE_LIST);
- mutex_exit(pcm);
- page_unlock(pp);
- }
- }
-
- if (np != NULL)
- *np = pgcnt;
-
- if (ret_pp == NULL) {
- kflt_freemem_add(pgcnt);
- } else {
- kflt_freemem_add(pgcnt - 1);
- }
- return (ret_pp);
-
- } else {
-
- VM_STAT_ADD(vmm_vmstats.pgik_allocfailed);
- return (NULL);
- }
-}
-
-/*
- * This routine is called from the kflt_user_evict() thread when kernel
- * memory is low and the thread has not managed to increase it by freeing up
- * user pages.
- */
-void
-kflt_expand()
-{
- ulong_t bin;
- int mtype;
- uint_t flags;
- spgcnt_t wanted;
- caddr_t vaddr;
- int np;
- int lpallocated = 0;
- int retries;
-
- ASSERT(kflt_on);
- vaddr = 0;
- flags = PGI_NOPGALLOC | PGI_PGCPHIPRI;
-
- wanted = MAX(kflt_lotsfree, kflt_throttlefree + kflt_needfree)
- - kflt_freemem;
-
- if (wanted <= 0) {
- return;
- }
-
- /* LINTED */
- MTYPE_INIT(mtype, &kvp, vaddr, flags, KFLT_PAGESIZE);
-
-#if defined(__amd64)
- (void) kernel_page_update_flags_x86(&flags);
-#endif
- /* LINTED */
- AS_2_BIN(PFLT_USER, &kas, NULL, &kvp, vaddr, bin, 1);
-
- retries = 0;
- while (kflt_on && wanted > 0) {
- (void) page_import_kflt(kfltp, bin, mtype, 0,
- flags, &np);
-
- if (np == 0) {
- if (lpallocated == 0 &&
- retries < KFLT_EXPAND_RETRIES) {
- retries++;
- ASSERT((flags & (PGI_NOPGALLOC | PGI_PGCPHIPRI))
- == (PGI_NOPGALLOC | PGI_PGCPHIPRI));
- continue;
- }
- break;
- } else {
- wanted -= np;
- lpallocated = 1;
- }
-
- }
-
-#ifdef DEBUG
- if (lpallocated) {
- VM_STAT_ADD(vmm_vmstats.pgkx_allocok);
- } else {
- VM_STAT_ADD(vmm_vmstats.pgkx_allocfailed);
- }
-#endif
-}
-#endif /* __amd64 && !__xpv */
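
The import path removed in this hunk is the heart of the kernel freelist design: page_get_kflt() first walks the kernel freelist, and only on failure calls page_import_kflt(), which takes one KFLT_PAGESIZE page from the user freelist, breaks it into base pages, keeps one constituent page matching the requested bin and mtype (unless PGI_NOPGALLOC asks for everything to be freelisted), and frees the remainder onto the kernel freelist. A minimal user-space sketch of that split-and-keep-one step; the names, sizes, and list representation here are illustrative, not the kernel's:

#include <stdio.h>
#include <stdlib.h>

#define NPGS    512                     /* base pages per large page */

struct spage {
        unsigned long   sp_pfn;
        struct spage    *sp_next;
};

static struct spage *kflt_head;         /* stand-in kernel freelist */
static unsigned long kflt_free;         /* stand-in kflt_freemem */

/* Split the large page at base_pfn; return one base page to the caller. */
static struct spage *
import_large_page(unsigned long base_pfn)
{
        struct spage *ret = NULL;
        int i;

        for (i = 0; i < NPGS; i++) {
                struct spage *pp = malloc(sizeof (*pp));

                pp->sp_pfn = base_pfn + i;
                if (ret == NULL) {
                        pp->sp_next = NULL;
                        ret = pp;       /* first page satisfies the caller */
                } else {
                        pp->sp_next = kflt_head;        /* rest feed the */
                        kflt_head = pp;                 /* kernel freelist */
                        kflt_free++;
                }
        }
        return (ret);
}

int
main(void)
{
        struct spage *pp = import_large_page(0x1000);

        printf("got pfn 0x%lx, %lu pages left on the freelist\n",
            pp->sp_pfn, kflt_free);
        return (0);
}

The real routine is choosier about which page it hands back, which is why it compares req_bin and req_mtype against each constituent page inside the split loop.
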
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 4851ad7bac..8156574ad1 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -75,7 +75,6 @@ CORE_OBJS += \
kdi_idt.o \
kdi_idthdl.o \
kdi_asm.o \
- kflt_mem.o \
lgrpplat.o \
mach_kdi.o \
mach_sysconfig.o \
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index 7ff23da796..f69b37a9f2 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -119,7 +119,6 @@
#include <sys/ddi_timer.h>
#include <sys/systeminfo.h>
#include <sys/multiboot.h>
-#include <sys/kflt_mem.h>
#ifdef __xpv
@@ -234,14 +233,6 @@ int kobj_file_bufsize; /* set in /etc/system */
caddr_t rm_platter_va = 0;
uint32_t rm_platter_pa;
-/*
- * On 64-bit systems, enable the kernel page freelist
- */
-#if defined(__amd64) && !defined(__xpv)
-int kflt_disable = 0;
-#else
-int kflt_disable = 1;
-#endif /* __amd64 && !__xpv */
int auto_lpg_disable = 1;
/*
@@ -2196,13 +2187,6 @@ startup_end(void)
#endif
/*
- * Create the kernel page freelist management thread for x64 systems.
- */
- if (!kflt_disable) {
- kflt_init();
- }
-
- /*
* Configure the system.
*/
PRM_POINT("Calling configure()...");
diff --git a/usr/src/uts/i86pc/vm/kflt_mem.c b/usr/src/uts/i86pc/vm/kflt_mem.c
deleted file mode 100644
index 1d22bbc6c5..0000000000
--- a/usr/src/uts/i86pc/vm/kflt_mem.c
+++ /dev/null
@@ -1,990 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright (c) 2010, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/thread.h>
-#include <sys/proc.h>
-#include <sys/callb.h>
-#include <sys/vnode.h>
-#include <sys/debug.h>
-#include <sys/systm.h> /* for bzero */
-#include <sys/memlist.h>
-#include <sys/cmn_err.h>
-#include <sys/sysmacros.h>
-#include <sys/vmsystm.h> /* for NOMEMWAIT() */
-#include <sys/atomic.h> /* used to update kflt_freemem */
-#include <sys/kmem.h> /* for kmem_reap */
-#include <sys/errno.h>
-#include <sys/kflt_mem.h>
-#include <vm/seg_kmem.h>
-#include <vm/page.h>
-#include <vm/hat.h>
-#include <vm/vm_dep.h>
-#include <sys/mem_config.h>
-#include <sys/lgrp.h>
-#include <sys/rwlock.h>
-#include <sys/cpupart.h>
-
-#ifdef DEBUG
-#define KFLT_STATS
-#endif
-
-#ifdef KFLT_STATS
-
-#define KFLT_STATS_VERSION 1 /* can help report generators */
-#define KFLT_STATS_NSCANS 256 /* depth of scan statistics buffer */
-
-struct kflt_stats_scan {
- /* managed by KFLT_STAT_* macros */
- clock_t scan_lbolt;
- uint_t scan_id;
-
- /* set in kflt_user_evict() */
- uint_t kt_passes;
- clock_t kt_ticks;
- pgcnt_t kt_kflt_freemem_start;
- pgcnt_t kt_kflt_freemem_end;
- pgcnt_t kt_kflt_user_alloc_start;
- pgcnt_t kt_kflt_user_alloc_end;
- pgcnt_t kt_pfn_start;
- pgcnt_t kt_pfn_end;
- pgcnt_t kt_mnode_start;
- pgcnt_t kt_mnode_end;
- uint_t kt_examined;
- uint_t kt_cantlock;
- uint_t kt_skiplevel;
- uint_t kt_skipshared;
- uint_t kt_skiprefd;
- uint_t kt_destroy;
-
- /* set in kflt_invalidate_page() */
- uint_t kip_reloclocked;
- uint_t kip_relocmod;
- uint_t kip_destroy;
- uint_t kip_nomem;
- uint_t kip_demotefailed;
-
- /* set in kflt_export */
- uint_t kex_lp;
- uint_t kex_err;
- uint_t kex_scan;
-};
-
-struct kflt_stats {
- /* managed by KFLT_STAT_* macros */
- uint_t version;
- uint_t size;
-
- /* set in kflt_evict_thread */
- uint_t kt_wakeups;
- uint_t kt_scans;
- uint_t kt_evict_break;
-
- /* set in kflt_create_throttle */
- uint_t kft_calls;
- uint_t kft_user_evict;
- uint_t kft_critical;
- uint_t kft_exempt;
- uint_t kft_wait;
- uint_t kft_progress;
- uint_t kft_noprogress;
- uint_t kft_timeout;
-
- /* managed by KFLT_STAT_* macros */
- uint_t scan_array_size;
- uint_t scan_index;
- struct kflt_stats_scan scans[KFLT_STATS_NSCANS];
-};
-
-static struct kflt_stats kflt_stats;
-static struct kflt_stats_scan kflt_stats_scan_zero;
-
-/*
- * No real need for atomics here. For the most part the incs and sets are
- * done by the kernel freelist thread. There are a few that are done by any
- * number of other threads. Those cases are noted by comments.
- */
-#define KFLT_STAT_INCR(m) kflt_stats.m++
-
-#define KFLT_STAT_NINCR(m, v) kflt_stats.m += (v)
-
-#define KFLT_STAT_INCR_SCAN(m) \
- KFLT_STAT_INCR(scans[kflt_stats.scan_index].m)
-
-#define KFLT_STAT_NINCR_SCAN(m, v) \
- KFLT_STAT_NINCR(scans[kflt_stats.scan_index].m, v)
-
-#define KFLT_STAT_SET(m, v) kflt_stats.m = (v)
-
-#define KFLT_STAT_SETZ(m, v) \
- if (kflt_stats.m == 0) kflt_stats.m = (v)
-
-#define KFLT_STAT_SET_SCAN(m, v) \
- KFLT_STAT_SET(scans[kflt_stats.scan_index].m, v)
-
-#define KFLT_STAT_SETZ_SCAN(m, v) \
- KFLT_STAT_SETZ(scans[kflt_stats.scan_index].m, v)
-
-#define KFLT_STAT_INC_SCAN_INDEX \
- KFLT_STAT_SET_SCAN(scan_lbolt, ddi_get_lbolt()); \
- KFLT_STAT_SET_SCAN(scan_id, kflt_stats.scan_index); \
- kflt_stats.scan_index = \
- (kflt_stats.scan_index + 1) % KFLT_STATS_NSCANS; \
- kflt_stats.scans[kflt_stats.scan_index] = kflt_stats_scan_zero
-
-#define KFLT_STAT_INIT_SCAN_INDEX \
- kflt_stats.version = KFLT_STATS_VERSION; \
- kflt_stats.size = sizeof (kflt_stats); \
- kflt_stats.scan_array_size = KFLT_STATS_NSCANS; \
- kflt_stats.scan_index = 0
-
-#else /* KFLT_STATS */
-
-#define KFLT_STAT_INCR(v)
-#define KFLT_STAT_NINCR(m, v)
-#define KFLT_STAT_INCR_SCAN(v)
-#define KFLT_STAT_NINCR_SCAN(m, v)
-#define KFLT_STAT_SET(m, v)
-#define KFLT_STAT_SETZ(m, v)
-#define KFLT_STAT_SET_SCAN(m, v)
-#define KFLT_STAT_SETZ_SCAN(m, v)
-#define KFLT_STAT_INC_SCAN_INDEX
-#define KFLT_STAT_INIT_SCAN_INDEX
-
-#endif /* KFLT_STATS */
-
-/* Internal Routines */
-void kflt_init(void);
-void kflt_evict_wakeup(void);
-static boolean_t kflt_evict_cpr(void *, int);
-static void kflt_thread_init(void);
-static pfn_t kflt_get_next_pfn(int *, pfn_t);
-static void kflt_user_evict(void);
-static int kflt_invalidate_page(page_t *, pgcnt_t *);
-static int kflt_relocate_page(page_t *, pgcnt_t *);
-
-extern mnoderange_t *mnoderanges;
-extern int mnoderangecnt;
-void wakeup_pcgs(void);
-
-page_t *page_promote(int, pfn_t, uchar_t, int, int);
-
-static kcondvar_t kflt_evict_cv; /* evict thread naps here */
-static kmutex_t kflt_evict_mutex; /* protects cv and ready flag */
-static int kflt_evict_ready; /* nonzero when evict thread ready */
-kthread_id_t kflt_evict_thread; /* to aid debugging */
-static kmutex_t kflt_throttle_mutex; /* protects kflt_throttle_cv */
-static kcondvar_t kflt_throttle_cv;
-
-/*
- * Statistics used to drive the behavior of the evict daemon.
- */
-pgcnt_t kflt_freemem; /* free memory on kernel freelist */
-pgcnt_t kflt_needfree; /* memory requirement for throttled threads */
-pgcnt_t kflt_lotsfree; /* export free kernel memory if > lotsfree */
-pgcnt_t kflt_desfree; /* wakeup evict thread if freemem < desfree */
-pgcnt_t kflt_minfree; /* keep scanning if freemem < minfree */
-pgcnt_t kflt_user_alloc; /* user memory allocated on kernel freelist */
-pgcnt_t kflt_throttlefree; /* throttle non-critical threads */
-pgcnt_t kflt_reserve; /* don't throttle real time if > reserve */
- /* time in seconds to check on throttled threads */
-int kflt_maxwait = 10;
-
-int kflt_on = 0; /* indicates evict thread is initialised */
-
-/*
- * This is called before a CPR suspend and after a CPR resume. We have to
- * turn off kflt_evict before a suspend, and turn it back on after a
- * restart.
- */
-/*ARGSUSED*/
-static boolean_t
-kflt_evict_cpr(void *arg, int code)
-{
- if (code == CB_CODE_CPR_CHKPT) {
- ASSERT(kflt_evict_ready);
- kflt_evict_ready = 0;
- return (B_TRUE);
- } else if (code == CB_CODE_CPR_RESUME) {
- ASSERT(kflt_evict_ready == 0);
- kflt_evict_ready = 1;
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
-/*
- * Sets up kernel freelist related statistics and starts the evict thread.
- */
-void
-kflt_init(void)
-{
- ASSERT(!kflt_on);
-
- if (kflt_disable) {
- return;
- }
-
- mutex_init(&kflt_evict_mutex, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&kflt_evict_cv, NULL, CV_DEFAULT, NULL);
-
- if (kflt_lotsfree == 0)
- kflt_lotsfree = MAX(32, total_pages / 128);
-
- if (kflt_minfree == 0)
- kflt_minfree = MAX(32, kflt_lotsfree / 4);
-
- if (kflt_desfree == 0)
- kflt_desfree = MAX(32, kflt_minfree);
-
- if (kflt_throttlefree == 0)
- kflt_throttlefree = MAX(32, kflt_minfree / 2);
-
- if (kflt_reserve == 0)
- kflt_reserve = MAX(32, kflt_throttlefree / 2);
-
- (void) callb_add(kflt_evict_cpr, NULL, CB_CL_CPR_POST_KERNEL,
- "kflt_evict_thread");
-
- kflt_on = 1;
- kflt_thread_init();
-}
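
For concreteness, the sizing cascade just above works out as follows. Assuming 4 KB pages and 4 GB of physical memory (total_pages = 1,048,576), the defaults are kflt_lotsfree = 1048576 / 128 = 8192 pages (32 MB), kflt_minfree = 8192 / 4 = 2048, kflt_desfree = 2048, kflt_throttlefree = 2048 / 2 = 1024, and kflt_reserve = 1024 / 2 = 512, with every value clamped to at least 32 pages by the MAX(32, ...) terms.
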
-
-/*
- * Wake up the kflt_user_evict thread and throttle, waiting for the number
- * of pages requested to become available. For non-critical requests, a
- * timeout is added, since freemem accounting is separate from kflt
- * freemem accounting: it's possible for us to get stuck and not make
- * forward progress even though there was sufficient freemem before
- * arriving here.
- */
-int
-kflt_create_throttle(pgcnt_t npages, int flags)
-{
- int niter = 0;
- pgcnt_t lastfree;
- int enough = kflt_freemem > kflt_throttlefree + npages;
-
- KFLT_STAT_INCR(kft_calls); /* unprotected incr. */
-
- kflt_evict_wakeup(); /* just to be sure */
- KFLT_STAT_INCR(kft_user_evict); /* unprotected incr. */
-
- /*
- * Obviously, we can't throttle the evict thread since
- * we depend on it. We also can't throttle the panic thread.
- */
- if (curthread == kflt_evict_thread ||
- !kflt_evict_ready || panicstr) {
- KFLT_STAT_INCR(kft_user_evict); /* unprotected incr. */
- return (KFT_CRIT);
- }
-
- /*
- * Don't throttle threads which are critical for proper
- * vm management if we're above kflt_throttlefree or
- * if freemem is very low.
- */
- if (NOMEMWAIT()) {
- if (enough) {
- KFLT_STAT_INCR(kft_exempt); /* unprotected incr. */
- return (KFT_CRIT);
- } else if (freemem < minfree) {
- KFLT_STAT_INCR(kft_critical); /* unprotected incr. */
- return (KFT_CRIT);
- }
- }
-
- /*
- * Don't throttle real-time threads if kflt_freemem > kflt_reserve.
- */
- if (DISP_PRIO(curthread) > maxclsyspri &&
- kflt_freemem > kflt_reserve) {
- KFLT_STAT_INCR(kft_exempt); /* unprotected incr. */
- return (KFT_CRIT);
- }
-
- /*
- * Cause all other threads (which are assumed to not be
- * critical to kflt_user_evict) to wait here until their request
- * can be satisfied. Be a little paranoid and wake the
- * kernel evict thread on each loop through this logic.
- */
- while (kflt_freemem < kflt_throttlefree + npages) {
- ASSERT(kflt_on);
-
- lastfree = kflt_freemem;
-
- if (kflt_evict_ready) {
- mutex_enter(&kflt_throttle_mutex);
-
- kflt_needfree += npages;
- KFLT_STAT_INCR(kft_wait);
-
- kflt_evict_wakeup();
- KFLT_STAT_INCR(kft_user_evict);
-
- cv_wait(&kflt_throttle_cv, &kflt_throttle_mutex);
-
- kflt_needfree -= npages;
-
- mutex_exit(&kflt_throttle_mutex);
- } else {
- /*
- * NOTE: atomics are used just in case we enter
- * mp operation before the evict thread is ready.
- */
- atomic_add_long(&kflt_needfree, npages);
-
- kflt_evict_wakeup();
- KFLT_STAT_INCR(kft_user_evict); /* unprotected incr. */
-
- atomic_add_long(&kflt_needfree, -npages);
- }
-
- if ((flags & PG_WAIT) == 0) {
- if (kflt_freemem > lastfree) {
- KFLT_STAT_INCR(kft_progress);
- niter = 0;
- } else {
- KFLT_STAT_INCR(kft_noprogress);
- if (++niter >= kflt_maxwait) {
- KFLT_STAT_INCR(kft_timeout);
- return (KFT_FAILURE);
- }
- }
- }
-
- if (NOMEMWAIT() && freemem < minfree) {
- return (KFT_CRIT);
- }
-
- }
- return (KFT_NONCRIT);
-}
-/*
- * Creates the kernel freelist evict thread.
- */
-static void
-kflt_thread_init(void)
-{
- if (kflt_on) {
- if (thread_create(NULL, 0, kflt_user_evict,
- NULL, 0, &p0, TS_RUN, maxclsyspri - 1) == NULL) {
- kflt_on = 0;
- }
- }
-}
-
-/*
- * This routine is used by the kernel freelist evict thread to iterate over the
- * pfns.
- */
-static pfn_t
-kflt_get_next_pfn(int *mnode, pfn_t pfn)
-{
- ASSERT((*mnode >= 0) && (*mnode <= mnoderangecnt));
- ASSERT((pfn == PFN_INVALID) || (pfn >= mnoderanges[*mnode].mnr_pfnlo));
-
- if (pfn == PFN_INVALID) {
- *mnode = 0;
- pfn = mnoderanges[0].mnr_pfnlo;
- return (pfn);
- }
-
- pfn++;
- if (pfn > mnoderanges[*mnode].mnr_pfnhi) {
- (*mnode)++;
- if (*mnode >= mnoderangecnt) {
- return (PFN_INVALID);
- }
- pfn = mnoderanges[*mnode].mnr_pfnlo;
- }
- return (pfn);
-}
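
kflt_get_next_pfn() is a cursor-style iterator: PFN_INVALID resets the cursor to the first pfn of mnoderange 0, and the mnode index advances whenever the pfn walks off the top of its current range. A caller's loop reduces to a sketch like the following, where examine() is an illustrative stand-in for the eviction and export checks kflt_user_evict() performs below:

        int mnode = 0;
        pfn_t pfn = PFN_INVALID;        /* reset the cursor */
        page_t *pp;

        while ((pfn = kflt_get_next_pfn(&mnode, pfn)) != PFN_INVALID) {
                if ((pp = page_numtopp_nolock(pfn)) != NULL)
                        examine(pp);
        }
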
-/*
- * Locks all the kernel page freelist mutexes before promoting a group of pages
- * and returning the large page to the user page freelist.
- */
-void
-page_kflt_lock(int mnode)
-{
- int i;
- for (i = 0; i < NPC_MUTEX; i++) {
- mutex_enter(KFPC_MUTEX(mnode, i));
- }
-}
-
-/*
- * Unlocks all the kernel page freelist mutexes after promoting a group of pages
- * and returning the large page to the user page freelist.
- */
-void
-page_kflt_unlock(int mnode)
-{
- int i;
- for (i = 0; i < NPC_MUTEX; i++) {
- mutex_exit(KFPC_MUTEX(mnode, i));
- }
-}
-
-/*
- * This routine is called by the kflt_user_evict() thread whenever a free page
- * is found on the kernel page freelist and there is an excess of free memory on
- * the kernel freelist. It determines whether it is possible to promote groups
- * of small free pages into a large page which can then be returned to the
- * user page freelist.
- */
-static int
-kflt_export(page_t *pp, int init_state)
-{
- static page_t *lp_base = 0;
- static pfn_t lp_base_page_num = 0;
- static pgcnt_t lp_count = 0;
- page_t *tpp;
- page_t *lpp;
- pfn_t lp_page_num;
- int mtype;
- int mnode;
- int bin;
- pgcnt_t pages_left, npgs;
- uchar_t new_szc = KFLT_PAGESIZE;
- int ret;
- kmutex_t *pcm;
-
-
- /*
- * We're not holding any locks yet, so pp state may change.
- */
- if (init_state || !PP_ISFREE(pp) || !PP_ISKFLT(pp)) {
- lp_base = NULL;
- lp_base_page_num = 0;
- lp_count = 0;
- return (0);
- }
-
- ret = 0;
- npgs = page_get_pagecnt(new_szc);
- lp_page_num = PFN_BASE(pp->p_pagenum, new_szc);
-
- /* Count pages with the same large page base */
- if (lp_page_num == lp_base_page_num) {
- ASSERT((pp->p_pagenum - lp_base_page_num) < npgs);
- ASSERT(lp_count < npgs);
- lp_count++;
- if (lp_count == npgs) {
- KFLT_STAT_INCR_SCAN(kex_lp);
- ASSERT(lp_base != NULL);
- mnode = PP_2_MEM_NODE(pp);
- page_kflt_lock(mnode);
-
- /*
- * Check that all pages are still free and on the kernel
- * freelist.
- */
- for (tpp = lp_base, pages_left = npgs; pages_left;
- tpp++, pages_left--) {
- if (!PP_ISFREE(tpp) || !PP_ISKFLT(tpp)) {
- page_kflt_unlock(mnode);
- KFLT_STAT_INCR_SCAN(kex_err);
- goto out;
- }
- }
-
- lpp = page_promote(PP_2_MEM_NODE(lp_base),
- lp_base_page_num, new_szc, PC_KFLT_EXPORT,
- PP_2_MTYPE(lp_base));
- page_kflt_unlock(mnode);
-
-#ifdef KFLT_STATS
- if (lpp == NULL)
- VM_STAT_ADD(vmm_vmstats.pgexportfail);
-#endif
- if (lpp != NULL) {
- VM_STAT_ADD(vmm_vmstats.pgexportok);
- /* clear kflt bit in each page */
- tpp = lpp;
- do {
- ASSERT(PP_ISKFLT(tpp));
- ASSERT(PP_ISFREE(tpp));
- PP_CLRKFLT(tpp);
- tpp = tpp->p_next;
- } while (tpp != lpp);
-
- /*
- * Return large page to the user page
- * freelist
- */
- atomic_add_long(&kflt_freemem, -npgs);
- bin = PP_2_BIN(lpp);
- mnode = PP_2_MEM_NODE(lpp);
- mtype = PP_2_MTYPE(lpp);
- pcm = PC_FREELIST_BIN_MUTEX(PFLT_USER, mnode,
- bin, 0);
- mutex_enter(pcm);
- page_vpadd(PAGE_FREELISTP(PFLT_USER, mnode,
- new_szc, bin, mtype), lpp);
- mutex_exit(pcm);
- ret = 1;
- }
- }
- } else {
-out:
- lp_base = pp;
- lp_base_page_num = lp_page_num;
- lp_count = 1;
- }
- return (ret);
-}
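
The bookkeeping in kflt_export() relies on the scan order: pfns arrive in ascending order, so a single remembered base (lp_base_page_num) and a running count suffice to detect that every constituent of an aligned large page has been seen free. A standalone sketch of just that counter, with the page-state checks stubbed out and all names invented:

#include <stdio.h>

#define NPGS            512UL           /* base pages per large page */
#define LP_BASE(pfn)    ((pfn) & ~(NPGS - 1))

static int
is_free_kflt_page(unsigned long pfn)
{
        /* fake state: exactly one aligned free run at [1024, 1536) */
        return (pfn >= 1024 && pfn < 1536);
}

int
main(void)
{
        unsigned long pfn, lp_base = (unsigned long)-1, lp_count = 0;

        for (pfn = 0; pfn < 4096; pfn++) {
                if (!is_free_kflt_page(pfn)) {
                        lp_count = 0;   /* like the init_state reset */
                        continue;
                }
                if (LP_BASE(pfn) == lp_base) {
                        if (++lp_count == NPGS)
                                printf("promote large page at 0x%lx\n",
                                    lp_base);
                } else {
                        lp_base = LP_BASE(pfn);
                        lp_count = 1;
                }
        }
        return (0);
}

The kernel version must additionally revalidate every constituent page under the freelist locks before calling page_promote(), since the scan runs lock-free and any page may change state between visits.
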
-
-/*
- * This thread is woken up whenever pages are added or removed from the kernel
- * page freelist and free memory on this list is low, or when there is excess
- * memory on the kernel freelist. It iterates over the physical pages in the
- * system and has two main tasks:
- *
- * 1) Relocate user pages which have been allocated on the kernel page freelist
- * wherever this is possible.
- *
- * 2) Identify groups of free pages on the kernel page freelist which can be
- * promoted to large pages and then exported to the user page freelist.
- */
-static void
-kflt_user_evict(void)
-{
- pfn_t pfn;
- int mnode;
- page_t *pp = NULL;
- callb_cpr_t cprinfo;
- int pass;
- int last_pass;
- int did_something;
- int scan_again;
- int pages_skipped;
- int shared_skipped;
- ulong_t shared_level = 8;
- pgcnt_t nfreed;
- int prm;
- pfn_t start_pfn;
- int pages_scanned;
- int pages_skipped_thresh = 20;
- int shared_skipped_thresh = 20;
- clock_t kflt_export_scan_start = 0;
- int kflt_export_scan;
- clock_t scan_start;
- int kflt_min_scan_delay = (hz * 60);
- int kflt_max_scan_delay = kflt_min_scan_delay * 5;
- int kflt_scan_delay = kflt_min_scan_delay;
-
- ASSERT(kflt_on);
- CALLB_CPR_INIT(&cprinfo, &kflt_evict_mutex,
- callb_generic_cpr, "kflt_user_evict");
-
- mutex_enter(&kflt_evict_mutex);
- kflt_evict_thread = curthread;
-
- pfn = PFN_INVALID; /* force scan reset */
- start_pfn = PFN_INVALID; /* force init with 1st pfn */
- mnode = 0;
- kflt_evict_ready = 1;
-
-loop:
- CALLB_CPR_SAFE_BEGIN(&cprinfo);
- cv_wait(&kflt_evict_cv, &kflt_evict_mutex);
- CALLB_CPR_SAFE_END(&cprinfo, &kflt_evict_mutex);
-
- scan_start = ddi_get_lbolt();
- kflt_export_scan = 0;
- if (kflt_freemem > kflt_lotsfree) {
- /* Force a delay between kflt export scans */
- if ((scan_start - kflt_export_scan_start) >
- kflt_scan_delay) {
- kflt_export_scan = 1;
- kflt_export_scan_start = scan_start;
- KFLT_STAT_SET_SCAN(kex_scan, 1);
- }
- }
-
- KFLT_STAT_INCR(kt_wakeups);
- KFLT_STAT_SET_SCAN(kt_kflt_user_alloc_start, kflt_user_alloc);
- KFLT_STAT_SET_SCAN(kt_pfn_start, pfn);
- KFLT_STAT_SET_SCAN(kt_kflt_freemem_start, kflt_freemem);
- KFLT_STAT_SET_SCAN(kt_mnode_start, mnode);
- pass = 0;
- last_pass = 0;
-
-
-again:
- did_something = 0;
- pages_skipped = 0;
- shared_skipped = 0;
- pages_scanned = 0;
-
- KFLT_STAT_INCR(kt_scans);
- KFLT_STAT_INCR_SCAN(kt_passes);
-
- /*
- * There are two conditions which drive the loop -
- *
- * 1. If we have too much free memory then it may be possible to
- * export some large pages back to the user page freelist.
- *
- * 2. If a large number of user pages have been allocated from the
- * kernel freelist then we try to relocate them.
- */
-
- while ((kflt_export_scan || kflt_needfree ||
- (kflt_freemem < kflt_lotsfree && kflt_user_alloc)) &&
- ((pfn = kflt_get_next_pfn(&mnode, pfn)) != PFN_INVALID)) {
- if (start_pfn == PFN_INVALID) {
- start_pfn = pfn;
- } else if (start_pfn == pfn) {
- last_pass = pass;
- pass += 1;
-
- /* initialize internal state in kflt_export() */
- (void) kflt_export(pp, 1);
- /*
- * Did a complete walk of kernel freelist, but didn't
- * free any pages.
- */
- if (cp_default.cp_ncpus == 1 && did_something == 0) {
- KFLT_STAT_INCR(kt_evict_break);
- break;
- }
- did_something = 0;
- }
- pages_scanned = 1;
-
- pp = page_numtopp_nolock(pfn);
- if (pp == NULL) {
- continue;
- }
-
- KFLT_STAT_INCR_SCAN(kt_examined);
-
- if (!PP_ISKFLT(pp))
- continue;
-
- if (kflt_export_scan) {
- if (PP_ISFREE(pp) && kflt_export(pp, 0)) {
- did_something = 1;
- }
- continue;
- }
-
- if (!kflt_user_alloc) {
- continue;
- }
-
- if (PP_ISKAS(pp) || !page_trylock(pp, SE_EXCL)) {
- KFLT_STAT_INCR_SCAN(kt_cantlock);
- continue;
- }
-
- /* Check that the page is in the same state after locking */
- if (PP_ISFREE(pp) || PP_ISKAS(pp)) {
- page_unlock(pp);
- continue;
- }
-
- KFLT_STAT_SET_SCAN(kt_skiplevel, shared_level);
- if (hat_page_checkshare(pp, shared_level)) {
- page_unlock(pp);
- pages_skipped++;
- shared_skipped++;
- KFLT_STAT_INCR_SCAN(kt_skipshared);
- continue;
- }
-
- prm = hat_pagesync(pp,
- HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD);
-
- /* On first pass ignore ref'd pages */
- if (pass <= 1 && (prm & P_REF)) {
- page_unlock(pp);
- KFLT_STAT_INCR_SCAN(kt_skiprefd);
- continue;
- }
-
- /* On pass 2, VN_DISPOSE if mod bit is not set */
- if (pass <= 2) {
- if (pp->p_szc != 0 || (prm & P_MOD) ||
- pp->p_lckcnt || pp->p_cowcnt) {
- page_unlock(pp);
- } else {
- /*
- * unload the mappings before
- * checking if mod bit is set
- */
- (void) hat_pageunload(pp,
- HAT_FORCE_PGUNLOAD);
-
- /*
- * skip this page if modified
- */
- if (hat_ismod(pp)) {
- pages_skipped++;
- page_unlock(pp);
- continue;
- }
-
- /* LINTED: constant in conditional context */
- VN_DISPOSE(pp, B_INVAL, 0, kcred);
- KFLT_STAT_INCR_SCAN(kt_destroy);
- did_something = 1;
- }
- continue;
- }
-
- if (kflt_invalidate_page(pp, &nfreed) == 0) {
- did_something = 1;
- }
-
- /*
- * No need to drop the page lock here.
- * kflt_invalidate_page has done that for us
- * either explicitly or through a page_free.
- */
- }
-
- /*
- * Scan again if we need more memory from the kernel
- * freelist or user memory allocations from the kernel freelist
- * are too high.
- */
- scan_again = 0;
- if (kflt_freemem < kflt_minfree || kflt_needfree) {
- if (pass <= 3 && kflt_user_alloc && pages_scanned &&
- pages_skipped > pages_skipped_thresh) {
- scan_again = 1;
- } else {
- /*
- * We need to allocate more memory to the kernel
- * freelist.
- */
- kflt_expand();
- }
- } else if (kflt_freemem < kflt_lotsfree && kflt_user_alloc) {
- ASSERT(pages_scanned);
- if (pass <= 2 && pages_skipped > pages_skipped_thresh)
- scan_again = 1;
- if (pass == last_pass || did_something)
- scan_again = 1;
- else if (shared_skipped > shared_skipped_thresh &&
- shared_level < (8<<24)) {
- shared_level <<= 1;
- scan_again = 1;
- }
- } else if (kflt_export_scan) {
- /*
- * The delay between kflt export scans varies between a minimum
- * of 60 secs and a maximum of 5 mins. The delay is set to the
- * minimum if a page is promoted during a scan and increased
- * otherwise.
- */
- if (did_something) {
- kflt_scan_delay = kflt_min_scan_delay;
- } else if (kflt_scan_delay < kflt_max_scan_delay) {
- kflt_scan_delay += kflt_min_scan_delay;
- }
- }
-
- if (scan_again && cp_default.cp_ncpus > 1) {
- goto again;
- } else {
- if (shared_level > 8)
- shared_level >>= 1;
-
- KFLT_STAT_SET_SCAN(kt_pfn_end, pfn);
- KFLT_STAT_SET_SCAN(kt_mnode_end, mnode);
- KFLT_STAT_SET_SCAN(kt_kflt_user_alloc_end, kflt_user_alloc);
- KFLT_STAT_SET_SCAN(kt_kflt_freemem_end, kflt_freemem);
- KFLT_STAT_SET_SCAN(kt_ticks, ddi_get_lbolt() - scan_start);
- KFLT_STAT_INC_SCAN_INDEX;
- goto loop;
- }
-
-}
-
-/*
- * Relocate page opp (Original Page Pointer) from kernel page freelist to page
- * rpp (Replacement Page Pointer) on the user page freelist. Page opp will be
- * freed if relocation is successful; otherwise it is only unlocked.
- * On entry, page opp must be exclusively locked and not free.
- * *nfreedp: number of pages freed.
- */
-static int
-kflt_relocate_page(page_t *pp, pgcnt_t *nfreedp)
-{
- page_t *opp = pp;
- page_t *rpp = NULL;
- spgcnt_t npgs;
- int result;
-
- ASSERT(!PP_ISFREE(opp));
- ASSERT(PAGE_EXCL(opp));
-
- result = page_relocate(&opp, &rpp, 1, 1, &npgs, NULL);
- *nfreedp = npgs;
- if (result == 0) {
- while (npgs-- > 0) {
- page_t *tpp;
-
- ASSERT(rpp != NULL);
- tpp = rpp;
- page_sub(&rpp, tpp);
- page_unlock(tpp);
- }
-
- ASSERT(rpp == NULL);
-
- return (0); /* success */
- }
-
- page_unlock(opp);
- return (result);
-}
-
-/*
- * Based on page_invalidate_pages()
- *
- * Kflt_invalidate_page() uses page_relocate() twice. Both call sites
- * must be updated to match the new page_relocate() when it
- * becomes available.
- *
- * Return result of kflt_relocate_page or zero if page was directly freed.
- * *nfreedp: number of pages freed.
- */
-static int
-kflt_invalidate_page(page_t *pp, pgcnt_t *nfreedp)
-{
- int result;
-
- ASSERT(!PP_ISFREE(pp));
- ASSERT(PAGE_EXCL(pp));
-
- /*
- * Is this page involved in some I/O? shared?
- * The page_struct_lock need not be acquired to
- * examine these fields since the page has an
- * "exclusive" lock.
- */
- if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
- result = kflt_relocate_page(pp, nfreedp);
-#ifdef KFLT_STATS
- if (result == 0)
- KFLT_STAT_INCR_SCAN(kip_reloclocked);
- else if (result == ENOMEM)
- KFLT_STAT_INCR_SCAN(kip_nomem);
-#endif
- return (result);
- }
-
- ASSERT(pp->p_vnode->v_type != VCHR);
-
- /*
- * Unload the mappings and check if mod bit is set.
- */
- (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
-
- if (hat_ismod(pp)) {
- result = kflt_relocate_page(pp, nfreedp);
-#ifdef KFLT_STATS
- if (result == 0)
- KFLT_STAT_INCR_SCAN(kip_relocmod);
- else if (result == ENOMEM)
- KFLT_STAT_INCR_SCAN(kip_nomem);
-#endif
- return (result);
- }
-
- if (!page_try_demote_pages(pp)) {
- KFLT_STAT_INCR_SCAN(kip_demotefailed);
- page_unlock(pp);
- return (EAGAIN);
- }
-
- /* LINTED: constant in conditional context */
- VN_DISPOSE(pp, B_INVAL, 0, kcred);
- KFLT_STAT_INCR_SCAN(kip_destroy);
- *nfreedp = 1;
- return (0);
-}
-
-void
-kflt_evict_wakeup(void)
-{
- if (mutex_tryenter(&kflt_evict_mutex)) {
- if (kflt_evict_ready && (kflt_freemem > kflt_lotsfree ||
- (kflt_freemem < kflt_desfree && kflt_user_alloc) ||
- kflt_needfree)) {
- cv_signal(&kflt_evict_cv);
- }
- mutex_exit(&kflt_evict_mutex);
- }
- /* else, kflt thread is already running */
-}
-
-void
-kflt_freemem_sub(pgcnt_t npages)
-{
- atomic_add_long(&kflt_freemem, -npages);
-
- ASSERT(kflt_freemem >= 0);
-
- if (kflt_evict_ready &&
- (kflt_freemem > kflt_lotsfree ||
- kflt_freemem < kflt_desfree || kflt_needfree)) {
- kflt_evict_wakeup();
- }
-}
-
-void
-kflt_freemem_add(pgcnt_t npages)
-{
- atomic_add_long(&kflt_freemem, npages);
-
- wakeup_pcgs(); /* wakeup threads in pcgs() */
-
- if (kflt_evict_ready && kflt_needfree &&
- kflt_freemem >= (kflt_throttlefree + kflt_needfree)) {
- mutex_enter(&kflt_throttle_mutex);
- cv_broadcast(&kflt_throttle_cv);
- mutex_exit(&kflt_throttle_mutex);
- }
-}
-
-void
-kflt_tick()
-{
- /*
- * Once per second we wake up all the threads throttled
- * waiting for kernel freelist memory, in case we've become stuck
- * and haven't made forward progress expanding the kernel freelist.
- */
- if (kflt_on && kflt_evict_ready)
- cv_broadcast(&kflt_throttle_cv);
-}
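
Taken together, the last three routines form a classic watermark-and-condvar protocol: threads that find kflt_freemem below the throttle line sleep on kflt_throttle_cv in kflt_create_throttle(), kflt_freemem_add() broadcasts once free memory covers kflt_throttlefree plus the recorded need, and kflt_tick() re-broadcasts once per second as a backstop against lost wakeups. A compressed pthreads sketch of the same shape; the threshold and names are invented, and unlike the kernel code (which updates kflt_freemem with atomics and takes the mutex only around the condvar) this sketch keeps everything under one lock:

#include <pthread.h>

static pthread_mutex_t  throttle_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t   throttle_cv = PTHREAD_COND_INITIALIZER;
static unsigned long    freemem;                /* pages currently free */
static unsigned long    throttlefree = 1024;    /* illustrative threshold */

/* consumer: block until the request fits above the throttle line */
static void
throttle_alloc(unsigned long npages)
{
        (void) pthread_mutex_lock(&throttle_lock);
        while (freemem < throttlefree + npages)
                (void) pthread_cond_wait(&throttle_cv, &throttle_lock);
        freemem -= npages;
        (void) pthread_mutex_unlock(&throttle_lock);
}

/* producer: return pages and wake every throttled thread */
static void
throttle_free(unsigned long npages)
{
        (void) pthread_mutex_lock(&throttle_lock);
        freemem += npages;
        (void) pthread_cond_broadcast(&throttle_cv);
        (void) pthread_mutex_unlock(&throttle_lock);
}
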
diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h
index 057f88a7c7..753aa9d146 100644
--- a/usr/src/uts/i86pc/vm/vm_dep.h
+++ b/usr/src/uts/i86pc/vm/vm_dep.h
@@ -19,7 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -56,7 +57,6 @@ extern "C" {
extern hrtime_t randtick();
extern uint_t page_create_update_flags_x86(uint_t);
-extern int kernel_page_update_flags_x86(uint_t *);
extern size_t plcnt_sz(size_t);
#define PLCNT_SZ(ctrs_sz) (ctrs_sz = plcnt_sz(ctrs_sz))
@@ -99,36 +99,6 @@ extern int mnode_range_cnt(int);
#define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase)
/*
- * combined memory ranges from mnode and memranges[] to manage a single
- * mnode/mtype dimension in the page lists.
- */
-typedef struct {
- pfn_t mnr_pfnlo;
- pfn_t mnr_pfnhi;
- int mnr_mnode;
- int mnr_memrange; /* index into memranges[] */
- int mnr_next; /* next lower PA mnoderange */
- int mnr_exists;
- /* maintain page list stats */
- pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */
- pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */
- pgcnt_t mnr_mt_totcnt; /* sum of cache and free lists */
-#ifdef DEBUG
- struct mnr_mts { /* mnode/mtype szc stats */
- pgcnt_t mnr_mts_pgcnt;
- int mnr_mts_colors;
- pgcnt_t *mnr_mtsc_pgcnt;
- } *mnr_mts;
-#endif
-} mnoderange_t;
-
-#define MEMRANGEHI(mtype) \
- (((mtype) > 0) ? memranges[(mtype) - 1] - 1: physmax)
-#define MEMRANGELO(mtype) (memranges[(mtype)])
-
-#define MTYPE_FREEMEM(mt) (mnoderanges[(mt)].mnr_mt_totcnt)
-
-/*
 * This was really badly defined; it implicitly uses mnode_maxmrange[]
* which is a static in vm_pagelist.c
*/
@@ -137,127 +107,16 @@ extern int mtype_2_mrange(int);
(mnode_maxmrange[mnode] - mtype_2_mrange(mtype))
/*
- * this structure is used for walking free page lists; it
- * controls when to split large pages into smaller pages,
- * and when to coalesce smaller pages into larger pages
- */
-typedef struct page_list_walker {
- uint_t plw_colors; /* num of colors for szc */
- uint_t plw_color_mask; /* colors-1 */
- uint_t plw_bin_step; /* next bin: 1 or 2 */
- uint_t plw_count; /* loop count */
- uint_t plw_bin0; /* starting bin */
- uint_t plw_bin_marker; /* bin after initial jump */
- uint_t plw_bin_split_prev; /* last bin we tried to split */
- uint_t plw_do_split; /* set if OK to split */
- uint_t plw_split_next; /* next bin to split */
- uint_t plw_ceq_dif; /* number of different color groups */
- /* to check */
- uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
- uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */
-} page_list_walker_t;
-
-/*
- * Page freelists are organized as two freelist types user and kernel, with
- * their own policy and allocation routines. The definitions related to the
- * freelist type structure are grouped below.
- *
- * The page size free lists themselves are allocated dynamically with
+ * Per page size free lists. Allocated dynamically.
* dimensions [mtype][mmu_page_sizes][colors]
*
* mtype specifies a physical memory range with a unique mnode.
*/
-#define MAX_PFLT_POLICIES 3
-#define MAX_PFLT_TYPE 2
-enum freelist_types {PFLT_USER, PFLT_KMEM};
-
-/*
- * The kernel only needs a small number of page colors, far fewer than user
- * programs.
- */
-#define KFLT_PAGE_COLORS 16
-
-typedef struct page_freelist_type page_freelist_type_t;
-extern page_freelist_type_t flt_user;
-extern page_freelist_type_t flt_kern;
-extern page_freelist_type_t *ufltp;
-extern page_freelist_type_t *kfltp;
-
-void page_flt_init(page_freelist_type_t *, page_freelist_type_t *);
-page_t *page_get_uflt(struct vnode *, u_offset_t, struct seg *, caddr_t,
- size_t, uint_t, struct lgrp *);
-page_t *page_get_kflt(struct vnode *, u_offset_t, struct seg *, caddr_t,
- size_t, uint_t, struct lgrp *);
-void page_kflt_walk_init(uchar_t, uint_t, uint_t, int, int,
- page_list_walker_t *);
-uint_t page_kflt_walk_next_bin(uchar_t, uint_t, page_list_walker_t *);
-page_t *page_import_kflt(page_freelist_type_t *, uint_t, int, uchar_t,
- uint_t, int *);
-page_t *page_user_alloc_kflt(page_freelist_type_t *, int, uint_t, int, uchar_t,
- uint_t);
-void kflt_expand(void);
-
-typedef page_t *(*pflt_get_func_p) (struct vnode *, u_offset_t, struct seg *,
- caddr_t, size_t, uint_t, lgrp_t *);
-typedef page_t *(*pflt_policy_func_p)(page_freelist_type_t *, int, uint_t, int,
- uchar_t, uint_t);
-typedef void (*pflt_list_walk_init_func_p)(uchar_t, uint_t, uint_t, int, int,
- page_list_walker_t *);
-typedef uint_t (*pflt_list_walk_next_func_p)(uchar_t, uint_t,
- page_list_walker_t *);
-
-struct page_freelist_type {
- int pflt_type; /* type is user or kernel */
- pflt_get_func_p pflt_get_free; /* top-level alloc routine */
- pflt_list_walk_init_func_p pflt_walk_init; /* walker routines */
- pflt_list_walk_next_func_p pflt_walk_next;
- int pflt_num_policies; /* the number of policy routines */
- /*
- * the policy routines are called by the allocator routine
- * to implement the actual allocation policies.
- */
- pflt_policy_func_p pflt_policy[MAX_PFLT_POLICIES];
- page_t ****pflt_freelists; /* the page freelist arrays */
-};
-
-#if defined(__amd64) && !defined(__xpv)
-#define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \
- ((is_kflt) ? \
- (page_t **)(kfltp->pflt_freelists[mtype] + (color)) : \
- ((ufltp->pflt_freelists[mtype][szc] + (color))))
-
-#define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \
- { \
- if (kflt_on && (((flags) & PG_KFLT) == PG_KFLT)) { \
- pp = kfltp->pflt_get_free(vp, off, seg, vaddr, size, \
- flags, lgrp); \
- } else { \
- pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \
- flags, lgrp); \
- } \
- }
-#else /* __amd64 && ! __xpv */
-#define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \
- ((ufltp->pflt_freelists[mtype][szc] + (color)))
-
-#define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \
- pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \
- flags, lgrp);
-#endif /* __amd64 && ! __xpv */
-
-#define PAGE_FREELISTS(is_kflt, mnode, szc, color, mtype) \
- (*(PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype)))
-
-#define PAGE_GET_FREELISTS_POLICY(fp, i) \
- (fp->pflt_policy[i])
-
-#define PAGE_LIST_WALK_INIT(fp, szc, flags, bin, can_split, use_ceq, plw) \
- fp->pflt_walk_init(szc, flags, bin, can_split, use_ceq, plw)
-
-#define PAGE_LIST_WALK_NEXT(fp, szc, bin, plw) \
- fp->pflt_walk_next(szc, bin, plw)
+extern page_t ****page_freelists;
+#define PAGE_FREELISTS(mnode, szc, color, mtype) \
+ (*(page_freelists[mtype][szc] + (color)))
/*
* For now there is only a single size cache list. Allocated dynamically.
@@ -271,7 +130,7 @@ extern page_t ***page_cachelists;
(*(page_cachelists[mtype] + (color)))
/*
- * There are mutexes for the user page freelist, the kernel page freelist
+ * There are mutexes for both the page freelist
* and the page cachelist. We want enough locks to make contention
* reasonable, but not too many -- otherwise page_freelist_lock() gets
* so expensive that it becomes the bottleneck!
@@ -279,32 +138,11 @@ extern page_t ***page_cachelists;
#define NPC_MUTEX 16
-/*
- * The kflt_disable variable is used to determine whether the kernel freelist
- * is supported on this platform.
- */
-extern int kflt_disable;
-
extern kmutex_t *fpc_mutex[NPC_MUTEX];
-extern kmutex_t *kfpc_mutex[NPC_MUTEX];
extern kmutex_t *cpc_mutex[NPC_MUTEX];
-#define PC_ISKFLT(fltp) (fltp->pflt_type == PFLT_KMEM)
- /* flag used by the kflt_export function when calling page_promote */
-#define PC_KFLT_EXPORT 0x4
-
-extern page_t *page_get_mnode_freelist(page_freelist_type_t *, int, uint_t,
- int, uchar_t, uint_t);
+extern page_t *page_get_mnode_freelist(int, uint_t, int, uchar_t, uint_t);
extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
-extern page_t *page_get_contig_pages(page_freelist_type_t *, int, uint_t, int,
- uchar_t, uint_t);
-extern void page_list_walk_init(uchar_t, uint_t, uint_t, int, int,
- page_list_walker_t *);
-extern uint_t page_list_walk_next_bin(uchar_t, uint_t, page_list_walker_t *);
-
-extern void kflt_evict_wakeup();
-extern void kflt_freemem_add(pgcnt_t);
-extern void kflt_freemem_sub(pgcnt_t);
/* mem node iterator is not used on x86 */
#define MEM_NODE_ITERATOR_DECL(it)
@@ -391,8 +229,6 @@ extern void kflt_freemem_sub(pgcnt_t);
} \
}
-#define USER_2_KMEM_BIN(bin) ((bin) & (KFLT_PAGE_COLORS - 1))
-
/* get the color equivalency mask for the next szc */
#define PAGE_GET_NSZ_MASK(szc, mask) \
((mask) >> (PAGE_GET_SHIFT((szc) + 1) - PAGE_GET_SHIFT(szc)))
@@ -404,9 +240,7 @@ extern void kflt_freemem_sub(pgcnt_t);
/* Find the bin for the given page if it was of size szc */
#define PP_2_BIN_SZC(pp, szc) (PFN_2_COLOR(pp->p_pagenum, szc, NULL))
-#define PP_2_BIN(pp) ((PP_ISKFLT(pp)) ? \
- USER_2_KMEM_BIN(PP_2_BIN_SZC(pp, pp->p_szc)) : \
- (PP_2_BIN_SZC(pp, pp->p_szc)))
+#define PP_2_BIN(pp) (PP_2_BIN_SZC(pp, pp->p_szc))
#define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum))
#define PP_2_MTYPE(pp) (pfn_2_mtype(pp->p_pagenum))
@@ -415,6 +249,27 @@ extern void kflt_freemem_sub(pgcnt_t);
#define SZCPAGES(szc) (1 << PAGE_BSZS_SHIFT(szc))
#define PFN_BASE(pfnum, szc) (pfnum & ~(SZCPAGES(szc) - 1))
+/*
+ * this structure is used for walking free page lists; it
+ * controls when to split large pages into smaller pages,
+ * and when to coalesce smaller pages into larger pages
+ */
+typedef struct page_list_walker {
+ uint_t plw_colors; /* num of colors for szc */
+ uint_t plw_color_mask; /* colors-1 */
+ uint_t plw_bin_step; /* next bin: 1 or 2 */
+ uint_t plw_count; /* loop count */
+ uint_t plw_bin0; /* starting bin */
+ uint_t plw_bin_marker; /* bin after initial jump */
+ uint_t plw_bin_split_prev; /* last bin we tried to split */
+ uint_t plw_do_split; /* set if OK to split */
+ uint_t plw_split_next; /* next bin to split */
+ uint_t plw_ceq_dif; /* number of different color groups */
+ /* to check */
+ uint_t plw_ceq_mask[MMU_PAGE_SIZES + 1]; /* color equiv mask */
+ uint_t plw_bins[MMU_PAGE_SIZES + 1]; /* num of bins */
+} page_list_walker_t;
+
void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
int can_split, int use_ceq, page_list_walker_t *plw);
@@ -460,19 +315,12 @@ extern void mnodetype_2_pfn(int, int, pfn_t *, pfn_t *);
#define MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi) \
mnodetype_2_pfn(mnode, mtype, &pfnlo, &pfnhi)
-#define PC_FREELIST_BIN_MUTEX(is_kflt, mnode, bin, flags) \
- ((is_kflt) ? \
- (&kfpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]) : \
- (&fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode]))
-
-#define PC_BIN_MUTEX(is_kflt, mnode, bin, flags) \
- ((flags & PG_FREE_LIST) ? \
- PC_FREELIST_BIN_MUTEX(is_kflt, mnode, bin, flags): \
+#define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \
+ &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \
&cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])
#define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode])
#define CPC_MUTEX(mnode, i) (&cpc_mutex[i][mnode])
-#define KFPC_MUTEX(mnode, i) (&kfpc_mutex[i][mnode])
#ifdef DEBUG
#define CHK_LPG(pp, szc) chk_lpg(pp, szc)
@@ -560,8 +408,6 @@ extern int l2cache_sz, l2cache_linesz, l2cache_assoc;
#define PGI_MT_NEXT 0x8000000 /* get next mtype */
#define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G)
-/* Flag to avoid allocating a page in page_import_kflt() */
-#define PGI_NOPGALLOC 0x10000000
/*
* Maximum and default values for user heap, stack, private and shared
@@ -590,17 +436,11 @@ extern pgcnt_t shm_lpg_min_physmem;
* hash as and addr to get a bin.
*/
-#define AS_2_USER_BIN(as, seg, vp, addr, bin, szc) \
+#define AS_2_BIN(as, seg, vp, addr, bin, szc) \
bin = (((((uintptr_t)(addr) >> PAGESHIFT) + ((uintptr_t)(as) >> 4)) \
& page_colors_mask) >> \
(hw_page_array[szc].hp_shift - hw_page_array[0].hp_shift))
-#define AS_2_BIN(is_kflt, as, seg, vp, addr, bin, szc) { \
- AS_2_USER_BIN(as, seg, vp, addr, bin, szc); \
- if (is_kflt) { \
- bin = USER_2_KMEM_BIN(bin); \
- } \
-}
/*
* cpu private vm data - accessed thru CPU->cpu_vm_data
* vc_pnum_memseg: tracks last memseg visited in page_numtopp_nolock()
@@ -643,22 +483,12 @@ extern char vm_cpu_data0[];
#ifdef VM_STATS
struct vmm_vmstats_str {
- /* page_get_uflt and page_get_kflt */
- ulong_t pgf_alloc[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocok[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocokrem[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocfailed[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
+ ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */
+ ulong_t pgf_allocok[MMU_PAGE_SIZES];
+ ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
+ ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
ulong_t pgf_allocdeferred;
- ulong_t pgf_allocretry[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgik_allocok; /* page_import_kflt */
- ulong_t pgik_allocfailed;
- ulong_t pgkx_allocok; /* kflt_expand */
- ulong_t pgkx_allocfailed;
- ulong_t puak_allocok; /* page_user_alloc_kflt */
- ulong_t puak_allocfailed;
- ulong_t pgexportok; /* kflt_export */
- ulong_t pgexportfail;
- ulong_t pgkflt_disable; /* kflt_user_evict */
+ ulong_t pgf_allocretry[MMU_PAGE_SIZES];
ulong_t pgc_alloc; /* page_get_cachelist */
ulong_t pgc_allocok;
ulong_t pgc_allocokrem;
@@ -673,7 +503,6 @@ struct vmm_vmstats_str {
ulong_t ptcpfailexcl[MMU_PAGE_SIZES];
ulong_t ptcpfailszc[MMU_PAGE_SIZES];
ulong_t ptcpfailcage[MMU_PAGE_SIZES];
- ulong_t ptcpfailkflt[MMU_PAGE_SIZES];
ulong_t ptcpok[MMU_PAGE_SIZES];
ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */
ulong_t pgmf_allocfailed[MMU_PAGE_SIZES];
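
Most of what this header loses is the ops vector that made the two freelists interchangeable: struct page_freelist_type bundled a top-level allocator, list-walker hooks, and an ordered array of fallback policies, and PAGE_GET_FREELISTS() simply dispatched on the type. A stripped-down sketch of the pattern, with all types and names invented for illustration:

typedef struct page page_t;
typedef struct flt_type flt_type_t;

/* one allocation policy; returns NULL to fall through to the next */
typedef page_t *(*flt_policy_t)(flt_type_t *, unsigned int bin);

#define MAX_POLICIES    3

struct flt_type {
        int             ft_kind;        /* user or kernel */
        int             ft_npolicies;
        flt_policy_t    ft_policy[MAX_POLICIES];
};

/* chain through the policies, as the removed pflt_policy[] array implies */
static page_t *
flt_get_free(flt_type_t *fp, unsigned int bin)
{
        int i;
        page_t *pp;

        for (i = 0; i < fp->ft_npolicies; i++) {
                if ((pp = fp->ft_policy[i](fp, bin)) != NULL)
                        return (pp);
        }
        return (NULL);
}

Under the removed scheme the user type listed page_get_mnode_freelist, then page_user_alloc_kflt, then page_get_contig_pages, so ordinary allocations raided the kernel freelist only after the user lists came up empty.
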
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index f108d092e5..dfdca87e1c 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -87,7 +87,6 @@
#include <sys/stack.h>
#include <util/qsort.h>
#include <sys/taskq.h>
-#include <sys/kflt_mem.h>
#ifdef __xpv
@@ -138,6 +137,36 @@ extern uint_t page_create_putbacks;
extern int use_sse_pagecopy, use_sse_pagezero;
/*
+ * combined memory ranges from mnode and memranges[] to manage a single
+ * mnode/mtype dimension in the page lists.
+ */
+typedef struct {
+ pfn_t mnr_pfnlo;
+ pfn_t mnr_pfnhi;
+ int mnr_mnode;
+ int mnr_memrange; /* index into memranges[] */
+ int mnr_next; /* next lower PA mnoderange */
+ int mnr_exists;
+ /* maintain page list stats */
+ pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */
+ pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */
+ pgcnt_t mnr_mt_totcnt; /* sum of cache and free lists */
+#ifdef DEBUG
+ struct mnr_mts { /* mnode/mtype szc stats */
+ pgcnt_t mnr_mts_pgcnt;
+ int mnr_mts_colors;
+ pgcnt_t *mnr_mtsc_pgcnt;
+ } *mnr_mts;
+#endif
+} mnoderange_t;
+
+#define MEMRANGEHI(mtype) \
+ ((mtype > 0) ? memranges[mtype - 1] - 1: physmax)
+#define MEMRANGELO(mtype) (memranges[mtype])
+
+#define MTYPE_FREEMEM(mt) (mnoderanges[mt].mnr_mt_totcnt)
+
+/*
 * As the PC architecture evolved, memory was clumped into several
* ranges for various historical I/O devices to do DMA.
* < 16Meg - ISA bus
@@ -315,9 +344,8 @@ page_t ***page_cachelists;
*/
hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1];
-kmutex_t *fpc_mutex[NPC_MUTEX]; /* user page freelist mutexes */
-kmutex_t *kfpc_mutex[NPC_MUTEX]; /* kernel page freelist mutexes */
-kmutex_t *cpc_mutex[NPC_MUTEX]; /* page cachelist mutexes */
+kmutex_t *fpc_mutex[NPC_MUTEX];
+kmutex_t *cpc_mutex[NPC_MUTEX];
/* Lock to protect mnoderanges array for memory DR operations. */
static kmutex_t mnoderange_lock;
@@ -1145,9 +1173,7 @@ page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock)
if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
startpfn = pfn;
CONTIG_UNLOCK();
-#ifdef DEBUG
check_dma(mattr, pplist, *pgcnt);
-#endif
return (pplist);
}
minctg = howmany(*pgcnt, sgllen);
@@ -1182,9 +1208,7 @@ page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock)
if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
startpfn = pfn;
CONTIG_UNLOCK();
-#ifdef DEBUG
check_dma(mattr, pplist, *pgcnt);
-#endif
return (pplist);
}
minctg = howmany(*pgcnt, sgllen);
@@ -1784,15 +1808,6 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
}
colorsz = mnoderangecnt * sizeof (mnoderange_t);
- if (!kflt_disable) {
- /* size for kernel page freelists */
- colorsz += mnoderangecnt * sizeof (page_t ***);
- colorsz += (mnoderangecnt * KFLT_PAGE_COLORS *
- sizeof (page_t *));
-
- /* size for kfpc_mutex */
- colorsz += (max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX);
- }
/* size for fpc_mutex and cpc_mutex */
colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX);
@@ -1842,44 +1857,28 @@ page_coloring_setup(caddr_t pcmemaddr)
fpc_mutex[k] = (kmutex_t *)addr;
addr += (max_mem_nodes * sizeof (kmutex_t));
}
- if (!kflt_disable) {
- for (k = 0; k < NPC_MUTEX; k++) {
- kfpc_mutex[k] = (kmutex_t *)addr;
- addr += (max_mem_nodes * sizeof (kmutex_t));
- }
- }
for (k = 0; k < NPC_MUTEX; k++) {
cpc_mutex[k] = (kmutex_t *)addr;
addr += (max_mem_nodes * sizeof (kmutex_t));
}
- ufltp->pflt_freelists = (page_t ****)addr;
+ page_freelists = (page_t ****)addr;
addr += (mnoderangecnt * sizeof (page_t ***));
page_cachelists = (page_t ***)addr;
addr += (mnoderangecnt * sizeof (page_t **));
for (i = 0; i < mnoderangecnt; i++) {
- ufltp->pflt_freelists[i] = (page_t ***)addr;
+ page_freelists[i] = (page_t ***)addr;
addr += (mmu_page_sizes * sizeof (page_t **));
for (j = 0; j < mmu_page_sizes; j++) {
colors = page_get_pagecolors(j);
- ufltp->pflt_freelists[i][j] = (page_t **)addr;
+ page_freelists[i][j] = (page_t **)addr;
addr += (colors * sizeof (page_t *));
}
page_cachelists[i] = (page_t **)addr;
addr += (page_colors * sizeof (page_t *));
}
-
- if (!kflt_disable) {
- kfltp->pflt_freelists = (page_t ****)addr;
- addr += (mnoderangecnt * sizeof (page_t ***));
- for (i = 0; i < mnoderangecnt; i++) {
- kfltp->pflt_freelists[i] = (page_t ***)addr;
- addr += (KFLT_PAGE_COLORS * sizeof (page_t *));
- }
- }
- page_flt_init(ufltp, kfltp);
}
#if defined(__xpv)
@@ -1957,30 +1956,6 @@ page_create_update_flags_x86(uint_t flags)
return (flags);
}
-int
-kernel_page_update_flags_x86(uint_t *flags)
-{
- /*
- * page_get_kflt() calls this after walking the kernel pagelists and
- * not finding a free page to allocate. If the PGI_MT_RANGE4G flag is
- * set then we only walk mnodes in the greater-than-4g range, so if we
- * didn't find a page, there must be free kernel memory below this range.
- *
- * kflt_expand() calls this before trying to allocate large pages for
- * kernel memory.
- */
- if (physmax4g) {
- if (*flags & PGI_MT_RANGE4G) {
- *flags &= ~PGI_MT_RANGE4G;
- *flags |= PGI_MT_RANGE0;
- return (1);
- } else {
- return (0);
- }
- }
- return (0);
-}
-
/*ARGSUSED*/
int
bp_color(struct buf *bp)
@@ -2925,9 +2900,7 @@ page_get_contigpages(
goto fail;
off += minctg * MMU_PAGESIZE;
}
-#ifdef DEBUG
check_dma(mattr, mcpl, minctg);
-#endif
/*
* Here with a minctg run of contiguous pages, add them to the
* list we will return for this request.
@@ -3131,14 +3104,12 @@ page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags,
for (plw.plw_count = 0;
plw.plw_count < page_colors; plw.plw_count++) {
- if (PAGE_FREELISTS(PFLT_USER, mnode, szc, bin, mtype)
- == NULL)
+ if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL)
goto nextfreebin;
- pcm = PC_FREELIST_BIN_MUTEX(PFLT_USER, mnode, bin,
- PG_FREE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
mutex_enter(pcm);
- pp = PAGE_FREELISTS(PFLT_USER, mnode, szc, bin, mtype);
+ pp = PAGE_FREELISTS(mnode, szc, bin, mtype);
first_pp = pp;
while (pp != NULL) {
if (page_trylock(pp, SE_EXCL) == 0) {
@@ -3176,8 +3147,8 @@ page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags,
ASSERT(pp->p_szc == 0);
/* found a page with specified DMA attributes */
- page_sub(PAGE_FREELISTP(PFLT_USER, mnode, szc,
- bin, mtype), pp);
+ page_sub(&PAGE_FREELISTS(mnode, szc, bin,
+ mtype), pp);
page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST);
if ((PP_ISFREE(pp) == 0) ||
@@ -3187,9 +3158,7 @@ page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags,
}
mutex_exit(pcm);
-#ifdef DEBUG
check_dma(dma_attr, pp, 1);
-#endif
VM_STAT_ADD(pga_vmstats.pgma_allocok);
return (pp);
}
@@ -3208,9 +3177,7 @@ nextfreebin:
mmu_btop(dma_attr->dma_attr_addr_hi + 1),
&plw);
if (pp != NULL) {
-#ifdef DEBUG
check_dma(dma_attr, pp, 1);
-#endif
return (pp);
}
}
@@ -3234,8 +3201,7 @@ nextfreebin:
for (i = 0; i <= page_colors; i++) {
if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL)
goto nextcachebin;
- pcm = PC_BIN_MUTEX(PFLT_USER, mnode, bin,
- PG_CACHE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST);
mutex_enter(pcm);
pp = PAGE_CACHELISTS(mnode, bin, mtype);
first_pp = pp;
@@ -3279,9 +3245,7 @@ nextfreebin:
mutex_exit(pcm);
ASSERT(pp->p_vnode);
ASSERT(PP_ISAGED(pp) == 0);
-#ifdef DEBUG
check_dma(dma_attr, pp, 1);
-#endif
VM_STAT_ADD(pga_vmstats.pgma_allocok);
return (pp);
}
@@ -3337,7 +3301,7 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
lgrp = lgrp_home_lgrp();
/* LINTED */
- AS_2_BIN(PFLT_USER, as, seg, vp, vaddr, bin, 0);
+ AS_2_BIN(as, seg, vp, vaddr, bin, 0);
/*
* Only hold one freelist or cachelist lock at a time, that way we
@@ -3391,7 +3355,7 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
mtype = m;
do {
if (fullrange != 0) {
- pp = page_get_mnode_freelist(ufltp, mnode,
+ pp = page_get_mnode_freelist(mnode,
bin, mtype, szc, flags);
if (pp == NULL) {
pp = page_get_mnode_cachelist(
@@ -3403,9 +3367,7 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
}
if (pp != NULL) {
VM_STAT_ADD(pga_vmstats.pga_allocok);
-#ifdef DEBUG
check_dma(dma_attr, pp, 1);
-#endif
return (pp);
}
} while (mtype != n &&
@@ -3517,9 +3479,7 @@ page_create_io(
} while (pp != plist);
if (!npages) {
-#ifdef DEBUG
check_dma(mattr, plist, pages_req);
-#endif
return (plist);
} else {
vaddr += (pages_req - npages) << MMU_PAGESHIFT;
@@ -3682,9 +3642,7 @@ top:
vaddr += MMU_PAGESIZE;
}
-#ifdef DEBUG
check_dma(mattr, plist, pages_req);
-#endif
return (plist);
fail:
@@ -4026,33 +3984,3 @@ page_get_physical(uintptr_t seed)
}
return (pp);
}
-
-/*
- * Initializes the user and kernel page freelist type structures.
- */
-/* ARGSUSED */
-void
-page_flt_init(page_freelist_type_t *ufp, page_freelist_type_t *kfp)
-{
- ufp->pflt_type = PFLT_USER;
- ufp->pflt_get_free = &page_get_uflt;
- ufp->pflt_walk_init = page_list_walk_init;
- ufp->pflt_walk_next = page_list_walk_next_bin;
- ufp->pflt_policy[0] = page_get_mnode_freelist;
- ufp->pflt_policy[1] = page_get_contig_pages;
- ufp->pflt_num_policies = 2;
-#if defined(__amd64) && !defined(__xpv)
- if (!kflt_disable) {
- ufp->pflt_num_policies = 3;
- ufp->pflt_policy[1] = page_user_alloc_kflt;
- ufp->pflt_policy[2] = page_get_contig_pages;
-
- kfp->pflt_type = PFLT_KMEM;
- kfp->pflt_get_free = &page_get_kflt;
- kfp->pflt_walk_init = page_kflt_walk_init;
- kfp->pflt_walk_next = page_list_walk_next_bin;
- kfp->pflt_num_policies = 1;
- kfp->pflt_policy[0] = page_get_mnode_freelist;
- }
-#endif /* __amd64 && !__xpv */
-}
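
Both page_coloring_init() and page_coloring_setup() above follow the same early-boot idiom: size every table in one pass, let the caller allocate a single slab, then carve the slab with a moving cursor in exactly the order the sizing pass used. The kernel-freelist arrays were one more set of carvings, which is why each routine loses a matching block in this change. A minimal sketch of the carve-by-cursor idiom, with the table shapes invented:

#include <stdlib.h>

#define NNODES  4
#define NCOLORS 16

typedef struct page page_t;

static page_t ***freelists;             /* [node][color] list heads */

static size_t
coloring_size(void)
{
        size_t sz = NNODES * sizeof (page_t **);        /* row pointers */

        sz += NNODES * NCOLORS * sizeof (page_t *);     /* per-color heads */
        return (sz);
}

static void
coloring_setup(char *addr)
{
        int i;

        freelists = (page_t ***)addr;
        addr += NNODES * sizeof (page_t **);
        for (i = 0; i < NNODES; i++) {
                freelists[i] = (page_t **)addr;
                addr += NCOLORS * sizeof (page_t *);
        }
}

int
main(void)
{
        char *slab = calloc(1, coloring_size());

        coloring_setup(slab);
        return (freelists[NNODES - 1][NCOLORS - 1] == NULL ? 0 : 1);
}

The sizing and carving passes must stay in lockstep; note that each deleted kflt block here had a twin in the other routine for exactly that reason.
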
diff --git a/usr/src/uts/i86xpv/Makefile.files b/usr/src/uts/i86xpv/Makefile.files
index d2f59e1de9..e9501d7684 100644
--- a/usr/src/uts/i86xpv/Makefile.files
+++ b/usr/src/uts/i86xpv/Makefile.files
@@ -20,7 +20,8 @@
#
#
-# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
# This Makefile defines file modules in the directory uts/i86xpv
@@ -74,7 +75,6 @@ CORE_OBJS += \
mach_sysconfig.o \
machdep.o \
mem_config_stubs.o \
- kflt_mem_stubs.o \
memnode.o \
microcode.o \
mlsetup.o \
diff --git a/usr/src/uts/sun4/vm/vm_dep.c b/usr/src/uts/sun4/vm/vm_dep.c
index 4cdb8a8e01..baa8271ca9 100644
--- a/usr/src/uts/sun4/vm/vm_dep.c
+++ b/usr/src/uts/sun4/vm/vm_dep.c
@@ -19,7 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
/*
@@ -91,8 +92,6 @@ plcnt_t plcnt; /* page list count */
caddr_t errata57_limit;
#endif
-static void page_flt_init(page_freelist_type_t *);
-
extern void page_relocate_hash(page_t *, page_t *);
/*
@@ -725,6 +724,41 @@ map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
}
/*
+ * Anchored in the table below are counters used to keep track
+ * of free contiguous physical memory. Each element of the table contains
+ * the array of counters, the size of the array (allocated during
+ * startup based on physmax), and a shift value used to convert a pagenum
+ * into a counter array index or vice versa. The table has page size
+ * for rows and region size for columns:
+ *
+ * page_counters[page_size][region_size]
+ *
+ * page_size: TTE size code of pages on page_size freelist.
+ *
+ * region_size: TTE size code of a candidate larger page made up
+ * of contiguous free page_size pages.
+ *
+ * As you go across a page_size row, increasing region_size, each
+ * element keeps track of how many (region_size - 1) size groups
+ * made up of page_size free pages can be coalesced into a
+ * region_size page. Yuck! Let's try an example:
+ *
+ * page_counters[1][3] is the table element used for identifying
+ * candidate 4M pages from contiguous pages off the 64K free list.
+ * Each index in the page_counters[1][3].array spans 4M. It's the
+ * number of free 512K size (region_size - 1) groups of contiguous
+ * 64K free pages. So when page_counters[1][3].counters[n] == 8
+ * we know we have a candidate 4M page made up of 512K size groups
+ * of 64K free pages.
+ */
+
+/*
+ * Per page size free lists. 3rd (max_mem_nodes) and 4th (page coloring bins)
+ * dimensions are allocated dynamically.
+ */
+page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
+
+/*
* For now there is only a single size cache list.
* Allocated dynamically.
*/
@@ -787,11 +821,10 @@ alloc_page_freelists(caddr_t alloc_base)
for (szc = 0; szc < mmu_page_sizes; szc++) {
clrs = page_get_pagecolors(szc);
for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
- ufltp->pflt_freelists[szc][mtype] =
- (page_t ***)alloc_base;
+ page_freelists[szc][mtype] = (page_t ***)alloc_base;
alloc_base += (max_mem_nodes * sizeof (page_t **));
for (mnode = 0; mnode < max_mem_nodes; mnode++) {
- ufltp->pflt_freelists[szc][mtype][mnode] =
+ page_freelists[szc][mtype][mnode] =
(page_t **)alloc_base;
alloc_base += (clrs * (sizeof (page_t *)));
}
@@ -799,8 +832,6 @@ alloc_page_freelists(caddr_t alloc_base)
}
alloc_base = page_ctrs_alloc(alloc_base);
-
- page_flt_init(ufltp);
return (alloc_base);
}
@@ -1045,17 +1076,3 @@ get_segkmem_lpsize(size_t lpsize)
}
return (PAGESIZE);
}
-/*
- * Initializes the user page freelist type structures.
- */
-static void
-page_flt_init(page_freelist_type_t *ufp)
-{
- ufp->pflt_type = PFLT_USER;
- ufp->pflt_get_free = &page_get_uflt;
- ufp->pflt_walk_init = page_list_walk_init;
- ufp->pflt_walk_next = page_list_walk_next_bin;
- ufp->pflt_num_policies = 2;
- ufp->pflt_policy[0] = page_get_mnode_freelist;
- ufp->pflt_policy[1] = page_get_contig_pages;
-}
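
The 4M example in the restored comment checks out numerically: a 4M region holds 4M / 512K = 8 groups of size (region_size - 1), each 512K group is itself 512K / 64K = 8 contiguous 64K pages, and so a counter value of 8 in page_counters[1][3] accounts for all 64 constituent 64K pages of the 4M candidate.
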
diff --git a/usr/src/uts/sun4/vm/vm_dep.h b/usr/src/uts/sun4/vm/vm_dep.h
index d11a1bdae7..6750585419 100644
--- a/usr/src/uts/sun4/vm/vm_dep.h
+++ b/usr/src/uts/sun4/vm/vm_dep.h
@@ -19,7 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
/*
@@ -84,8 +85,12 @@ extern "C" {
#define PGI_MT_RANGE 0x1000000 /* mtype range */
#define PGI_MT_NEXT 0x2000000 /* get next mtype */
+extern page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
extern page_t ***page_cachelists[MAX_MEM_TYPES];
+#define PAGE_FREELISTS(mnode, szc, color, mtype) \
+ (*(page_freelists[szc][mtype][mnode] + (color)))
+
#define PAGE_CACHELISTS(mnode, color, mtype) \
(*(page_cachelists[mtype][mnode] + (color)))
@@ -286,8 +291,8 @@ extern uint_t page_pfn_2_color_cpu(pfn_t, uchar_t, void *);
#define PP_2_MEM_NODE(pp) (PFN_2_MEM_NODE(pp->p_pagenum))
-#define PC_BIN_MUTEX(iskflt, mnode, bin, flags) ((flags & PG_FREE_LIST) ? \
- &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \
+#define PC_BIN_MUTEX(mnode, bin, flags) ((flags & PG_FREE_LIST) ? \
+ &fpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode] : \
&cpc_mutex[(bin) & (NPC_MUTEX - 1)][mnode])
#define FPC_MUTEX(mnode, i) (&fpc_mutex[i][mnode])
@@ -319,88 +324,6 @@ typedef struct page_list_walker {
void page_list_walk_init(uchar_t szc, uint_t flags, uint_t bin,
int can_split, int use_ceq, page_list_walker_t *plw);
-/*
- * Page freelists have a single freelist type, the user page freelist. The
- * kernel page freelist is disabled on SPARC platforms. The definitions related
- * to the freelist type structure are grouped below.
- */
-
-#define MAX_PFLT_POLICIES 3
-#define MAX_PFLT_TYPE 2
-enum freelist_types {PFLT_USER, PFLT_KMEM};
-
-/*
- * The kernel only needs a small number of page colors, far fewer than user
- * programs need.
- */
-#define KFLT_PAGE_COLORS 16
- /* flag used by the kflt_export function when calling page_promote */
-#define PC_KFLT_EXPORT 0x4
-#define PC_ISKFLT(fltp) (fltp->pflt_type == PFLT_KMEM)
-
-typedef struct page_freelist_type page_freelist_type_t;
-extern page_freelist_type_t flt_user;
-extern page_freelist_type_t *ufltp;
-
-typedef page_t *(*pflt_get_func_p) (struct vnode *, u_offset_t, struct seg *,
- caddr_t, size_t, uint_t, lgrp_t *);
-typedef page_t *(*pflt_policy_func_p)(page_freelist_type_t *, int, uint_t, int,
- uchar_t, uint_t);
-typedef void (*pflt_list_walk_init_func_p)(uchar_t, uint_t, uint_t, int, int,
- page_list_walker_t *);
-typedef uint_t (*pflt_list_walk_next_func_p)(uchar_t, uint_t,
- page_list_walker_t *);
-
-page_t *page_get_uflt(struct vnode *, u_offset_t, struct seg *, caddr_t,
- size_t, uint_t, struct lgrp *);
-extern page_t *page_get_mnode_freelist(page_freelist_type_t *, int, uint_t,
- int, uchar_t, uint_t);
-extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
-extern page_t *page_get_contig_pages(page_freelist_type_t *, int, uint_t, int,
- uchar_t, uint_t);
-extern void page_list_walk_init(uchar_t, uint_t, uint_t, int, int,
- page_list_walker_t *);
-extern uint_t page_list_walk_next_bin(uchar_t, uint_t, page_list_walker_t *);
-
-/*
- * Page freelists are organized as freelist types; on SPARC systems there
- * is only a single user freelist type, as the kernel cage provides a
- * similar function to the kernel freelist in that it prevents memory
- * fragmentation.
- *
- * The page freelists have fixed page size and memory type dimensions.
- * The 3rd (max_mem_nodes) and 4th (page coloring bins) dimensions are
- * allocated dynamically.
- */
-struct page_freelist_type {
- int pflt_type;
- pflt_get_func_p pflt_get_free;
- pflt_list_walk_init_func_p pflt_walk_init;
- pflt_list_walk_next_func_p pflt_walk_next;
- int pflt_num_policies;
- pflt_policy_func_p pflt_policy[MAX_PFLT_POLICIES];
- page_t ***pflt_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
-};
-
-#define PAGE_FREELISTP(is_kflt, mnode, szc, color, mtype) \
- ((ufltp->pflt_freelists[szc][mtype][mnode] + (color)))
-
-#define PAGE_FREELISTS(is_kflt, mnode, szc, color, mtype) \
- (*(ufltp->pflt_freelists[szc][mtype][mnode] + (color)))
-
-#define PAGE_GET_FREELISTS(pp, vp, off, seg, vaddr, size, flags, lgrp) \
- pp = ufltp->pflt_get_free(vp, off, seg, vaddr, size, \
- flags, lgrp);
-
-#define PAGE_GET_FREELISTS_POLICY(fp, i) \
- (fp->pflt_policy[i])
-
-#define PAGE_LIST_WALK_INIT(fp, szc, flags, bin, can_split, use_ceq, plw) \
- fp->pflt_walk_init(szc, flags, bin, can_split, use_ceq, plw)
-
-#define PAGE_LIST_WALK_NEXT(fp, szc, bin, plw) \
- fp->pflt_walk_next(szc, bin, plw)
-
typedef char hpmctr_t;
#ifdef DEBUG
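
The struct and macros deleted just above amounted to a small dispatch table:
PAGE_GET_FREELISTS called through pflt_get_free, which in turn tried each
pflt_policy[] entry until one produced a page. A pared-down sketch of that
pattern follows; every name in it is invented for illustration, not taken
from the removed code:

#include <stdio.h>

typedef struct toy_flt toy_flt_t;
typedef void *(*toy_policy_f)(toy_flt_t *, int);

/* Each freelist type carries its allocation policies in order. */
struct toy_flt {
	const char	*flt_name;
	int		flt_num_policies;
	toy_policy_f	flt_policy[3];
};

static void *
policy_local(toy_flt_t *fp, int szc)
{
	(void) fp; (void) szc;
	return (NULL);			/* simulate a local-list miss */
}

static void *
policy_contig(toy_flt_t *fp, int szc)
{
	static int fake_page;

	(void) fp; (void) szc;
	return (&fake_page);		/* simulate a coalesced hit */
}

/* Walk the policies in order until one returns a page. */
static void *
toy_get_free(toy_flt_t *fp, int szc)
{
	void *pp;
	int i;

	for (i = 0; i < fp->flt_num_policies; i++)
		if ((pp = fp->flt_policy[i](fp, szc)) != NULL)
			return (pp);
	return (NULL);
}

int
main(void)
{
	toy_flt_t user_flt = { "user", 2, { policy_local, policy_contig } };

	printf("%s alloc %s\n", user_flt.flt_name,
	    toy_get_free(&user_flt, 0) != NULL ? "ok" : "failed");
	return (0);
}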
@@ -700,7 +623,7 @@ extern pgcnt_t shm_lpg_min_physmem;
* 1 virtual=paddr
* 2 bin hopping
*/
-#define AS_2_BIN(kflt, as, seg, vp, addr, bin, szc) \
+#define AS_2_BIN(as, seg, vp, addr, bin, szc) \
switch (consistent_coloring) { \
default: \
cmn_err(CE_WARN, \
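
The consistent_coloring settings listed in the comment above choose a color
bin for a newly allocated page. The sketch below guesses at the intent of the
two non-default policies only; the constants are toys, and the real AS_2_BIN
also consults the seg, vnode, and the per-size color counts:

#include <stdint.h>
#include <stdio.h>

#define	TOY_PAGESHIFT	13	/* 8K base page, as on sun4 */
#define	TOY_COLORS	32	/* toy color count; power of two */

static unsigned int
toy_as_2_bin(int policy, uintptr_t vaddr, unsigned int *as_hop)
{
	switch (policy) {
	case 1:		/* virtual=paddr: color follows the vaddr */
		return ((vaddr >> TOY_PAGESHIFT) & (TOY_COLORS - 1));
	case 2:		/* bin hopping: advance the as's bin each time */
		return ((*as_hop)++ & (TOY_COLORS - 1));
	default:
		return (0);
	}
}

int
main(void)
{
	unsigned int hop = 7;
	unsigned int b1, b2;

	printf("virtual=paddr: bin %u\n",
	    toy_as_2_bin(1, (uintptr_t)0x120000, &hop));
	b1 = toy_as_2_bin(2, 0, &hop);
	b2 = toy_as_2_bin(2, 0, &hop);
	printf("bin hopping: bin %u then bin %u\n", b1, b2);
	return (0);
}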
@@ -826,26 +749,16 @@ extern char vm_cpu_data0[];
#ifdef VM_STATS
struct vmm_vmstats_str {
- /* page_get_uflt and page_get_kflt */
- ulong_t pgf_alloc[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocok[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocokrem[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocfailed[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgf_allocdeferred;
- ulong_t pgf_allocretry[MMU_PAGE_SIZES][MAX_PFLT_TYPE];
- ulong_t pgik_allocok; /* page_import_kflt */
- ulong_t pgik_allocfailed;
- ulong_t pgkx_allocok; /* kflt_expand */
- ulong_t pgkx_allocfailed;
- ulong_t puak_allocok; /* page_user_alloc_kflt */
- ulong_t puak_allocfailed;
- ulong_t pgexportok; /* kflt_export */
- ulong_t pgexportfail;
- ulong_t pgkflt_disable; /* kflt_user_evict */
+ ulong_t pgf_alloc[MMU_PAGE_SIZES]; /* page_get_freelist */
+ ulong_t pgf_allocok[MMU_PAGE_SIZES];
+ ulong_t pgf_allocokrem[MMU_PAGE_SIZES];
+ ulong_t pgf_allocfailed[MMU_PAGE_SIZES];
+ ulong_t pgf_allocdeferred;
+ ulong_t pgf_allocretry[MMU_PAGE_SIZES];
ulong_t pgc_alloc; /* page_get_cachelist */
ulong_t pgc_allocok;
ulong_t pgc_allocokrem;
- ulong_t pgc_allocokdeferred;
+ ulong_t pgc_allocokdeferred;
ulong_t pgc_allocfailed;
ulong_t pgcp_alloc[MMU_PAGE_SIZES]; /* page_get_contig_pages */
ulong_t pgcp_allocfailed[MMU_PAGE_SIZES];
@@ -856,7 +769,6 @@ struct vmm_vmstats_str {
ulong_t ptcpfailexcl[MMU_PAGE_SIZES];
ulong_t ptcpfailszc[MMU_PAGE_SIZES];
ulong_t ptcpfailcage[MMU_PAGE_SIZES];
- ulong_t ptcpfailkflt[MMU_PAGE_SIZES];
ulong_t ptcpok[MMU_PAGE_SIZES];
ulong_t pgmf_alloc[MMU_PAGE_SIZES]; /* page_get_mnode_freelist */
ulong_t pgmf_allocfailed[MMU_PAGE_SIZES];
@@ -875,24 +787,24 @@ struct vmm_vmstats_str {
ulong_t pfs_req[MMU_PAGE_SIZES]; /* page_freelist_split */
ulong_t pfs_demote[MMU_PAGE_SIZES];
ulong_t pfc_coalok[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
- ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */
+ ulong_t ppr_reloc[MMU_PAGE_SIZES]; /* page_relocate */
+ ulong_t ppr_relocok[MMU_PAGE_SIZES];
ulong_t ppr_relocnoroot[MMU_PAGE_SIZES];
ulong_t ppr_reloc_replnoroot[MMU_PAGE_SIZES];
ulong_t ppr_relocnolock[MMU_PAGE_SIZES];
ulong_t ppr_relocnomem[MMU_PAGE_SIZES];
- ulong_t ppr_relocok[MMU_PAGE_SIZES];
ulong_t ppr_krelocfail[MMU_PAGE_SIZES];
ulong_t ppr_copyfail;
/* page coalesce counter */
- ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
+ ulong_t page_ctrs_coalesce[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
/* candidates useful */
- ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
+ ulong_t page_ctrs_cands_skip[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
/* ctrs changed after locking */
- ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
+ ulong_t page_ctrs_changed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
/* page_freelist_coalesce failed */
- ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
- ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */
- ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */
+ ulong_t page_ctrs_failed[MMU_PAGE_SIZES][MAX_MNODE_MRANGES];
+ ulong_t page_ctrs_coalesce_all; /* page coalesce all counter */
+ ulong_t page_ctrs_cands_skip_all; /* candidates useful for all func */
};
extern struct vmm_vmstats_str vmm_vmstats;
#endif /* VM_STATS */
diff --git a/usr/src/uts/sun4u/Makefile.files b/usr/src/uts/sun4u/Makefile.files
index e2ec108950..0bbe2dd930 100644
--- a/usr/src/uts/sun4u/Makefile.files
+++ b/usr/src/uts/sun4u/Makefile.files
@@ -20,7 +20,8 @@
#
#
-# Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
# This Makefile defines all file modules for the directory uts/sun4u
# and its children. These are the source files which are sun4u
@@ -56,7 +57,6 @@ CORE_OBJS += mach_trap.o
CORE_OBJS += mach_vm_dep.o
CORE_OBJS += mach_xc.o
CORE_OBJS += mem_cage.o
-CORE_OBJS += kflt_mem_stubs.o
CORE_OBJS += mem_config.o
CORE_OBJS += memlist_new.o
CORE_OBJS += memscrub.o
diff --git a/usr/src/uts/sun4u/starfire/os/starfire.c b/usr/src/uts/sun4u/starfire/os/starfire.c
index 11efe65fd0..2dd0688f4a 100644
--- a/usr/src/uts/sun4u/starfire/os/starfire.c
+++ b/usr/src/uts/sun4u/starfire/os/starfire.c
@@ -20,7 +20,8 @@
*/
/*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
#include <sys/param.h>
@@ -294,15 +295,14 @@ plat_freelist_process(int mnode)
}
/* find freelist */
- freelist = &PAGE_FREELISTS(PLT_USER, mnode,
- size, color, mtype);
+ freelist = &PAGE_FREELISTS(mnode, size,
+ color, mtype);
if (*freelist == NULL)
continue;
/* acquire locks */
- pcm = PC_BIN_MUTEX(PLT_USER, mnode, color,
- PG_FREE_LIST);
+ pcm = PC_BIN_MUTEX(mnode, color, PG_FREE_LIST);
mutex_enter(pcm);
/*
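
The restored plat_freelist_process code above follows the usual freelist
discipline: look up the head for (mnode, size, color, mtype), skip empty
lists before paying for a lock, then take the bin's hashed mutex for the
walk. A userland sketch of that discipline, with pthreads standing in for
kmutex_t and every name invented here:

#include <pthread.h>
#include <stdio.h>

typedef struct page { struct page *p_next; } page_t;

#define	TOY_NPC_MUTEX	2
static pthread_mutex_t toy_fpc[TOY_NPC_MUTEX] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};
static page_t *toy_freelist[4];		/* one head per color bin */

/* Count the pages in one color bin under its hashed bin lock. */
static int
toy_process_bin(int color)
{
	pthread_mutex_t *pcm;
	page_t *pp;
	int n = 0;

	if (toy_freelist[color] == NULL)	/* skip before locking */
		return (0);

	pcm = &toy_fpc[color & (TOY_NPC_MUTEX - 1)];
	(void) pthread_mutex_lock(pcm);
	for (pp = toy_freelist[color]; pp != NULL; pp = pp->p_next)
		n++;
	(void) pthread_mutex_unlock(pcm);
	return (n);
}

int
main(void)
{
	page_t a, b;

	b.p_next = NULL;
	a.p_next = &b;
	toy_freelist[1] = &a;
	printf("bin 1 holds %d pages\n", toy_process_bin(1));
	return (0);
}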
diff --git a/usr/src/uts/sun4v/Makefile.files b/usr/src/uts/sun4v/Makefile.files
index 51cb8adc90..d8b201668f 100644
--- a/usr/src/uts/sun4v/Makefile.files
+++ b/usr/src/uts/sun4v/Makefile.files
@@ -20,7 +20,8 @@
#
#
-# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
# This Makefile defines all file modules for the directory uts/sun4v
# and its children. These are the source files which are sun4v
@@ -58,7 +59,6 @@ CORE_OBJS += mach_trap.o
CORE_OBJS += mach_vm_dep.o
CORE_OBJS += mach_xc.o
CORE_OBJS += mem_cage.o
-CORE_OBJS += kflt_mem_stubs.o
CORE_OBJS += mem_config.o
CORE_OBJS += memlist_new.o
CORE_OBJS += memseg.o