Diffstat (limited to 'usr/src/uts/i86pc/vm/htable.c')
-rw-r--r--  usr/src/uts/i86pc/vm/htable.c  1066
 1 file changed, 453 insertions, 613 deletions
diff --git a/usr/src/uts/i86pc/vm/htable.c b/usr/src/uts/i86pc/vm/htable.c
index 3105ad9e27..bd1ac11630 100644
--- a/usr/src/uts/i86pc/vm/htable.c
+++ b/usr/src/uts/i86pc/vm/htable.c
@@ -18,8 +18,9 @@
*
* CDDL HEADER END
*/
+
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -45,6 +46,7 @@
#include <sys/promif.h>
#include <sys/var.h>
#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
#include <sys/bootconf.h>
#include <sys/dumphdr.h>
#include <vm/seg_kmem.h>
@@ -53,8 +55,12 @@
#include <vm/hat_i86.h>
#include <sys/cmn_err.h>
+#include <sys/bootinfo.h>
+#include <vm/kboot_mmu.h>
+
+static void x86pte_zero(htable_t *dest, uint_t entry, uint_t count);
+
kmem_cache_t *htable_cache;
-extern cpuset_t khat_cpuset;
/*
* The variable htable_reserve_amount, rather than HTABLE_RESERVE_AMOUNT,
@@ -98,18 +104,12 @@ kmutex_t htable_mutex[NUM_HTABLE_MUTEX];
static void link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr);
static void unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr);
static void htable_free(htable_t *ht);
-static x86pte_t *x86pte_access_pagetable(htable_t *ht);
+static x86pte_t *x86pte_access_pagetable(htable_t *ht, uint_t index);
static void x86pte_release_pagetable(htable_t *ht);
static x86pte_t x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old,
x86pte_t new);
/*
- * Address used for kernel page tables. See ptable_alloc() below.
- */
-uintptr_t ptable_va = 0;
-size_t ptable_sz = 2 * MMU_PAGESIZE;
-
-/*
* A counter to track if we are stealing or reaping htables. When non-zero
* htable_free() will directly free htables (either to the reserve or kmem)
* instead of putting them in a hat's htable cache.
@@ -124,142 +124,54 @@ static uint32_t active_ptables = 0;
/*
* Allocate a memory page for a hardware page table.
*
- * The pages allocated for page tables are currently gotten in a hacked up
- * way. It works for now, but really needs to be fixed up a bit.
- *
- * During boot: The boot loader controls physical memory allocation via
- * boot_alloc(). To avoid conflict with vmem, we just do boot_alloc()s with
- * addresses less than kernelbase. These addresses are ignored when we take
- * over mappings from the boot loader.
- *
- * Post-boot: we currently use page_create_va() on the kvp with fake offsets,
- * segments and virt address. This is pretty bogus, but was copied from the
- * old hat_i86.c code. A better approach would be to have a custom
- * page_get_physical() interface that can specify either mnode random or
- * mnode local and takes a page from whatever color has the MOST available -
- * this would have a minimal impact on page coloring.
- *
- * For now the htable pointer in ht is only used to compute a unique vnode
- * offset for the page.
+ * A wrapper around page_get_physical(), with some extra checks.
*/
-static void
-ptable_alloc(htable_t *ht)
+static pfn_t
+ptable_alloc(uintptr_t seed)
{
pfn_t pfn;
page_t *pp;
- u_offset_t offset;
- static struct seg tmpseg;
- static int first_time = 1;
- /*
- * Allocating the associated hardware page table is very different
- * before boot has finished. We get a physical page to from boot
- * w/o eating up any kernel address space.
- */
- ht->ht_pfn = PFN_INVALID;
+ pfn = PFN_INVALID;
atomic_add_32(&active_ptables, 1);
- if (use_boot_reserve) {
- ASSERT(ptable_va != 0);
-
- /*
- * Allocate, then demap the ptable_va, so that we're
- * sure there exist page table entries for the addresses
- */
- if (first_time) {
- first_time = 0;
- if ((uintptr_t)BOP_ALLOC(bootops, (caddr_t)ptable_va,
- ptable_sz, BO_NO_ALIGN) != ptable_va)
- panic("BOP_ALLOC failed");
-
- hat_boot_demap(ptable_va);
- hat_boot_demap(ptable_va + MMU_PAGESIZE);
- }
-
- pfn = ((uintptr_t)BOP_EALLOC(bootops, 0, MMU_PAGESIZE,
- BO_NO_ALIGN, BOPF_X86_ALLOC_PHYS)) >> MMU_PAGESHIFT;
- if (page_resv(1, KM_NOSLEEP) == 0)
- panic("page_resv() failed in ptable alloc");
-
- pp = page_numtopp_nolock(pfn);
- ASSERT(pp != NULL);
- if (pp->p_szc != 0)
- page_boot_demote(pp);
- pp = page_numtopp(pfn, SE_EXCL);
- ASSERT(pp != NULL);
-
- } else {
- /*
- * Post boot get a page for the table.
- *
- * The first check is to see if there is memory in
- * the system. If we drop to throttlefree, then fail
- * the ptable_alloc() and let the stealing code kick in.
- * Note that we have to do this test here, since the test in
- * page_create_throttle() would let the NOSLEEP allocation
- * go through and deplete the page reserves.
- *
- * The !NOMEMWAIT() lets pageout, fsflush, etc. skip this check.
- */
- if (!NOMEMWAIT() && freemem <= throttlefree + 1)
- return;
+ /*
+ * The first check is to see if there is memory in the system. If we
+ * drop to throttlefree, then fail the ptable_alloc() and let the
+ * stealing code kick in. Note that we have to do this test here,
+ * since the test in page_create_throttle() would let the NOSLEEP
+ * allocation go through and deplete the page reserves.
+ *
+ * The !NOMEMWAIT() lets pageout, fsflush, etc. skip this check.
+ */
+ if (!NOMEMWAIT() && freemem <= throttlefree + 1)
+ return (PFN_INVALID);
#ifdef DEBUG
- /*
- * This code makes htable_steal() easier to test. By setting
- * force_steal we force pagetable allocations to fall
- * into the stealing code. Roughly 1 in ever "force_steal"
- * page table allocations will fail.
- */
- if (ht->ht_hat != kas.a_hat && force_steal > 1 &&
- ++ptable_cnt > force_steal) {
- ptable_cnt = 0;
- return;
- }
+ /*
+ * This code makes htable_steal() easier to test. By setting
+ * force_steal we force pagetable allocations to fall
+ * into the stealing code. Roughly 1 in every "force_steal"
+ * page table allocations will fail.
+ */
+ if (proc_pageout != NULL && force_steal > 1 &&
+ ++ptable_cnt > force_steal) {
+ ptable_cnt = 0;
+ return (PFN_INVALID);
+ }
#endif /* DEBUG */
- /*
- * This code is temporary, so don't review too critically.
- * I'm awaiting a new phys page allocator from Kit -- Joe
- *
- * We need assign an offset for the page to call
- * page_create_va. To avoid conflicts with other pages,
- * we get creative with the offset.
- * for 32 bits, we pic an offset > 4Gig
- * for 64 bits, pic an offset somewhere in the VA hole.
- */
- offset = (uintptr_t)ht - kernelbase;
- offset <<= MMU_PAGESHIFT;
-#if defined(__amd64)
- offset += mmu.hole_start; /* something in VA hole */
-#else
- offset += 1ULL << 40; /* something > 4 Gig */
-#endif
-
- if (page_resv(1, KM_NOSLEEP) == 0)
- return;
-
-#ifdef DEBUG
- pp = page_exists(&kvp, offset);
- if (pp != NULL)
- panic("ptable already exists %p", pp);
-#endif
- pp = page_create_va(&kvp, offset, MMU_PAGESIZE,
- PG_EXCL | PG_NORELOC, &tmpseg,
- (void *)((uintptr_t)ht << MMU_PAGESHIFT));
- if (pp == NULL)
- return;
- page_io_unlock(pp);
- page_hashout(pp, NULL);
- pfn = pp->p_pagenum;
- }
+ pp = page_get_physical(seed);
+ if (pp == NULL)
+ return (PFN_INVALID);
+ pfn = pp->p_pagenum;
page_downgrade(pp);
ASSERT(PAGE_SHARED(pp));
if (pfn == PFN_INVALID)
panic("ptable_alloc(): Invalid PFN!!");
- ht->ht_pfn = pfn;
HATSTAT_INC(hs_ptable_allocs);
+ return (pfn);
}
/*
@@ -267,10 +179,9 @@ ptable_alloc(htable_t *ht)
* for ptable_alloc().
*/
static void
-ptable_free(htable_t *ht)
+ptable_free(pfn_t pfn)
{
- pfn_t pfn = ht->ht_pfn;
- page_t *pp;
+ page_t *pp = page_numtopp_nolock(pfn);
/*
* need to destroy the page used for the pagetable
@@ -278,7 +189,6 @@ ptable_free(htable_t *ht)
ASSERT(pfn != PFN_INVALID);
HATSTAT_INC(hs_ptable_frees);
atomic_add_32(&active_ptables, -1);
- pp = page_numtopp_nolock(pfn);
if (pp == NULL)
panic("ptable_free(): no page for pfn!");
ASSERT(PAGE_SHARED(pp));
@@ -299,7 +209,6 @@ ptable_free(htable_t *ht)
}
page_free(pp, 1);
page_unresv(1);
- ht->ht_pfn = PFN_INVALID;
}
/*
@@ -340,14 +249,12 @@ htable_get_reserve(void)
}
/*
- * Allocate initial htables with page tables and put them on the kernel hat's
- * cache list.
+ * Allocate initial htables and put them on the reserve list
*/
void
htable_initial_reserve(uint_t count)
{
htable_t *ht;
- hat_t *hat = kas.a_hat;
count += HTABLE_RESERVE_AMOUNT;
while (count > 0) {
@@ -355,51 +262,23 @@ htable_initial_reserve(uint_t count)
ASSERT(ht != NULL);
ASSERT(use_boot_reserve);
- ht->ht_hat = kas.a_hat; /* so htable_free() works */
- ht->ht_flags = 0; /* so x86pte_zero works */
- ptable_alloc(ht);
- if (ht->ht_pfn == PFN_INVALID)
- panic("ptable_alloc() failed");
-
- x86pte_zero(ht, 0, mmu.ptes_per_table);
-
- ht->ht_next = hat->hat_ht_cached;
- hat->hat_ht_cached = ht;
+ ht->ht_pfn = PFN_INVALID;
+ htable_put_reserve(ht);
--count;
}
}
/*
* Readjust the reserves after a thread finishes using them.
- *
- * The first time this is called post boot, we'll also clear out the
- * extra boot htables that were put in the kernel hat's cache list.
*/
void
htable_adjust_reserve()
{
- static int first_time = 1;
htable_t *ht;
ASSERT(curthread != hat_reserves_thread);
/*
- * The first time this is called after we can steal, we free up the
- * the kernel's cache htable list. It has lots of extra htable/page
- * tables that were allocated for boot up.
- */
- if (first_time) {
- first_time = 0;
- while ((ht = kas.a_hat->hat_ht_cached) != NULL) {
- kas.a_hat->hat_ht_cached = ht->ht_next;
- ASSERT(ht->ht_hat == kas.a_hat);
- ptable_free(ht);
- htable_put_reserve(ht);
- }
- return;
- }
-
- /*
* Free any excess htables in the reserve list
*/
while (htable_reserve_cnt > htable_reserve_amount) {
@@ -586,7 +465,7 @@ htable_steal(uint_t cnt)
* - unload and invalidate all PTEs
*/
for (e = 0, va = ht->ht_vaddr;
- e < ht->ht_num_ptes &&
+ e < HTABLE_NUM_PTES(ht) &&
ht->ht_valid_cnt > 0 &&
ht->ht_busy == 1 &&
ht->ht_lock_cnt == 0;
@@ -637,7 +516,7 @@ htable_steal(uint_t cnt)
/*
* Break to outer loop to release the
- * higher (ht_parent) pagtable. This
+ * higher (ht_parent) pagetable. This
* spreads out the pain caused by
* pagefaults.
*/
@@ -699,7 +578,7 @@ htable_reap(void *handle)
}
/*
- * allocate an htable, stealing one or using the reserve if necessary
+ * Allocate an htable, stealing one or using the reserve if necessary
*/
static htable_t *
htable_alloc(
@@ -723,8 +602,7 @@ htable_alloc(
/*
* First reuse a cached htable from the hat_ht_cached field, this
- * avoids unnecessary trips through kmem/page allocators. This is also
- * what happens during use_boot_reserve.
+ * avoids unnecessary trips through kmem/page allocators.
*/
if (hat->hat_ht_cached != NULL && !is_bare) {
hat_enter(hat);
@@ -739,15 +617,12 @@ htable_alloc(
}
if (ht == NULL) {
- ASSERT(!use_boot_reserve);
/*
* When allocating for hat_memload_arena, we use the reserve.
* Also use reserves if we are in a panic().
*/
- if (curthread == hat_reserves_thread || panicstr != NULL) {
- ASSERT(panicstr != NULL || !is_bare);
- ASSERT(panicstr != NULL ||
- curthread == hat_reserves_thread);
+ if (use_boot_reserve || curthread == hat_reserves_thread ||
+ panicstr != NULL) {
ht = htable_get_reserve();
} else {
/*
@@ -772,7 +647,7 @@ htable_alloc(
*/
if (ht != NULL && !is_bare) {
ht->ht_hat = hat;
- ptable_alloc(ht);
+ ht->ht_pfn = ptable_alloc((uintptr_t)ht);
if (ht->ht_pfn == PFN_INVALID) {
kmem_cache_free(htable_cache, ht);
ht = NULL;
@@ -796,8 +671,12 @@ htable_alloc(
/*
* If we stole for a bare htable, release the pagetable page.
*/
- if (ht != NULL && is_bare)
- ptable_free(ht);
+ if (ht != NULL) {
+ if (is_bare) {
+ ptable_free(ht->ht_pfn);
+ ht->ht_pfn = PFN_INVALID;
+ }
+ }
}
/*
@@ -833,13 +712,8 @@ htable_alloc(
*/
if (is_vlp) {
ht->ht_flags |= HTABLE_VLP;
- ht->ht_num_ptes = VLP_NUM_PTES;
ASSERT(ht->ht_pfn == PFN_INVALID);
need_to_zero = 0;
- } else if (level == mmu.max_level) {
- ht->ht_num_ptes = mmu.top_level_count;
- } else {
- ht->ht_num_ptes = mmu.ptes_per_table;
}
/*
@@ -858,6 +732,7 @@ htable_alloc(
*/
if (need_to_zero)
x86pte_zero(ht, 0, mmu.ptes_per_table);
+
return (ht);
}
@@ -890,14 +765,14 @@ htable_free(htable_t *ht)
/*
* If we have a hardware page table, free it.
- * We don't free page tables that are accessed by sharing someone else.
+ * We don't free page tables that are accessed by sharing.
*/
if (ht->ht_flags & HTABLE_SHARED_PFN) {
ASSERT(ht->ht_pfn != PFN_INVALID);
- ht->ht_pfn = PFN_INVALID;
} else if (!(ht->ht_flags & HTABLE_VLP)) {
- ptable_free(ht);
+ ptable_free(ht->ht_pfn);
}
+ ht->ht_pfn = PFN_INVALID;
/*
* If we are the thread using the reserves, put free htables
@@ -1015,12 +890,9 @@ link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr)
}
/*
- * Release of an htable.
- *
- * During process exit, some empty page tables are not unlinked - hat_free_end()
- * cleans them up. Upper level pagetable (mmu.max_page_level and higher) are
- * only released during hat_free_end() or by htable_steal(). We always
- * release SHARED page tables.
+ * Release of a hold on an htable. If this is the last use and the pagetable
+ * is empty, we may want to free it, then recursively look at the pagetable
+ * above it. The recursion is handled by the outer while() loop.
*/
void
htable_release(htable_t *ht)
@@ -1074,8 +946,8 @@ htable_release(htable_t *ht)
}
/*
- * remember if we destroy an htable that shares its PFN
- * from elsewhere
+ * Remember if we destroy an htable that shares its PFN
+ * from elsewhere.
*/
if (ht->ht_flags & HTABLE_SHARED_PFN) {
ASSERT(ht->ht_level == 0);
@@ -1103,7 +975,7 @@ htable_release(htable_t *ht)
*/
if ((hat->hat_flags & HAT_VLP) &&
level == VLP_LEVEL - 1)
- hat_demap(hat, DEMAP_ALL_ADDR);
+ hat_tlb_inval(hat, DEMAP_ALL_ADDR);
/*
* remove this htable from its hash list
@@ -1303,7 +1175,7 @@ try_again:
if ((hat->hat_flags & HAT_VLP) &&
#endif /* __i386 */
l == VLP_LEVEL - 1)
- hat_demap(hat, DEMAP_ALL_ADDR);
+ hat_tlb_inval(hat, DEMAP_ALL_ADDR);
}
ht->ht_next = hat->hat_ht_hash[h];
ASSERT(ht->ht_prev == NULL);
@@ -1336,6 +1208,96 @@ try_again:
}
/*
+ * Inherit initial pagetables from the boot program.
+ */
+void
+htable_attach(
+ hat_t *hat,
+ uintptr_t base,
+ level_t level,
+ htable_t *parent,
+ pfn_t pfn)
+{
+ htable_t *ht;
+ uint_t h;
+ uint_t i;
+ x86pte_t pte;
+ x86pte_t *ptep;
+ page_t *pp;
+ extern page_t *boot_claim_page(pfn_t);
+
+ ht = htable_get_reserve();
+ if (level == mmu.max_level)
+ kas.a_hat->hat_htable = ht;
+ ht->ht_hat = hat;
+ ht->ht_parent = parent;
+ ht->ht_vaddr = base;
+ ht->ht_level = level;
+ ht->ht_busy = 1;
+ ht->ht_next = NULL;
+ ht->ht_prev = NULL;
+ ht->ht_flags = 0;
+ ht->ht_pfn = pfn;
+ ht->ht_lock_cnt = 0;
+ ht->ht_valid_cnt = 0;
+ if (parent != NULL)
+ ++parent->ht_busy;
+
+ h = HTABLE_HASH(hat, base, level);
+ HTABLE_ENTER(h);
+ ht->ht_next = hat->hat_ht_hash[h];
+ ASSERT(ht->ht_prev == NULL);
+ if (hat->hat_ht_hash[h])
+ hat->hat_ht_hash[h]->ht_prev = ht;
+ hat->hat_ht_hash[h] = ht;
+ HTABLE_EXIT(h);
+
+ /*
+ * make sure the page table physical page is not FREE
+ */
+ if (page_resv(1, KM_NOSLEEP) == 0)
+ panic("page_resv() failed in ptable alloc");
+
+ pp = boot_claim_page(pfn);
+ ASSERT(pp != NULL);
+ page_downgrade(pp);
+ /*
+ * Record in the page_t that this is a pagetable, for segkpm setup.
+ */
+ if (kpm_vbase)
+ pp->p_index = 1;
+
+ /*
+ * Count valid mappings and recursively attach lower level pagetables.
+ */
+ ptep = kbm_remap_window(pfn_to_pa(pfn), 0);
+ for (i = 0; i < HTABLE_NUM_PTES(ht); ++i) {
+ if (mmu.pae_hat)
+ pte = ptep[i];
+ else
+ pte = ((x86pte32_t *)ptep)[i];
+ if (!IN_HYPERVISOR_VA(base) && PTE_ISVALID(pte)) {
+ ++ht->ht_valid_cnt;
+ if (!PTE_ISPAGE(pte, level)) {
+ htable_attach(hat, base, level - 1,
+ ht, PTE2PFN(pte, level));
+ ptep = kbm_remap_window(pfn_to_pa(pfn), 0);
+ }
+ }
+ base += LEVEL_SIZE(level);
+ if (base == mmu.hole_start)
+ base = (mmu.hole_end + MMU_PAGEOFFSET) & MMU_PAGEMASK;
+ }
+
+ /*
+ * As long as all the mappings we had were below kernel base
+ * we can release the htable.
+ */
+ if (base < kernelbase)
+ htable_release(ht);
+}
+
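(A note on how this attach is presumably driven: the boot-time HAT setup
would walk the live hierarchy down from the top-level pagetable. The
getcr3()/mmu_btop() invocation below is an assumption, not taken from this
diff; the actual caller would live in hat_kern_setup().)

	/*
	 * Hedged sketch: inherit the whole boot pagetable hierarchy,
	 * starting from the pfn currently loaded in %cr3.
	 */
	htable_attach(kas.a_hat, 0, mmu.max_level, NULL,
	    mmu_btop(getcr3()));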
+/*
* Walk through a given htable looking for the first valid entry. This
* routine takes both a starting and ending address. The starting address
* is required to be within the htable provided by the caller, but there is
@@ -1355,8 +1317,8 @@ htable_scan(htable_t *ht, uintptr_t *vap, uintptr_t eaddr)
{
uint_t e;
x86pte_t found_pte = (x86pte_t)0;
- char *pte_ptr;
- char *end_pte_ptr;
+ caddr_t pte_ptr;
+ caddr_t end_pte_ptr;
int l = ht->ht_level;
uintptr_t va = *vap & LEVEL_MASK(l);
size_t pgsize = LEVEL_SIZE(l);
@@ -1373,9 +1335,9 @@ htable_scan(htable_t *ht, uintptr_t *vap, uintptr_t eaddr)
* The following page table scan code knows that the valid
* bit of a PTE is in the lowest byte AND that x86 is little endian!!
*/
- pte_ptr = (char *)x86pte_access_pagetable(ht);
- end_pte_ptr = pte_ptr + (ht->ht_num_ptes << mmu.pte_size_shift);
- pte_ptr += e << mmu.pte_size_shift;
+ pte_ptr = (caddr_t)x86pte_access_pagetable(ht, 0);
+ end_pte_ptr = (caddr_t)PT_INDEX_PTR(pte_ptr, HTABLE_NUM_PTES(ht));
+ pte_ptr = (caddr_t)PT_INDEX_PTR((x86pte_t *)pte_ptr, e);
while (!PTE_ISVALID(*pte_ptr)) {
va += pgsize;
if (va >= eaddr)
@@ -1389,13 +1351,8 @@ htable_scan(htable_t *ht, uintptr_t *vap, uintptr_t eaddr)
/*
* if we found a valid PTE, load the entire PTE
*/
- if (va < eaddr && pte_ptr != end_pte_ptr) {
- if (mmu.pae_hat) {
- ATOMIC_LOAD64((x86pte_t *)pte_ptr, found_pte);
- } else {
- found_pte = *(x86pte32_t *)pte_ptr;
- }
- }
+ if (va < eaddr && pte_ptr != end_pte_ptr)
+ found_pte = GET_PTE((x86pte_t *)pte_ptr);
x86pte_release_pagetable(ht);
#if defined(__amd64)
@@ -1611,7 +1568,7 @@ htable_va2entry(uintptr_t va, htable_t *ht)
ASSERT(va >= ht->ht_vaddr);
ASSERT(va <= HTABLE_LAST_PAGE(ht));
- return ((va >> LEVEL_SHIFT(l)) & (ht->ht_num_ptes - 1));
+ return ((va >> LEVEL_SHIFT(l)) & (HTABLE_NUM_PTES(ht) - 1));
}
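The HTABLE_NUM_PTES() and PT_INDEX_PTR() helpers used throughout these
hunks are defined elsewhere (presumably htable.h); a rough reconstruction,
assuming PTEs are 8 bytes under PAE and 4 bytes otherwise:

	/* index into a pagetable, scaling by the runtime PTE size */
	#define	PT_INDEX_PTR(p, x) \
		((x86pte_t *)((uintptr_t)(p) + ((x) << mmu.pte_size_shift)))

	/* VLP htables hold only a few top-level entries, not a full page */
	#define	HTABLE_NUM_PTES(ht) \
		(((ht)->ht_flags & HTABLE_VLP) ? 4 : mmu.ptes_per_table)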
/*
@@ -1624,7 +1581,7 @@ htable_e2va(htable_t *ht, uint_t entry)
level_t l = ht->ht_level;
uintptr_t va;
- ASSERT(entry < ht->ht_num_ptes);
+ ASSERT(entry < HTABLE_NUM_PTES(ht));
va = ht->ht_vaddr + ((uintptr_t)entry << LEVEL_SHIFT(l));
/*
@@ -1641,7 +1598,6 @@ htable_e2va(htable_t *ht, uint_t entry)
/*
* The code uses compare and swap instructions to read/write PTE's to
* avoid atomicity problems, since PTEs can be 8 bytes on 32 bit systems.
- * Again this can be optimized on 64 bit systems, since aligned load/store
- * will naturally be atomic.
*
* The combination of using kpreempt_disable()/_enable() and the hci_mutex
@@ -1649,69 +1605,44 @@ htable_e2va(htable_t *ht, uint_t entry)
* while it's in use. If an interrupt thread tries to access a PTE, it will
* yield briefly back to the pinned thread which holds the cpu's hci_mutex.
*/
-
-static struct hat_cpu_info init_hci; /* used for cpu 0 */
-
-/*
- * Initialize a CPU private window for mapping page tables.
- * There will be 3 total pages of addressing needed:
- *
- * 1 for r/w access to pagetables
- * 1 for r access when copying pagetables (hat_alloc)
- * 1 that will map the PTEs for the 1st 2, so we can access them quickly
- *
- * We use vmem_xalloc() to get a correct alignment so that only one
- * hat_mempte_setup() is needed.
- */
void
-x86pte_cpu_init(cpu_t *cpu, void *pages)
+x86pte_cpu_init(cpu_t *cpu)
{
struct hat_cpu_info *hci;
- caddr_t va;
- /*
- * We can't use kmem_alloc/vmem_alloc for the 1st CPU, as this is
- * called before we've activated our own HAT
- */
- if (pages != NULL) {
- hci = &init_hci;
- va = pages;
- } else {
- hci = kmem_alloc(sizeof (struct hat_cpu_info), KM_SLEEP);
- va = vmem_xalloc(heap_arena, 3 * MMU_PAGESIZE, MMU_PAGESIZE, 0,
- LEVEL_SIZE(1), NULL, NULL, VM_SLEEP);
- }
+ hci = kmem_zalloc(sizeof (*hci), KM_SLEEP);
mutex_init(&hci->hci_mutex, NULL, MUTEX_DEFAULT, NULL);
+ cpu->cpu_hat_info = hci;
+}
- /*
- * If we are using segkpm, then there is no need for any of the
- * mempte support. We can access the desired memory through a kpm
- * mapping rather than setting up a temporary mempte mapping.
- */
- if (kpm_enable == 0) {
- hci->hci_mapped_pfn = PFN_INVALID;
-
- hci->hci_kernel_pte =
- hat_mempte_kern_setup(va, va + (2 * MMU_PAGESIZE));
- hci->hci_pagetable_va = (void *)va;
- }
+void
+x86pte_cpu_fini(cpu_t *cpu)
+{
+ struct hat_cpu_info *hci = cpu->cpu_hat_info;
- cpu->cpu_hat_info = hci;
+ kmem_free(hci, sizeof (*hci));
+ cpu->cpu_hat_info = NULL;
}
+#ifdef __i386
/*
- * Macro to establish temporary mappings for x86pte_XXX routines.
+ * On 32 bit kernels, loading a 64 bit PTE is a little tricky
*/
-#define X86PTE_REMAP(addr, pte, index, perm, pfn) { \
- x86pte_t t; \
- \
- t = MAKEPTE((pfn), 0) | (perm) | mmu.pt_global | mmu.pt_nx;\
- if (mmu.pae_hat) \
- pte[index] = t; \
- else \
- ((x86pte32_t *)(pte))[index] = t; \
- mmu_tlbflush_entry((caddr_t)(addr)); \
+x86pte_t
+get_pte64(x86pte_t *ptr)
+{
+ volatile uint32_t *p = (uint32_t *)ptr;
+ x86pte_t t;
+
+ ASSERT(mmu.pae_hat != 0);
+ for (;;) {
+ t = p[0];
+ t |= (uint64_t)p[1] << 32;
+ if ((t & 0xffffffff) == p[0])
+ return (t);
+ }
}
+#endif /* __i386 */
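GET_PTE() and CAS_PTE() are likewise assumed to come from the hat headers,
built on get_pte64() above and cas32()/cas64(); a sketch of the presumed
definitions:

	#ifdef __i386
	#define	GET_PTE(ptr)	(mmu.pae_hat ? get_pte64(ptr) : \
				    *(x86pte32_t *)(ptr))
	#define	CAS_PTE(ptr, x, y)	(mmu.pae_hat ? \
	    cas64((uint64_t *)(ptr), (uint64_t)(x), (uint64_t)(y)) : \
	    cas32((uint32_t *)(ptr), (uint32_t)(x), (uint32_t)(y)))
	#else	/* aligned 64 bit loads and cas64() suffice on amd64 */
	#define	GET_PTE(ptr)		(*(x86pte_t *)(ptr))
	#define	CAS_PTE(ptr, x, y)	cas64((uint64_t *)(ptr), (x), (y))
	#endif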
/*
* Disable preemption and establish a mapping to the pagetable with the
@@ -1719,47 +1650,65 @@ x86pte_cpu_init(cpu_t *cpu, void *pages)
* pfn as we last used referenced from this CPU.
*/
static x86pte_t *
-x86pte_access_pagetable(htable_t *ht)
+x86pte_access_pagetable(htable_t *ht, uint_t index)
{
- pfn_t pfn;
- struct hat_cpu_info *hci;
-
/*
* VLP pagetables are contained in the hat_t
*/
if (ht->ht_flags & HTABLE_VLP)
- return (ht->ht_hat->hat_vlp_ptes);
+ return (PT_INDEX_PTR(ht->ht_hat->hat_vlp_ptes, index));
+ return (x86pte_mapin(ht->ht_pfn, index, ht));
+}
+
+/*
+ * map the given pfn into the page table window.
+ */
+/*ARGSUSED*/
+x86pte_t *
+x86pte_mapin(pfn_t pfn, uint_t index, htable_t *ht)
+{
+ x86pte_t *pteptr;
+ x86pte_t pte;
+ x86pte_t newpte;
+ int x;
- /*
- * During early boot, use hat_boot_remap() of a page table adddress.
- */
- pfn = ht->ht_pfn;
ASSERT(pfn != PFN_INVALID);
- if (kpm_enable)
- return ((x86pte_t *)hat_kpm_pfn2va(pfn));
if (!khat_running) {
- (void) hat_boot_remap(ptable_va, pfn);
- return ((x86pte_t *)ptable_va);
+ caddr_t va = kbm_remap_window(pfn_to_pa(pfn), 1);
+ return (PT_INDEX_PTR(va, index));
}
/*
- * Normally, disable preemption and grab the CPU's hci_mutex
+ * If kpm is available, use it.
+ */
+ if (kpm_vbase)
+ return (PT_INDEX_PTR(hat_kpm_pfn2va(pfn), index));
+
+ /*
+ * Disable preemption and grab the CPU's hci_mutex
*/
kpreempt_disable();
- hci = CPU->cpu_hat_info;
- ASSERT(hci != NULL);
- mutex_enter(&hci->hci_mutex);
- if (hci->hci_mapped_pfn != pfn) {
- /*
- * The current mapping doesn't already point to this page.
- * Update the CPU specific pagetable mapping to map the pfn.
- */
- X86PTE_REMAP(hci->hci_pagetable_va, hci->hci_kernel_pte, 0,
- PT_WRITABLE, pfn);
- hci->hci_mapped_pfn = pfn;
+ ASSERT(CPU->cpu_hat_info != NULL);
+ mutex_enter(&CPU->cpu_hat_info->hci_mutex);
+ x = PWIN_TABLE(CPU->cpu_id);
+ pteptr = (x86pte_t *)PWIN_PTE_VA(x);
+ if (mmu.pae_hat)
+ pte = *pteptr;
+ else
+ pte = *(x86pte32_t *)pteptr;
+
+ newpte = MAKEPTE(pfn, 0) | mmu.pt_global | mmu.pt_nx;
+ newpte |= PT_WRITABLE;
+
+ if (!PTE_EQUIV(newpte, pte)) {
+ if (mmu.pae_hat)
+ *pteptr = newpte;
+ else
+ *(x86pte32_t *)pteptr = newpte;
+ mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
}
- return (hci->hci_pagetable_va);
+ return (PT_INDEX_PTR(PWIN_VA(x), index));
}
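The PWIN_*() macros give each CPU two private mapping windows: one for
general pagetable access and one used as the source window by
x86pte_copy(). Assuming definitions along these lines in the hat headers
(a reconstruction, not part of this diff):

	#define	PWIN_TABLE(cpuid)	((cpuid) * 2)
	#define	PWIN_SRC(cpuid)		((cpuid) * 2 + 1)
	#define	PWIN_VA(x)	(mmu.pwin_base + ((x) << MMU_PAGESHIFT))
	#define	PWIN_PTE_VA(x)	(mmu.pwin_pte_va + \
				    ((x) << mmu.pte_size_shift))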
/*
@@ -1768,31 +1717,25 @@ x86pte_access_pagetable(htable_t *ht)
static void
x86pte_release_pagetable(htable_t *ht)
{
- struct hat_cpu_info *hci;
-
- if (kpm_enable)
- return;
-
/*
* nothing to do for VLP htables
*/
if (ht->ht_flags & HTABLE_VLP)
return;
- /*
- * During boot-up hat_kern_setup(), erase the boot loader remapping.
- */
- if (!khat_running) {
- hat_boot_demap(ptable_va);
+ x86pte_mapout();
+}
+
+void
+x86pte_mapout(void)
+{
+ if (mmu.pwin_base == NULL || !khat_running)
return;
- }
/*
- * Normal Operation: drop the CPU's hci_mutex and restore preemption
+ * Drop the CPU's hci_mutex and restore preemption.
*/
- hci = CPU->cpu_hat_info;
- ASSERT(hci != NULL);
- mutex_exit(&hci->hci_mutex);
+ mutex_exit(&CPU->cpu_hat_info->hci_mutex);
kpreempt_enable();
}
@@ -1803,362 +1746,267 @@ x86pte_t
x86pte_get(htable_t *ht, uint_t entry)
{
x86pte_t pte;
- x86pte32_t *pte32p;
x86pte_t *ptep;
/*
* Be careful that loading PAE entries in 32 bit kernel is atomic.
*/
- ptep = x86pte_access_pagetable(ht);
- if (mmu.pae_hat) {
- ATOMIC_LOAD64(ptep + entry, pte);
- } else {
- pte32p = (x86pte32_t *)ptep;
- pte = pte32p[entry];
- }
+ ASSERT(entry < mmu.ptes_per_table);
+ ptep = x86pte_access_pagetable(ht, entry);
+ pte = GET_PTE(ptep);
x86pte_release_pagetable(ht);
return (pte);
}
/*
 * Atomic unconditional set of a page table entry; it returns the previous
- * value.
+ * value. For pre-existing mappings, if the PFN changes, we don't care
+ * about the old PTE's REF/MOD bits. If the PFN remains the same, we
+ * leave the REF/MOD bits unchanged.
+ *
+ * If asked to overwrite a link to a lower page table with a large page
+ * mapping, this routine returns the special value of LPAGE_ERROR. This
+ * allows the upper HAT layers to retry with a smaller mapping size.
*/
x86pte_t
x86pte_set(htable_t *ht, uint_t entry, x86pte_t new, void *ptr)
{
x86pte_t old;
- x86pte_t prev, n;
+ x86pte_t prev;
x86pte_t *ptep;
- x86pte32_t *pte32p;
- x86pte32_t n32, p32;
+ level_t l = ht->ht_level;
+ x86pte_t pfn_mask = (l != 0) ? PT_PADDR_LGPG : PT_PADDR;
+ x86pte_t n;
+ uintptr_t addr = htable_e2va(ht, entry);
+ hat_t *hat = ht->ht_hat;
+ ASSERT(new != 0); /* don't use to invalidate a PTE, see x86pte_update */
ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
- if (ptr == NULL) {
- ptep = x86pte_access_pagetable(ht);
- ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift));
- } else {
+ if (ptr == NULL)
+ ptep = x86pte_access_pagetable(ht, entry);
+ else
ptep = ptr;
- }
- if (mmu.pae_hat) {
- for (;;) {
- prev = *ptep;
- n = new;
- /*
- * prevent potential data loss by preserving the
- * MOD/REF bits if set in the current PTE, the pfns are
- * the same and the 'new' pte is non-zero. For example,
- * segmap can reissue a read-only hat_memload on top
- * of a dirty page.
- *
- * 'new' is required to be non-zero on a remap as at
- * least the valid bit should be non-zero. The 'new'
- * check also avoids incorrectly preserving the REF/MOD
- * bit when unmapping pfn 0.
- */
- if (new != 0 && PTE_ISVALID(prev) &&
- PTE2PFN(prev, ht->ht_level) ==
- PTE2PFN(n, ht->ht_level)) {
- n |= prev & (PT_REF | PT_MOD);
- }
- if (prev == n) {
- old = new;
- break;
- }
- old = cas64(ptep, prev, n);
- if (old == prev)
- break;
- }
- } else {
- pte32p = (x86pte32_t *)ptep;
- for (;;) {
- p32 = *pte32p;
- n32 = new;
- if (new != 0 && PTE_ISVALID(p32) &&
- PTE2PFN(p32, ht->ht_level) ==
- PTE2PFN(n32, ht->ht_level)) {
- n32 |= p32 & (PT_REF | PT_MOD);
- }
- if (p32 == n32) {
- old = new;
- break;
- }
- old = cas32(pte32p, p32, n32);
- if (old == p32)
- break;
+ /*
+ * Install the new PTE. If remapping the same PFN, then
+ * copy existing REF/MOD bits to new mapping.
+ */
+ do {
+ prev = GET_PTE(ptep);
+ n = new;
+ if (PTE_ISVALID(n) && (prev & pfn_mask) == (new & pfn_mask))
+ n |= prev & (PT_REF | PT_MOD);
+
+ /*
+ * Another thread may have installed this mapping already,
+ * flush the local TLB and be done.
+ */
+ if (prev == n) {
+ old = new;
+ mmu_tlbflush_entry((caddr_t)addr);
+ goto done;
}
- }
+
+ /*
+ * Detect if we have a collision of installing a large
+ * page mapping where there already is a lower page table.
+ */
+ if (l > 0 && (prev & PT_VALID) && !(prev & PT_PAGESIZE))
+ return (LPAGE_ERROR);
+
+ old = CAS_PTE(ptep, prev, n);
+ } while (old != prev);
+
+ /*
+ * Do a TLB demap if needed, ie. the old pte was valid.
+ *
+ * Note that a stale TLB writeback to the PTE here either can't happen
+ * or doesn't matter. The PFN can only change for NOSYNC|NOCONSIST
+ * mappings, but they were created with REF and MOD already set, so
+ * no stale writeback will happen.
+ *
+ * Segmap is the only place where remaps happen on the same pfn and for
+ * that we want to preserve the stale REF/MOD bits.
+ */
+ if (old & PT_REF)
+ hat_tlb_inval(hat, addr);
+
+done:
if (ptr == NULL)
x86pte_release_pagetable(ht);
return (old);
}
/*
- * Atomic compare and swap of a page table entry.
+ * Atomic compare and swap of a page table entry. No TLB invalidates are done.
+ * This is used for links between pagetables of different levels.
+ * Note we always create these links with dirty/access set, so they should
+ * never change.
*/
-static x86pte_t
+x86pte_t
x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new)
{
x86pte_t pte;
x86pte_t *ptep;
- x86pte32_t pte32, o32, n32;
- x86pte32_t *pte32p;
- ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
- ptep = x86pte_access_pagetable(ht);
- if (mmu.pae_hat) {
- pte = cas64(&ptep[entry], old, new);
- } else {
- o32 = old;
- n32 = new;
- pte32p = (x86pte32_t *)ptep;
- pte32 = cas32(&pte32p[entry], o32, n32);
- pte = pte32;
- }
+ ptep = x86pte_access_pagetable(ht, entry);
+ pte = CAS_PTE(ptep, old, new);
x86pte_release_pagetable(ht);
-
return (pte);
}
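Since links between pagetables are created with the accessed/dirty bits
already set, a caller such as link_ptp() presumably installs the link with
a 0 -> PTP compare-and-swap; a hedged sketch (MAKEPTP() is assumed to set
PT_REF/PT_MOD so hardware never writes them back for link entries):

	x86pte_t ptp = MAKEPTP(new->ht_pfn, new->ht_level);

	if (x86pte_cas(higher, entry, 0, ptp) != 0)
		panic("HAT: ptp link already present");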
/*
- * data structure for cross call information
+ * Make sure the zero we wrote to a page table entry sticks in memory
+ * after invalidating all TLB entries on all CPUs.
*/
-typedef struct xcall_info {
- x86pte_t xi_pte;
- x86pte_t xi_old;
- x86pte_t *xi_pteptr;
- pfn_t xi_pfn;
- processorid_t xi_cpuid;
- level_t xi_level;
- xc_func_t xi_func;
-} xcall_info_t;
-
-/*
- * Cross call service function to atomically invalidate a PTE and flush TLBs
- */
-/*ARGSUSED*/
-static int
-x86pte_inval_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
+static x86pte_t
+handle_tlbs(x86pte_t oldpte, x86pte_t *ptep, htable_t *ht, uint_t entry)
{
- xcall_info_t *xi = (xcall_info_t *)a1;
- caddr_t addr = (caddr_t)a2;
-
- /*
- * Only the initiating cpu invalidates the page table entry.
- * It returns the previous PTE value to the caller.
- */
- if (CPU->cpu_id == xi->xi_cpuid) {
- x86pte_t *ptep = xi->xi_pteptr;
- pfn_t pfn = xi->xi_pfn;
- level_t level = xi->xi_level;
- x86pte_t old;
- x86pte_t prev;
- x86pte32_t *pte32p;
- x86pte32_t p32;
-
- if (mmu.pae_hat) {
- for (;;) {
- prev = *ptep;
- if (PTE2PFN(prev, level) != pfn)
- break;
- old = cas64(ptep, prev, 0);
- if (old == prev)
- break;
- }
- } else {
- pte32p = (x86pte32_t *)ptep;
- for (;;) {
- p32 = *pte32p;
- if (PTE2PFN(p32, level) != pfn)
- break;
- old = cas32(pte32p, p32, 0);
- if (old == p32)
- break;
- }
- prev = p32;
- }
- xi->xi_pte = prev;
- }
+ hat_t *hat = ht->ht_hat;
+ uintptr_t addr = htable_e2va(ht, entry);
+ x86pte_t found;
/*
- * For a normal address, we just flush one page mapping
- * Otherwise reload cr3 to effect a complete TLB flush.
- *
- * Note we don't reload VLP pte's -- this assume we never have a
- * large page size at VLP_LEVEL for VLP processes.
+ * Was the PTE ever used? If not, there can't be any TLB entries.
*/
- if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
- mmu_tlbflush_entry(addr);
- } else {
- reload_cr3();
- }
- return (0);
-}
-
-/*
- * Cross call service function to atomically change a PTE and flush TLBs
- */
-/*ARGSUSED*/
-static int
-x86pte_update_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
-{
- xcall_info_t *xi = (xcall_info_t *)a1;
- caddr_t addr = (caddr_t)a2;
+ if ((oldpte & PT_REF) == 0)
+ return (oldpte);
/*
- * Only the initiating cpu changes the page table entry.
- * It returns the previous PTE value to the caller.
+ * Do a full global TLB invalidation.
+ * We may have to loop until the new PTE in memory stays zero.
+ * Why? Because Intel/AMD don't document how the REF/MOD bits are
+ * copied back from the TLB to the PTE, sigh. We're protecting
+ * here against a blind write back of the MOD (and other) bits.
*/
- if (CPU->cpu_id == xi->xi_cpuid) {
- x86pte_t *ptep = xi->xi_pteptr;
- x86pte_t new = xi->xi_pte;
- x86pte_t old = xi->xi_old;
- x86pte_t prev;
-
- if (mmu.pae_hat) {
- prev = cas64(ptep, old, new);
- } else {
- x86pte32_t o32 = old;
- x86pte32_t n32 = new;
- x86pte32_t *pte32p = (x86pte32_t *)ptep;
- prev = cas32(pte32p, o32, n32);
- }
+ for (;;) {
+ hat_tlb_inval(hat, addr);
- xi->xi_pte = prev;
- }
-
- /*
- * Flush the TLB entry
- */
- if ((uintptr_t)addr != DEMAP_ALL_ADDR)
- mmu_tlbflush_entry(addr);
- else
- reload_cr3();
- return (0);
-}
+ /*
+ * Check for a stale writeback of an oldpte TLB entry.
+ * Done when the PTE stays zero.
+ */
+ found = GET_PTE(ptep);
+ if (found == 0)
+ return (oldpte);
-/*
- * Use cross calls to change a page table entry and invalidate TLBs.
- */
-void
-x86pte_xcall(hat_t *hat, xcall_info_t *xi, uintptr_t addr)
-{
- cpuset_t cpus;
+ /*
+ * The only acceptable PTE change must be from a TLB
+ * flush setting the MOD bit, hence oldpte must
+ * have been writable.
+ */
+ if (!(oldpte & PT_WRITABLE) || !(found & PT_MOD))
+ break;
- /*
- * Given the current implementation of hat_share(), doing a
- * hat_pageunload() on a shared page table requries invalidating
- * all user TLB entries on all CPUs.
- */
- if (hat->hat_flags & HAT_SHARED) {
- hat = kas.a_hat;
- addr = DEMAP_ALL_ADDR;
- }
+ /*
+ * Did we see a complete writeback of oldpte?
+ * or
+ * Did we see the MOD bit set (plus possibly other
+ * bits rewritten) in a still invalid mapping?
+ */
+ if (found == (oldpte | PT_MOD) ||
+ (!(found & PT_VALID) &&
+ (oldpte | found) == (oldpte | PT_MOD)))
+ oldpte |= PT_MOD;
+ else
+ break;
- /*
- * Use a cross call to do the invalidations.
- * Note the current CPU always has to be in the cross call CPU set.
- */
- kpreempt_disable();
- xi->xi_cpuid = CPU->cpu_id;
- CPUSET_ZERO(cpus);
- if (hat == kas.a_hat) {
- CPUSET_OR(cpus, khat_cpuset);
- } else {
- mutex_enter(&hat->hat_switch_mutex);
- CPUSET_OR(cpus, hat->hat_cpus);
- CPUSET_ADD(cpus, CPU->cpu_id);
+ (void) CAS_PTE(ptep, found, 0);
}
/*
- * Use a cross call to modify the page table entry and invalidate TLBs.
- * If we're panic'ing, don't bother with the cross call.
- * Note the panicstr check isn't bullet proof and the panic system
- * ought to be made tighter.
+ * If we hit this, a processor attempted to set the DIRTY bit
+ * of a page table entry in a way we didn't anticipate.
*/
- if (panicstr == NULL)
- xc_wait_sync((xc_arg_t)xi, addr, NULL, X_CALL_HIPRI,
- cpus, xi->xi_func);
- else
- (void) xi->xi_func((xc_arg_t)xi, (xc_arg_t)addr, NULL);
- if (hat != kas.a_hat)
- mutex_exit(&hat->hat_switch_mutex);
- kpreempt_enable();
+ panic("handle_tlbs(): unanticipated TLB shootdown scenario"
+ " oldpte=" FMT_PTE " found=" FMT_PTE, oldpte, found);
+ /*LINTED*/
}
/*
- * Invalidate a page table entry if it currently maps the given pfn.
- * This returns the previous value of the PTE.
+ * Invalidate a page table entry as long as it currently maps something that
+ * matches the value determined by expect.
+ *
+ * Also invalidates any TLB entries and returns the previous value of the PTE.
*/
x86pte_t
-x86pte_invalidate_pfn(htable_t *ht, uint_t entry, pfn_t pfn, void *pte_ptr)
+x86pte_inval(
+ htable_t *ht,
+ uint_t entry,
+ x86pte_t expect,
+ x86pte_t *pte_ptr)
{
- xcall_info_t xi;
x86pte_t *ptep;
- hat_t *hat;
- uintptr_t addr;
+ x86pte_t oldpte;
+ x86pte_t found;
ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
- if (pte_ptr != NULL) {
+ ASSERT(ht->ht_level != VLP_LEVEL);
+ if (pte_ptr != NULL)
ptep = pte_ptr;
- } else {
- ptep = x86pte_access_pagetable(ht);
- ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift));
- }
+ else
+ ptep = x86pte_access_pagetable(ht, entry);
/*
- * Fill in the structure used by the cross call function to do the
- * invalidation.
+ * This loop deals with REF/MOD bits changing between the
+ * GET_PTE() and the CAS_PTE().
*/
- xi.xi_pte = 0;
- xi.xi_pteptr = ptep;
- xi.xi_pfn = pfn;
- xi.xi_level = ht->ht_level;
- xi.xi_func = x86pte_inval_func;
- ASSERT(xi.xi_level != VLP_LEVEL);
-
- hat = ht->ht_hat;
- addr = htable_e2va(ht, entry);
-
- x86pte_xcall(hat, &xi, addr);
-
+ do {
+ oldpte = GET_PTE(ptep);
+ if (expect != 0 && (oldpte & PT_PADDR) != (expect & PT_PADDR))
+ goto give_up;
+ found = CAS_PTE(ptep, oldpte, 0);
+ } while (found != oldpte);
+ oldpte = handle_tlbs(oldpte, ptep, ht, entry);
+
+give_up:
if (pte_ptr == NULL)
x86pte_release_pagetable(ht);
- return (xi.xi_pte);
+ return (oldpte);
}
/*
- * update a PTE and invalidate any stale TLB entries.
+ * Change a page table entry if it currently matches the value in expect.
*/
x86pte_t
-x86pte_update(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new)
+x86pte_update(
+ htable_t *ht,
+ uint_t entry,
+ x86pte_t expect,
+ x86pte_t new)
{
- xcall_info_t xi;
x86pte_t *ptep;
- hat_t *hat;
- uintptr_t addr;
+ x86pte_t found;
+ ASSERT(new != 0);
ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN));
- ptep = x86pte_access_pagetable(ht);
- ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift));
-
- /*
- * Fill in the structure used by the cross call function to do the
- * invalidation.
- */
- xi.xi_pte = new;
- xi.xi_old = expected;
- xi.xi_pteptr = ptep;
- xi.xi_func = x86pte_update_func;
-
- hat = ht->ht_hat;
- addr = htable_e2va(ht, entry);
+ ASSERT(ht->ht_level != VLP_LEVEL);
- x86pte_xcall(hat, &xi, addr);
+ ptep = x86pte_access_pagetable(ht, entry);
+ found = CAS_PTE(ptep, expect, new);
+ if (found == expect) {
+ hat_tlb_inval(ht->ht_hat, htable_e2va(ht, entry));
+ /*
+ * When removing write permission *and* clearing the
+ * MOD bit, check if a write happened via a stale
+ * TLB entry before the TLB shootdown finished.
+ *
+ * If it did happen, simply re-enable write permission and
+ * act like the original CAS failed.
+ */
+ if ((expect & (PT_WRITABLE | PT_MOD)) == PT_WRITABLE &&
+ (new & (PT_WRITABLE | PT_MOD)) == 0 &&
+ (GET_PTE(ptep) & PT_MOD) != 0) {
+ do {
+ found = GET_PTE(ptep);
+ found =
+ CAS_PTE(ptep, found, found | PT_WRITABLE);
+ } while ((found & PT_WRITABLE) == 0);
+ }
+ }
x86pte_release_pagetable(ht);
- return (xi.xi_pte);
+ return (found);
}
/*
@@ -2169,10 +2017,11 @@ x86pte_update(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new)
void
x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
{
- struct hat_cpu_info *hci;
caddr_t src_va;
caddr_t dst_va;
size_t size;
+ x86pte_t *pteptr;
+ x86pte_t pte;
ASSERT(khat_running);
ASSERT(!(dest->ht_flags & HTABLE_VLP));
@@ -2181,27 +2030,31 @@ x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
/*
- * Acquire access to the CPU pagetable window for the destination.
+ * Acquire access to the CPU pagetable windows for the dest and source.
*/
- dst_va = (caddr_t)x86pte_access_pagetable(dest);
- if (kpm_enable) {
- src_va = (caddr_t)x86pte_access_pagetable(src);
+ dst_va = (caddr_t)x86pte_access_pagetable(dest, entry);
+ if (kpm_vbase) {
+ src_va = (caddr_t)
+ PT_INDEX_PTR(hat_kpm_pfn2va(src->ht_pfn), entry);
} else {
- hci = CPU->cpu_hat_info;
+ uint_t x = PWIN_SRC(CPU->cpu_id);
/*
* Finish defining the src pagetable mapping
*/
- src_va = dst_va + MMU_PAGESIZE;
- X86PTE_REMAP(src_va, hci->hci_kernel_pte, 1, 0, src->ht_pfn);
+ src_va = (caddr_t)PT_INDEX_PTR(PWIN_VA(x), entry);
+ pte = MAKEPTE(src->ht_pfn, 0) | mmu.pt_global | mmu.pt_nx;
+ pteptr = (x86pte_t *)PWIN_PTE_VA(x);
+ if (mmu.pae_hat)
+ *pteptr = pte;
+ else
+ *(x86pte32_t *)pteptr = pte;
+ mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
}
/*
* now do the copy
*/
-
- dst_va += entry << mmu.pte_size_shift;
- src_va += entry << mmu.pte_size_shift;
size = count << mmu.pte_size_shift;
bcopy(src_va, dst_va, size);
@@ -2211,42 +2064,29 @@ x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
/*
* Zero page table entries - Note this doesn't use atomic stores!
*/
-void
+static void
x86pte_zero(htable_t *dest, uint_t entry, uint_t count)
{
caddr_t dst_va;
- x86pte_t *p;
- x86pte32_t *p32;
size_t size;
- extern void hat_pte_zero(void *, size_t);
/*
* Map in the page table to be zeroed.
*/
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
ASSERT(!(dest->ht_flags & HTABLE_VLP));
- dst_va = (caddr_t)x86pte_access_pagetable(dest);
- dst_va += entry << mmu.pte_size_shift;
+
+ dst_va = (caddr_t)x86pte_access_pagetable(dest, entry);
+
size = count << mmu.pte_size_shift;
- if (x86_feature & X86_SSE2) {
- hat_pte_zero(dst_va, size);
- } else if (khat_running) {
+ ASSERT(size > BLOCKZEROALIGN);
+#ifdef __i386
+ if ((x86_feature & X86_SSE2) == 0)
bzero(dst_va, size);
- } else {
- /*
- * Can't just use bzero during boot because it checks the
- * address against kernelbase. Instead just use a zero loop.
- */
- if (mmu.pae_hat) {
- p = (x86pte_t *)dst_va;
- while (count-- > 0)
- *p++ = 0;
- } else {
- p32 = (x86pte32_t *)dst_va;
- while (count-- > 0)
- *p32++ = 0;
- }
- }
+ else
+#endif
+ block_zero_no_xmm(dst_va, size);
+
x86pte_release_pagetable(dest);
}
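block_zero_no_xmm() is assumed to be the assembly block-zeroing routine
that avoids touching FPU/XMM state, which matters here because the
pagetable window is held with preemption disabled; its presumed prototype:

	/* assumed prototype; the implementation lives in the ml sources */
	extern void block_zero_no_xmm(void *addr, size_t len);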