Diffstat (limited to 'usr/src/uts/sun4u/os/ppage.c')
-rw-r--r--  usr/src/uts/sun4u/os/ppage.c | 520
1 files changed, 520 insertions, 0 deletions
diff --git a/usr/src/uts/sun4u/os/ppage.c b/usr/src/uts/sun4u/os/ppage.c
new file mode 100644
index 0000000000..83283feda2
--- /dev/null
+++ b/usr/src/uts/sun4u/os/ppage.c
@@ -0,0 +1,520 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/t_lock.h>
+#include <sys/vmem.h>
+#include <sys/mman.h>
+#include <sys/vm.h>
+#include <sys/cpu.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/atomic.h>
+#include <vm/as.h>
+#include <vm/hat.h>
+#include <vm/page.h>
+#include <vm/seg.h>
+#include <vm/seg_kmem.h>
+#include <vm/hat_sfmmu.h>
+#include <sys/debug.h>
+#include <sys/cpu_module.h>
+
+/*
+ * A quick way to generate a cache consistent address to map in a page.
+ * users: ppcopy, pagezero, /proc, dev/mem
+ *
+ * The ppmapin/ppmapout routines provide a quick way of generating a cache
+ * consistent address by reserving a given amount of kernel address space.
+ * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided
+ * into x number of sets, where x is the number of colors for the virtual
+ * cache. The number of colors is how many times a page can be mapped
+ * simultaneously in the cache. For direct map caches this translates to
+ * the number of pages in the cache.
+ * Each set will be assigned a group of virtual pages from the reserved memory
+ * depending on its virtual color.
+ * When trying to assign a virtual address we will find out the color for the
+ * physical page in question (if applicable). Then we will try to find an
+ * available virtual page from the set of the appropriate color.
+ */
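+
+/*
+ * Typical usage pairs ppmapin() with ppmapout() around a short-lived
+ * access, as ppcopy() below does, e.g.:
+ *
+ *	fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
+ *	to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
+ *	bcopy(fm_va, to_va, PAGESIZE);
+ *	ppmapout(fm_va);
+ *	ppmapout(to_va);
+ */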
+
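+/*
+ * clsettoarray() maps a (virtual color, set) pair to an index into
+ * ppmap_vaddrs[]: the entries for a given color are stored contiguously,
+ * one per set.
+ */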
+#define clsettoarray(color, set) (((color) * nsets) + (set))
+
+int pp_slots = 4; /* small default, tuned by cpu module */
+
+/* tuned by cpu module, default is "safe" */
+int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
+
+static caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
+static int nsets; /* number of sets */
+static int ppmap_pages; /* generate align mask */
+static int ppmap_shift; /* set selector */
+
+#ifdef PPDEBUG
+#define MAXCOLORS 16 /* for debug only */
+static int ppalloc_noslot = 0; /* # of allocations from kernelmap */
+static int align_hits[MAXCOLORS];
+static int pp_allocs; /* # of ppmapin requests */
+#endif /* PPDEBUG */
+
+/*
+ * There are only 64 TLB entries on spitfire, 16 on cheetah
+ * (fully-associative TLB) so we allow the cpu module to tune the
+ * number to use here via pp_slots.
+ */
+static struct ppmap_va {
+ caddr_t ppmap_slots[MAXPP_SLOTS];
+} ppmap_va[NCPU];
+
+void
+ppmapinit(void)
+{
+ int color, nset, setsize;
+ caddr_t va;
+
+ ASSERT(pp_slots <= MAXPP_SLOTS);
+
+ va = (caddr_t)PPMAPBASE;
+ if (cache & CACHE_VAC) {
+ int a;
+
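+ /*
+ * With a virtually indexed cache there is one color for each page
+ * slot within an shm_alignment-sized window. ppmap_shift ends up as
+ * log2(shm_alignment), so ppmapout() can recover the set index with
+ * (va >> ppmap_shift) & (nsets - 1).
+ */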
+ ppmap_pages = mmu_btop(shm_alignment);
+ nsets = PPMAPSIZE / shm_alignment;
+ setsize = shm_alignment;
+ ppmap_shift = MMU_PAGESHIFT;
+ a = ppmap_pages;
+ while (a >>= 1)
+ ppmap_shift++;
+ } else {
+ /*
+ * If we do not have a virtually indexed cache there is only
+ * one color; every page forms its own set.
+ */
+ ppmap_pages = 1;
+ nsets = mmu_btop(PPMAPSIZE);
+ setsize = MMU_PAGESIZE;
+ ppmap_shift = MMU_PAGESHIFT;
+ }
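+ /*
+ * Carve the reserved VA range into per-color free lists: entry
+ * (color, set) is the page of virtual color 'color' within set 'set'.
+ */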
+ for (color = 0; color < ppmap_pages; color++) {
+ for (nset = 0; nset < nsets; nset++) {
+ ppmap_vaddrs[clsettoarray(color, nset)] =
+ (caddr_t)((uintptr_t)va + (nset * setsize));
+ }
+ va += MMU_PAGESIZE;
+ }
+}
+
+/*
+ * Allocate a cache consistent virtual address to map a page, pp,
+ * with protection, vprot; and map it in the MMU, using the most
+ * efficient means possible. The hint argument is a virtual address
+ * whose virtual color, on this platform, is the color preferred when
+ * allocating an address to map in the page (see the NOTE below).
+ * A hint of -1 means the caller doesn't care; pagezero, for instance,
+ * passes -1.
+ *
+ * machine dependent, depends on virtual address space layout,
+ * understands that all kernel addresses have bit 31 set.
+ *
+ * NOTE: For sun4u platforms the meaning of the hint argument is opposite from
+ * that found in other architectures. In other architectures the hint
+ * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
+ * This was used to avoid virtual cache thrashing in the bcopy. Unfortunately,
+ * in the case of a COW, this later caused a cache aliasing conflict. On
+ * sun4u, the bcopy routine uses the block ld/st instructions, so we don't have
+ * to worry about virtual cache thrashing. Actually, by using the hint to choose
+ * the right color we can almost guarantee a cache conflict will not occur.
+ */
+
+caddr_t
+ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
+{
+ int color, nset, index, start;
+ caddr_t va;
+
+#ifdef PPDEBUG
+ pp_allocs++;
+#endif /* PPDEBUG */
+ if (cache & CACHE_VAC) {
+ color = sfmmu_get_ppvcolor(pp);
+ if (color == -1) {
+ if ((intptr_t)hint != -1L) {
+ color = addr_to_vcolor(hint);
+ } else {
+ color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
+ }
+ }
+
+ } else {
+ /*
+ * For physical caches, we can pick any address we want.
+ */
+ color = 0;
+ }
+
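+ /*
+ * Look for a free VA of the chosen color first; if every set of
+ * that color is taken, move on to the next color until we wrap
+ * around to where we started.
+ */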
+ start = color;
+ do {
+ for (nset = 0; nset < nsets; nset++) {
+ index = clsettoarray(color, nset);
+ va = ppmap_vaddrs[index];
+ if (va != NULL) {
+#ifdef PPDEBUG
+ align_hits[color]++;
+#endif /* PPDEBUG */
+ if (casptr(&ppmap_vaddrs[index],
+ va, NULL) == va) {
+ hat_memload(kas.a_hat, va, pp,
+ vprot | HAT_NOSYNC,
+ HAT_LOAD_LOCK);
+ return (va);
+ }
+ }
+ }
+ /*
+ * first pick didn't succeed, try another
+ */
+ if (++color == ppmap_pages)
+ color = 0;
+ } while (color != start);
+
+#ifdef PPDEBUG
+ ppalloc_noslot++;
+#endif /* PPDEBUG */
+
+ /*
+ * No free slots; get a random one from the kernel heap area.
+ */
+ va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
+
+ hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
+
+ return (va);
+
+}
+
+void
+ppmapout(caddr_t va)
+{
+ int color, nset, index;
+
+ if (va >= kernelheap && va < ekernelheap) {
+ /*
+ * Space came from kernelmap, flush the page and
+ * return the space.
+ */
+ hat_unload(kas.a_hat, va, PAGESIZE,
+ (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
+ vmem_free(heap_arena, va, PAGESIZE);
+ } else {
+ /*
+ * Space came from ppmap_vaddrs[], give it back.
+ */
+ color = addr_to_vcolor(va);
+ ASSERT((cache & CACHE_VAC)? (color < ppmap_pages) : 1);
+
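+ /* The set index lives in the VA bits above the color bits. */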
+ nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
+ index = clsettoarray(color, nset);
+ hat_unload(kas.a_hat, va, PAGESIZE,
+ (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
+
+ ASSERT(ppmap_vaddrs[index] == NULL);
+ ppmap_vaddrs[index] = va;
+ }
+}
+
+#ifdef DEBUG
+#define PP_STAT_ADD(stat) (stat)++
+uint_t pload, ploadfail;
+uint_t ppzero, ppzero_short;
+#else
+#define PP_STAT_ADD(stat)
+#endif /* DEBUG */
+
+/*
+ * Find a slot in the per CPU page copy area. Load a locked TLB entry
+ * on the running cpu. We don't call the hat layer to load up the tte
+ * since the mapping is only temporary. If the thread migrates it'll get
+ * a TLB miss trap and the TLB/TSB miss handler will panic since there
+ * is no official hat record of this mapping.
+ */
+static caddr_t
+pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
+{
+ struct ppmap_va *ppmap;
+ tte_t tte;
+ caddr_t *myslot;
+ caddr_t va;
+ long i, start, stride;
+ int vcolor;
+ uint_t flags, strict_flag;
+
+ PP_STAT_ADD(pload);
+
+ ppmap = &ppmap_va[cpu];
+ va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
+ myslot = ppmap->ppmap_slots;
+ ASSERT(addr_to_vcolor(va) == 0);
+
+ if (prot & TTE_HWWR_INT) {
+ flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
+ strict_flag = PPAGE_STORES_POLLUTE;
+ } else {
+ flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
+ strict_flag = PPAGE_LOADS_POLLUTE;
+ }
+
+ /*
+ * If consistent handling is required then keep the current
+ * vcolor of the page. Furthermore, if loads or stores can
+ * pollute the VAC then using a "new" page (unassigned vcolor)
+ * won't work and we have to return a failure.
+ */
+ if (pp_consistent_coloring & flags) {
+ vcolor = sfmmu_get_ppvcolor(pp);
+ if ((vcolor == -1) &&
+ (pp_consistent_coloring & strict_flag))
+ return (NULL);
+ /* else keep the current vcolor of the page */
+ } else {
+ vcolor = -1;
+ }
+
+ if (vcolor != -1) {
+ va += MMU_PAGESIZE * vcolor;
+ start = vcolor;
+ stride = ppmap_pages; /* number of colors */
+ myslot += vcolor;
+ } else {
+ start = 0;
+ stride = 1;
+ }
+
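+ /*
+ * Probe the slots for a free one and claim it atomically with
+ * casptr(). When a particular vcolor is required only every
+ * ppmap_pages-th slot has the right color, hence the stride.
+ */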
+ for (i = start; i < pp_slots; i += stride) {
+ if (*myslot == NULL) {
+ if (casptr(myslot, NULL, va) == NULL)
+ break;
+ }
+ myslot += stride;
+ va += MMU_PAGESIZE * stride;
+ }
+
+ if (i >= pp_slots) {
+ PP_STAT_ADD(ploadfail);
+ return (NULL);
+ }
+
+ ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);
+
+ /*
+ * Now we have a slot we can use, make the tte.
+ */
+ tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
+ tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
+ TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;
+
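+ /*
+ * Load the locked entry straight into this CPU's D-TLB, bypassing
+ * the hat layer (see the block comment above this function).
+ */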
+ ASSERT(CPU->cpu_id == cpu);
+ sfmmu_dtlb_ld(va, KCONTEXT, &tte);
+
+ *pslot = myslot; /* Return ptr to the slot we used. */
+
+ return (va);
+}
+
+static void
+pp_unload_tlb(caddr_t *pslot, caddr_t va)
+{
+ ASSERT(*pslot == va);
+
+ vtag_flushpage(va, KCONTEXT);
+ *pslot = NULL; /* release the slot */
+}
+
+/*
+ * Common copy routine which attempts to use hwblkpagecopy. If this routine
+ * can't be used, failure (0) will be returned. Otherwise, a PAGESIZE page
+ * will be copied and success (1) will be returned.
+ */
+int
+ppcopy_common(page_t *fm_pp, page_t *to_pp)
+{
+ caddr_t fm_va, to_va;
+ caddr_t *fm_slot, *to_slot;
+ processorid_t cpu;
+
+ ASSERT(PAGE_LOCKED(fm_pp));
+ ASSERT(PAGE_LOCKED(to_pp));
+
+ /*
+ * If we can't use VIS block loads and stores we can't use
+ * pp_load_tlb/pp_unload_tlb due to the possibility of
+ * d$ aliasing.
+ */
+ if (!use_hw_bcopy && (cache & CACHE_VAC))
+ return (0);
+
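+ /*
+ * Map both pages through the per-CPU slots. Preemption stays
+ * disabled so the thread can't migrate away from the CPU holding
+ * the locked TLB entries.
+ */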
+ kpreempt_disable();
+ cpu = CPU->cpu_id;
+ fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
+ if (fm_va == NULL) {
+ kpreempt_enable();
+ return (0);
+ }
+ to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
+ if (to_va == NULL) {
+ pp_unload_tlb(fm_slot, fm_va);
+ kpreempt_enable();
+ return (0);
+ }
+ hwblkpagecopy(fm_va, to_va);
+ ASSERT(CPU->cpu_id == cpu);
+ pp_unload_tlb(fm_slot, fm_va);
+ pp_unload_tlb(to_slot, to_va);
+ kpreempt_enable();
+ return (1);
+}
+
+/*
+ * Routine to copy kernel pages during relocation. It will copy one
+ * PAGESIZE page to another PAGESIZE page. This function may be called
+ * above LOCK_LEVEL so it should not grab any locks.
+ */
+void
+ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
+{
+ uint64_t fm_pa, to_pa;
+ size_t nbytes;
+
+ fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
+ to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
+
+ nbytes = MMU_PAGESIZE;
+
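+ /*
+ * hw_pa_bcopy32() copies 32 bytes at a time using physical
+ * addresses, so no VA mappings (and hence no locks) are needed.
+ */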
+ for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
+ hw_pa_bcopy32(fm_pa, to_pa);
+}
+
+/*
+ * Copy the data from the physical page represented by "fm_pp" to
+ * that represented by "to_pp".
+ *
+ * Try the per cpu mapping first; if that fails, fall back to ppmapin()
+ * to map the pages.
+ */
+void
+ppcopy(page_t *fm_pp, page_t *to_pp)
+{
+ caddr_t fm_va, to_va;
+
+ /* Try the fast path first */
+ if (ppcopy_common(fm_pp, to_pp))
+ return;
+
+ /* Fast path failed, so we need to do the slow path. */
+ fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
+ to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
+ bcopy(fm_va, to_va, PAGESIZE);
+ ppmapout(fm_va);
+ ppmapout(to_va);
+}
+
+/*
+ * Zero the physical page `pp' from off to off + len without changing
+ * the reference and modified bits of the page.
+ *
+ * Again, we'll try per cpu mapping first.
+ */
+void
+pagezero(page_t *pp, uint_t off, uint_t len)
+{
+ caddr_t va;
+ caddr_t *slot;
+ int fast = 1;
+ processorid_t cpu;
+ extern int hwblkclr(void *, size_t);
+ extern int use_hw_bzero;
+
+ ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
+ ASSERT(PAGE_LOCKED(pp));
+
+ PP_STAT_ADD(ppzero);
+
+ if (len != MMU_PAGESIZE || !use_hw_bzero) {
+ /*
+ * The fast path does nothing about VAC coloring, so for a partial
+ * zero (or when the h/w block clear can't be used) go through
+ * ppmapin(), which returns a correctly colored mapping.
+ */
+ fast = 0;
+ va = NULL;
+ PP_STAT_ADD(ppzero_short);
+ }
+
+ kpreempt_disable();
+
+ if (fast) {
+ cpu = CPU->cpu_id;
+ va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
+ }
+
+ if (va == NULL) {
+ /*
+ * We get here when len != MMU_PAGESIZE, use_hw_bzero is
+ * disabled, or pp_load_tlb() returned NULL.
+ */
+ va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
+ fast = 0;
+ }
+
+ if (hwblkclr(va + off, len)) {
+ /*
+ * We may not have used the block commit ASI,
+ * so flush the I$ manually.
+ */
+
+ ASSERT(fast == 0);
+
+ sync_icache(va + off, len);
+ } else {
+ /*
+ * We used block commit and it flushed the I$. However, we may
+ * still have a stale instruction in the pipeline; only a flush
+ * instruction will invalidate it.
+ */
+ doflush(va);
+ }
+
+ if (fast) {
+ ASSERT(CPU->cpu_id == cpu);
+ pp_unload_tlb(slot, va);
+ } else {
+ ppmapout(va);
+ }
+
+ kpreempt_enable();
+}