author     ha137994 <none@none>    2007-05-11 01:55:37 -0700
committer  ha137994 <none@none>    2007-05-11 01:55:37 -0700
commit     aaa10e6791d1614700651df2821f84d490c094bf (patch)
tree       90b50746af11fd412599f19656c351561e82e5eb /usr/src
parent     459190a5c46206e7885f6a649a055ceb46be49a7 (diff)
download   illumos-gate-aaa10e6791d1614700651df2821f84d490c094bf.tar.gz
6473237 trapstat fails to release cpu_lock and tstat_lock in failure paths
6475905 hat_pagesuspend() hangs trying to relocate 4M page from contig_mem_slab_arena
Diffstat (limited to 'usr/src')
-rw-r--r--  usr/src/uts/common/os/vmem.c          18
-rw-r--r--  usr/src/uts/common/sys/vmem.h         15
-rw-r--r--  usr/src/uts/common/vm/seg_kmem.c       3
-rw-r--r--  usr/src/uts/common/vm/seg_kmem.h       2
-rw-r--r--  usr/src/uts/sun4/io/trapstat.c         7
-rw-r--r--  usr/src/uts/sun4/os/startup.c          6
-rw-r--r--  usr/src/uts/sun4u/vm/mach_vm_dep.c     8
-rw-r--r--  usr/src/uts/sun4v/cpu/niagara2.c       3
-rw-r--r--  usr/src/uts/sun4v/os/fillsysinfo.c     7
-rw-r--r--  usr/src/uts/sun4v/sys/machsystm.h      1
-rw-r--r--  usr/src/uts/sun4v/sys/niagara2regs.h   2
-rw-r--r--  usr/src/uts/sun4v/vm/mach_vm_dep.c   342
12 files changed, 330 insertions(+), 84 deletions(-)
diff --git a/usr/src/uts/common/os/vmem.c b/usr/src/uts/common/os/vmem.c
index fda48faae8..18d6b6dad4 100644
--- a/usr/src/uts/common/os/vmem.c
+++ b/usr/src/uts/common/os/vmem.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1050,7 +1049,8 @@ do_alloc:
 		size_t aquantum = MAX(vmp->vm_quantum,
 		    vmp->vm_source->vm_quantum);
 		size_t aphase = phase;
-		if (align > aquantum) {
+		if ((align > aquantum) &&
+		    !(vmp->vm_cflags & VMC_XALIGN)) {
 			aphase = (P2PHASE(phase, aquantum) != 0) ?
 			    align - vmp->vm_quantum : align - aquantum;
 			ASSERT(aphase >= phase);
@@ -1081,10 +1081,12 @@ do_alloc:
 			size_t oasize = asize;
 			vaddr = ((vmem_ximport_t *)
 			    vmp->vm_source_alloc)(vmp->vm_source,
-			    &asize, vmflag & VM_KMFLAGS);
+			    &asize, align, vmflag & VM_KMFLAGS);
 			ASSERT(asize >= oasize);
 			ASSERT(P2PHASE(asize,
 			    vmp->vm_source->vm_quantum) == 0);
+			ASSERT(!(vmp->vm_cflags & VMC_XALIGN) ||
+			    IS_P2ALIGNED(vaddr, align));
 		} else {
 			vaddr = vmp->vm_source_alloc(vmp->vm_source,
 			    asize, vmflag & VM_KMFLAGS);
@@ -1553,8 +1555,8 @@ vmem_create(const char *name, void *base, size_t size, size_t quantum,
 	vmem_alloc_t *afunc, vmem_free_t *ffunc, vmem_t *source,
 	size_t qcache_max, int vmflag)
 {
-	ASSERT(!(vmflag & VMC_XALLOC));
-	vmflag &= ~VMC_XALLOC;
+	ASSERT(!(vmflag & (VMC_XALLOC | VMC_XALIGN)));
+	vmflag &= ~(VMC_XALLOC | VMC_XALIGN);
 
 	return (vmem_create_common(name, base, size, quantum, afunc, ffunc,
 	    source, qcache_max, vmflag));
diff --git a/usr/src/uts/common/sys/vmem.h b/usr/src/uts/common/sys/vmem.h
index 1cd2f30e9b..abcc8b26bc 100644
--- a/usr/src/uts/common/sys/vmem.h
+++ b/usr/src/uts/common/sys/vmem.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -73,9 +72,13 @@ extern "C" {
 #define	VMC_IDENTIFIER	0x00040000	/* not backed by memory */
 /*
  * internal use only; the import function uses the vmem_ximport_t interface
- * and may increase the request size if it so desires
+ * and may increase the request size if it so desires.
+ * VMC_XALIGN, for use with vmem_xcreate, specifies that
+ * the address returned by the import function will be
+ * aligned according to the alignment argument.
 */
 #define	VMC_XALLOC	0x00080000
+#define	VMC_XALIGN	0x00100000
 #define	VMC_FLAGS	0xFFFF0000
 
 /*
@@ -110,7 +113,7 @@ typedef void (vmem_free_t)(vmem_t *, void *, size_t);
  * Alternate import style; the requested size is passed in a pointer,
  * which can be increased by the import function if desired.
  */
-typedef void *(vmem_ximport_t)(vmem_t *, size_t *, int);
+typedef void *(vmem_ximport_t)(vmem_t *, size_t *, size_t, int);
 
 #ifdef _KERNEL
 extern vmem_t *vmem_init(const char *, void *, size_t, size_t,
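The consumer-facing shape of this interface change is easy to miss in the header diff, so here is a minimal sketch of an importer written against the new vmem_ximport_t signature and an arena created with VMC_XALIGN. It is illustrative only and not part of the patch; the arena and function names are hypothetical, and it assumes kernel context with <sys/vmem.h> and heap_arena available, as in the sun4v code later in this changeset.

/*
 * Hypothetical importer using the extended vmem_ximport_t signature
 * (vmem_t *, size_t *, size_t align, int).  With VMC_XALIGN set on the
 * consuming arena, vmem passes the caller's alignment through and expects
 * the returned address to honor it; the importer may still grow *sizep.
 */
static void *
example_ximport(vmem_t *src, size_t *sizep, size_t align, int vmflag)
{
	return (vmem_xalloc(src, *sizep, align, 0, 0, NULL, NULL, vmflag));
}

static vmem_t *example_arena;

void
example_arena_init(void)
{
	/* vmem_xcreate(), not vmem_create(), is what accepts VMC_XALIGN */
	example_arena = vmem_xcreate("example_arena", NULL, 0, PAGESIZE,
	    example_ximport, vmem_xfree, heap_arena, 0,
	    VM_SLEEP | VMC_XALIGN);
}

Note the matching guard added to vmem_create() above: plain vmem_create() callers cannot pass VMC_XALIGN, so the legacy vmem_alloc_t import path is unchanged.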
diff --git a/usr/src/uts/common/vm/seg_kmem.c b/usr/src/uts/common/vm/seg_kmem.c
index 4567e3ee99..9734b6cbeb 100644
--- a/usr/src/uts/common/vm/seg_kmem.c
+++ b/usr/src/uts/common/vm/seg_kmem.c
@@ -1244,8 +1244,9 @@ segkmem_free_one_lp(caddr_t addr, size_t size)
  * it was not able to satisfy the upgraded request it then calls regular
  * segkmem_alloc() that satisfies the request by importing from "*vmp" arena
  */
+/*ARGSUSED*/
 void *
-segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, int vmflag)
+segkmem_alloc_lp(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
 {
 	size_t size;
 	kthread_t *t = curthread;
diff --git a/usr/src/uts/common/vm/seg_kmem.h b/usr/src/uts/common/vm/seg_kmem.h
index 5b50a070ac..2f3a778770 100644
--- a/usr/src/uts/common/vm/seg_kmem.h
+++ b/usr/src/uts/common/vm/seg_kmem.h
@@ -114,7 +114,7 @@ typedef struct segkmem_lpcb {
 	uint64_t	alloc_bytes_failed;
 } segkmem_lpcb_t;
 
-extern void *segkmem_alloc_lp(vmem_t *, size_t *, int);
+extern void *segkmem_alloc_lp(vmem_t *, size_t *, size_t, int);
 extern void segkmem_free_lp(vmem_t *, void *, size_t);
 extern int segkmem_lpsetup();
 extern void segkmem_heap_lp_init(void);
diff --git a/usr/src/uts/sun4/io/trapstat.c b/usr/src/uts/sun4/io/trapstat.c
index 6cfed113ae..c31b4ed885 100644
--- a/usr/src/uts/sun4/io/trapstat.c
+++ b/usr/src/uts/sun4/io/trapstat.c
@@ -1592,8 +1592,11 @@ trapstat_go()
 	 */
 	tstat_va = contig_mem_alloc(MMU_PAGESIZE4M);
 	tstat_pfn = va_to_pfn(tstat_va);
-	if (tstat_pfn == PFN_INVALID)
+	if (tstat_pfn == PFN_INVALID) {
+		mutex_exit(&tstat_lock);
+		mutex_exit(&cpu_lock);
 		return (EAGAIN);
+	}
 
 	/*
 	 * For detailed TLB statistics, invoke CPU specific interface
@@ -1609,6 +1612,8 @@ trapstat_go()
 		tstat_fast_tlbstat = B_TRUE;
 	else if (error != ENOTSUP) {
 		contig_mem_free(tstat_va, MMU_PAGESIZE4M);
+		mutex_exit(&tstat_lock);
+		mutex_exit(&cpu_lock);
 		return (error);
 	}
 }
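Both failure paths fixed by 6473237 share the same shape: locks taken on entry must be dropped on every early return. Below is a minimal sketch of one defensive structure for this, using a single exit label. It is not how trapstat.c is actually written (trapstat releases the locks inline at each return, as shown above), and enable_detailed_stats() is a hypothetical stand-in.

/*
 * Illustrative sketch only: keep lock acquisition and release balanced
 * across failure paths by funnelling every exit through one label.
 */
static int
example_go(void)
{
	int error = 0;
	void *buf;

	mutex_enter(&cpu_lock);
	mutex_enter(&tstat_lock);

	if ((buf = contig_mem_alloc(MMU_PAGESIZE4M)) == NULL) {
		error = EAGAIN;
		goto out;			/* locks still dropped below */
	}

	if ((error = enable_detailed_stats()) != 0) {	/* hypothetical */
		contig_mem_free(buf, MMU_PAGESIZE4M);
		goto out;
	}

	/* ... start collection ... */
out:
	mutex_exit(&tstat_lock);
	mutex_exit(&cpu_lock);
	return (error);
}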
diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c
index c08c0ce996..26752b4b5d 100644
--- a/usr/src/uts/sun4/os/startup.c
+++ b/usr/src/uts/sun4/os/startup.c
@@ -70,6 +70,7 @@ extern void setup_trap_table(void);
 extern int cpu_intrq_setup(struct cpu *);
 extern void cpu_intrq_register(struct cpu *);
 extern void contig_mem_init(void);
+extern caddr_t contig_mem_prealloc(caddr_t, pgcnt_t);
 extern void mach_dump_buffer_init(void);
 extern void mach_descrip_init(void);
 extern void mach_descrip_startup_fini(void);
@@ -1135,6 +1136,11 @@ startup_memlist(void)
 	}
 
 	/*
+	 * Allow for an early allocation of physically contiguous memory.
+	 */
+	alloc_base = contig_mem_prealloc(alloc_base, npages);
+
+	/*
 	 * Allocate the remaining page freelists. NUMA systems can
 	 * have lots of page freelists, one per node, which quickly
 	 * outgrow the amount of nucleus memory available.
diff --git a/usr/src/uts/sun4u/vm/mach_vm_dep.c b/usr/src/uts/sun4u/vm/mach_vm_dep.c
index 9550242d0e..9cdabd0658 100644
--- a/usr/src/uts/sun4u/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c
@@ -332,6 +332,14 @@ contig_mem_init(void)
 	/* not applicable to sun4u */
 }
 
+/*ARGSUSED*/
+caddr_t
+contig_mem_prealloc(caddr_t alloc_base, pgcnt_t npages)
+{
+	/* not applicable to sun4u */
+	return (alloc_base);
+}
+
 size_t
 exec_get_spslew(void)
 {
diff --git a/usr/src/uts/sun4v/cpu/niagara2.c b/usr/src/uts/sun4v/cpu/niagara2.c
index 2073f61663..7f31035311 100644
--- a/usr/src/uts/sun4v/cpu/niagara2.c
+++ b/usr/src/uts/sun4v/cpu/niagara2.c
@@ -78,6 +78,7 @@ cpu_setup(void)
 {
 	extern int mmu_exported_pagesize_mask;
 	extern int cpc_has_overflow_intr;
+	extern size_t contig_mem_prealloc_base;
 	int status;
 
 	/*
@@ -133,6 +134,8 @@ cpu_setup(void)
 	max_uheap_lpsize = MMU_PAGESIZE4M;
 	max_ustack_lpsize = MMU_PAGESIZE4M;
 	max_privmap_lpsize = MMU_PAGESIZE4M;
+
+	contig_mem_prealloc_base = NIAGARA2_PREALLOC_BASE;
 }
 
 /*
diff --git a/usr/src/uts/sun4v/os/fillsysinfo.c b/usr/src/uts/sun4v/os/fillsysinfo.c
index a2718cbed1..eaabde53ae 100644
--- a/usr/src/uts/sun4v/os/fillsysinfo.c
+++ b/usr/src/uts/sun4v/os/fillsysinfo.c
@@ -60,6 +60,7 @@ uint64_t cpu_q_entries;
 uint64_t dev_q_entries;
 uint64_t cpu_rq_entries;
 uint64_t cpu_nrq_entries;
+uint64_t ncpu_guest_max;
 
 void fill_cpu(md_t *, mde_cookie_t);
@@ -654,7 +655,6 @@ get_q_sizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
 {
 	uint64_t max_qsize;
 	mde_cookie_t *platlist;
-	uint64_t ncpus = NCPU;
 	int nrnode;
 
 	/*
@@ -667,8 +667,9 @@ get_q_sizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
 
 	ASSERT(nrnode == 1);
 
-	(void) md_get_prop_val(mdp, platlist[0], "max-cpus", &ncpus);
-	max_qsize = ncpus * CPU_MONDO_Q_MULTIPLIER;
+	ncpu_guest_max = NCPU;
+	(void) md_get_prop_val(mdp, platlist[0], "max-cpus", &ncpu_guest_max);
+	max_qsize = ncpu_guest_max * CPU_MONDO_Q_MULTIPLIER;
 
 	md_free_scan_dag(mdp, &platlist);
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index fcb74954a7..c7f3b1dd27 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -480,6 +480,7 @@ extern uint64_t cpu_q_entries;
 extern uint64_t dev_q_entries;
 extern uint64_t cpu_rq_entries;
 extern uint64_t cpu_nrq_entries;
+extern uint64_t ncpu_guest_max;
 #endif /* _ASM */
 
 #endif /* _KERNEL */
diff --git a/usr/src/uts/sun4v/sys/niagara2regs.h b/usr/src/uts/sun4v/sys/niagara2regs.h
index 2f2bf3ea65..65e4953112 100644
--- a/usr/src/uts/sun4v/sys/niagara2regs.h
+++ b/usr/src/uts/sun4v/sys/niagara2regs.h
@@ -41,6 +41,8 @@ extern "C" {
 #define	NIAGARA2_HSVC_MAJOR	1
 #define	NIAGARA2_HSVC_MINOR	0
 
+#define	NIAGARA2_PREALLOC_BASE	MB(196)
+
 /* PIC overflow range is -16 to -1 */
 #define	PIC_IN_OV_RANGE(x)	(((uint32_t)x >= 0xfffffff0) ? 1 : 0)
diff --git a/usr/src/uts/sun4v/vm/mach_vm_dep.c b/usr/src/uts/sun4v/vm/mach_vm_dep.c
index d6211b4a1d..55ebdb715b 100644
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c
@@ -111,6 +111,63 @@ size_t max_utext_lpsize = MMU_PAGESIZE4M;
 size_t max_shm_lpsize = MMU_PAGESIZE4M;
 
 /*
+ * Contiguous memory allocator data structures and variables.
+ *
+ * The sun4v kernel must provide a means to allocate physically
+ * contiguous, non-relocatable memory. The contig_mem_arena
+ * and contig_mem_slab_arena exist for this purpose. Allocations
+ * that require physically contiguous non-relocatable memory should
+ * be made using contig_mem_alloc() or contig_mem_alloc_align()
+ * which return memory from contig_mem_arena or contig_mem_reloc_arena.
+ * These arenas import memory from the contig_mem_slab_arena one
+ * contiguous chunk at a time.
+ *
+ * When importing slabs, an attempt is made to allocate a large page
+ * to use as backing. As a result of the non-relocatable requirement,
+ * slabs are allocated from the kernel cage freelists. If the cage does
+ * not contain any free contiguous chunks large enough to satisfy the
+ * slab allocation, the slab size will be downsized and the operation
+ * retried. Large slab sizes are tried first to minimize cage
+ * fragmentation. If the slab allocation is unsuccessful still, the slab
+ * is allocated from outside the kernel cage. This is undesirable because,
+ * until slabs are freed, it results in non-relocatable chunks scattered
+ * throughout physical memory.
+ *
+ * Allocations from the contig_mem_arena are backed by slabs from the
+ * cage. Allocations from the contig_mem_reloc_arena are backed by
+ * slabs allocated outside the cage. Slabs are left share locked while
+ * in use to prevent non-cage slabs from being relocated.
+ *
+ * Since there is no guarantee that large pages will be available in
+ * the kernel cage, contiguous memory is reserved and added to the
+ * contig_mem_arena at boot time, making it available for later
+ * contiguous memory allocations. This reserve will be used to satisfy
+ * contig_mem allocations first and it is only when the reserve is
+ * completely allocated that new slabs will need to be imported.
+ */
+static vmem_t *contig_mem_slab_arena;
+static vmem_t *contig_mem_arena;
+static vmem_t *contig_mem_reloc_arena;
+static kmutex_t contig_mem_lock;
+#define	CONTIG_MEM_ARENA_QUANTUM	64
+#define	CONTIG_MEM_SLAB_ARENA_QUANTUM	MMU_PAGESIZE64K
+
+/* contig_mem_arena import slab sizes, in decreasing size order */
+static size_t contig_mem_import_sizes[] = {
+	MMU_PAGESIZE4M,
+	MMU_PAGESIZE512K,
+	MMU_PAGESIZE64K
+};
+#define	NUM_IMPORT_SIZES	\
+	(sizeof (contig_mem_import_sizes) / sizeof (size_t))
+static size_t contig_mem_import_size_max = MMU_PAGESIZE4M;
+size_t contig_mem_slab_size = MMU_PAGESIZE4M;
+
+/* Boot-time allocated buffer to pre-populate the contig_mem_arena */
+static size_t prealloc_size;
+static void *prealloc_buf;
+
+/*
  * map_addr_proc() is the routine called when the system is to
  * choose an address for the user. We will pick an address
  * range which is just below the current stack limit. The
@@ -332,13 +389,6 @@ mmu_init_kernel_pgsz(struct hat *hat)
 {
 }
 
-#define	QUANTUM_SIZE	64
-
-static vmem_t *contig_mem_slab_arena;
-static vmem_t *contig_mem_arena;
-
-uint_t contig_mem_slab_size = MMU_PAGESIZE4M;
-
 static void *
 contig_mem_span_alloc(vmem_t *vmp, size_t size, int vmflag)
 {
@@ -348,18 +398,11 @@ contig_mem_span_alloc(vmem_t *vmp, size_t size, int vmflag)
 	pgcnt_t npages = btopr(size);
 	page_t **ppa;
 	int pgflags;
-	int i = 0;
+	spgcnt_t i = 0;
 
-	/*
-	 * The import request should be at least
-	 * contig_mem_slab_size because that is the
-	 * slab arena's quantum. The size can be
-	 * further restricted since contiguous
-	 * allocations larger than contig_mem_slab_size
-	 * are not supported here.
-	 */
-	ASSERT(size == contig_mem_slab_size);
+	ASSERT(size <= contig_mem_import_size_max);
+	ASSERT((size & (size - 1)) == 0);
 
 	if ((addr = vmem_xalloc(vmp, size, size, 0, 0,
 	    NULL, NULL, vmflag)) == NULL) {
@@ -367,7 +410,7 @@
 	}
 
 	/* The address should be slab-size aligned. */
-	ASSERT(((uintptr_t)addr & (contig_mem_slab_size - 1)) == 0);
+	ASSERT(((uintptr_t)addr & (size - 1)) == 0);
 
 	if (page_resv(npages, vmflag & VM_KMFLAGS) == 0) {
 		vmem_xfree(vmp, addr, size);
@@ -375,12 +418,8 @@
 	pgflags = PG_EXCL;
-	if ((vmflag & VM_NOSLEEP) == 0)
-		pgflags |= PG_WAIT;
-	if (vmflag & VM_PANIC)
-		pgflags |= PG_PANIC;
-	if (vmflag & VM_PUSHPAGE)
-		pgflags |= PG_PUSHPAGE;
+	if (vmflag & VM_NORELOC)
+		pgflags |= PG_NORELOC;
 
 	ppl = page_create_va_large(&kvp, (u_offset_t)(uintptr_t)addr, size,
 	    pgflags, &kvseg, addr, NULL);
@@ -398,6 +437,7 @@
 		ppa[i++] = pp;
 		page_sub(&ppl, pp);
 		ASSERT(page_iolock_assert(pp));
+		ASSERT(PAGE_EXCL(pp));
 		page_io_unlock(pp);
 	}
 
@@ -408,47 +448,123 @@
 	hat_memload_array(kas.a_hat, (caddr_t)rootpp->p_offset, size, ppa,
 	    (PROT_ALL & ~PROT_USER) | HAT_NOSYNC, HAT_LOAD_LOCK);
 
+	ASSERT(i == page_get_pagecnt(ppa[0]->p_szc));
 	for (--i; i >= 0; --i) {
+		ASSERT(ppa[i]->p_szc == ppa[0]->p_szc);
+		ASSERT(page_pptonum(ppa[i]) == page_pptonum(ppa[0]) + i);
 		(void) page_pp_lock(ppa[i], 0, 1);
-		page_unlock(ppa[i]);
+		/*
+		 * Leave the page share locked. For non-cage pages,
+		 * this would prevent memory DR if it were supported
+		 * on sun4v.
+		 */
+		page_downgrade(ppa[i]);
 	}
 
 	kmem_free(ppa, npages * sizeof (page_t *));
 	return (addr);
 }
 
-void
+/*
+ * Allocates a slab by first trying to use the largest slab size
+ * in contig_mem_import_sizes and then falling back to smaller slab
+ * sizes still large enough for the allocation. The sizep argument
+ * is a pointer to the requested size. When a slab is successfully
+ * allocated, the slab size, which must be >= *sizep and <=
+ * contig_mem_import_size_max, is returned in the *sizep argument.
+ * Returns the virtual address of the new slab.
+ */
+static void *
+span_alloc_downsize(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
+{
+	int i;
+
+	ASSERT(*sizep <= contig_mem_import_size_max);
+
+	for (i = 0; i < NUM_IMPORT_SIZES; i++) {
+		size_t page_size = contig_mem_import_sizes[i];
+
+		/*
+		 * Check that the alignment is also less than the
+		 * import (large page) size. In the case where the
+		 * alignment is larger than the size, a large page
+		 * large enough for the allocation is not necessarily
+		 * physical-address aligned to satisfy the requested
+		 * alignment. Since alignment is required to be a
+		 * power-of-2, any large page >= size && >= align will
+		 * suffice.
+		 */
+		if (*sizep <= page_size && align <= page_size) {
+			void *addr;
+			addr = contig_mem_span_alloc(vmp, page_size, vmflag);
+			if (addr == NULL)
+				continue;
+			*sizep = page_size;
+			return (addr);
+		}
+		return (NULL);
+	}
+
+	return (NULL);
+}
+
+static void *
+contig_mem_span_xalloc(vmem_t *vmp, size_t *sizep, size_t align, int vmflag)
+{
+	return (span_alloc_downsize(vmp, sizep, align, vmflag | VM_NORELOC));
+}
+
+static void *
+contig_mem_reloc_span_xalloc(vmem_t *vmp, size_t *sizep, size_t align,
+    int vmflag)
+{
+	ASSERT((vmflag & VM_NORELOC) == 0);
+	return (span_alloc_downsize(vmp, sizep, align, vmflag));
+}
+
+/*
+ * Free a span, which is always exactly one large page.
+ */
+static void
 contig_mem_span_free(vmem_t *vmp, void *inaddr, size_t size)
 {
 	page_t *pp;
 	caddr_t addr = inaddr;
 	caddr_t eaddr;
 	pgcnt_t npages = btopr(size);
-	pgcnt_t pgs_left = npages;
 	page_t *rootpp = NULL;
 
-	ASSERT(((uintptr_t)addr & (contig_mem_slab_size - 1)) == 0);
+	ASSERT(size <= contig_mem_import_size_max);
+	/* All slabs should be size aligned */
+	ASSERT(((uintptr_t)addr & (size - 1)) == 0);
 
 	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
 
 	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
-		pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr, SE_EXCL);
-		if (pp == NULL)
+		pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
+		if (pp == NULL) {
 			panic("contig_mem_span_free: page not found");
+		}
+		if (!page_tryupgrade(pp)) {
+			page_unlock(pp);
+			pp = page_lookup(&kvp,
+			    (u_offset_t)(uintptr_t)addr, SE_EXCL);
+			if (pp == NULL)
+				panic("contig_mem_span_free: page not found");
+		}
 
 		ASSERT(PAGE_EXCL(pp));
+		ASSERT(size == page_get_pagesize(pp->p_szc));
+		ASSERT(rootpp == NULL || rootpp->p_szc == pp->p_szc);
+		ASSERT(rootpp == NULL || (page_pptonum(rootpp) +
+		    (pgcnt_t)btop(addr - (caddr_t)inaddr) == page_pptonum(pp)));
+
 		page_pp_unlock(pp, 0, 1);
 		if (rootpp == NULL)
 			rootpp = pp;
-		if (--pgs_left == 0) {
-			/*
-			 * similar logic to segspt_free_pages, but we know we
-			 * have one large page.
-			 */
-			page_destroy_pages(rootpp);
-		}
 	}
+	page_destroy_pages(rootpp);
 
 	page_unresv(npages);
 
 	if (vmp != NULL)
@@ -456,29 +572,30 @@ contig_mem_span_free(vmem_t *vmp, void *inaddr, size_t size)
 
 static void *
-contig_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t size, int vmflag)
+contig_vmem_xalloc_aligned_wrapper(vmem_t *vmp, size_t *sizep, size_t align,
+    int vmflag)
 {
-	return (vmem_xalloc(vmp, size, size, 0, 0, NULL, NULL, vmflag));
+	ASSERT((align & (align - 1)) == 0);
+	return (vmem_xalloc(vmp, *sizep, align, 0, 0, NULL, NULL, vmflag));
 }
 
 /*
- * conting_mem_alloc_align allocates real contiguous memory with the specified
- * alignment upto contig_mem_slab_size. The alignment must be a power of 2.
+ * contig_mem_alloc, contig_mem_alloc_align
+ *
+ * Caution: contig_mem_alloc and contig_mem_alloc_align should be
+ * used only when physically contiguous non-relocatable memory is
+ * required. Furthermore, use of these allocation routines should be
+ * minimized as well as should the allocation size. As described in the
+ * contig_mem_arena comment block above, slab allocations fall back to
+ * being outside of the cage. Therefore, overuse of these allocation
+ * routines can lead to non-relocatable large pages being allocated
+ * outside the cage. Such pages prevent the allocation of a larger page
+ * occupying overlapping pages. This can impact performance for
+ * applications that utilize e.g. 256M large pages.
 */
-void *
-contig_mem_alloc_align(size_t size, size_t align)
-{
-	ASSERT(align <= contig_mem_slab_size);
-
-	if ((align & (align - 1)) != 0)
-		return (NULL);
-
-	return (vmem_xalloc(contig_mem_arena, size, align, 0, 0,
-	    NULL, NULL, VM_NOSLEEP));
-}
 
 /*
- * Allocates size aligned contiguous memory upto contig_mem_slab_size.
+ * Allocates size aligned contiguous memory up to contig_mem_import_size_max.
  * Size must be a power of 2.
  */
 void *
@@ -488,33 +605,130 @@ contig_mem_alloc(size_t size)
 	return (contig_mem_alloc_align(size, size));
 }
 
+/*
+ * contig_mem_alloc_align allocates real contiguous memory with the specified
+ * alignment up to contig_mem_import_size_max. The alignment must be a
+ * power of 2 and no greater than contig_mem_import_size_max. We assert
+ * the alignment is a power of 2. For non-debug, vmem_xalloc will panic
+ * for non power of 2 alignments.
+ */
+void *
+contig_mem_alloc_align(size_t size, size_t align)
+{
+	void *buf;
+
+	ASSERT(size <= contig_mem_import_size_max);
+	ASSERT(align <= contig_mem_import_size_max);
+	ASSERT((align & (align - 1)) == 0);
+
+	if (align < CONTIG_MEM_ARENA_QUANTUM)
+		align = CONTIG_MEM_ARENA_QUANTUM;
+
+	/*
+	 * We take the lock here to serialize span allocations.
+	 * We do not lose concurrency for the common case, since
+	 * allocations that don't require new span allocations
+	 * are serialized by vmem_xalloc. Serializing span
+	 * allocations also prevents us from trying to allocate
+	 * more spans than necessary.
+	 */
+	mutex_enter(&contig_mem_lock);
+
+	buf = vmem_xalloc(contig_mem_arena, size, align, 0, 0,
+	    NULL, NULL, VM_NOSLEEP | VM_NORELOC);
+
+	if ((buf == NULL) && (size <= MMU_PAGESIZE)) {
+		mutex_exit(&contig_mem_lock);
+		return (vmem_xalloc(static_alloc_arena, size, align, 0, 0,
+		    NULL, NULL, VM_NOSLEEP));
+	}
+
+	if (buf == NULL) {
+		buf = vmem_xalloc(contig_mem_reloc_arena, size, align, 0, 0,
+		    NULL, NULL, VM_NOSLEEP);
+	}
+
+	mutex_exit(&contig_mem_lock);
+
+	return (buf);
+}
+
 void
 contig_mem_free(void *vaddr, size_t size)
 {
-	vmem_xfree(contig_mem_arena, vaddr, size);
+	if (vmem_contains(contig_mem_arena, vaddr, size)) {
+		vmem_xfree(contig_mem_arena, vaddr, size);
+	} else if (size > MMU_PAGESIZE) {
+		vmem_xfree(contig_mem_reloc_arena, vaddr, size);
+	} else {
+		vmem_xfree(static_alloc_arena, vaddr, size);
+	}
 }
 
 /*
  * We create a set of stacked vmem arenas to enable us to
- * allocate large >PAGESIZE chucks of contiguous Real Address space
- * This is what the Dynamics TSB support does for TSBs.
- * The contig_mem_arena import functions are exactly the same as the
- * TSB kmem_default arena import functions.
+ * allocate large >PAGESIZE chunks of contiguous Real Address space.
+ * The vmem_xcreate interface is used to create the contig_mem_arena
+ * allowing the import routine to downsize the requested slab size
+ * and return a smaller slab.
 */
 void
 contig_mem_init(void)
 {
+	mutex_init(&contig_mem_lock, NULL, MUTEX_DEFAULT, NULL);
 
-	contig_mem_slab_arena = vmem_create("contig_mem_slab_arena", NULL, 0,
-	    contig_mem_slab_size, contig_vmem_xalloc_aligned_wrapper,
-	    vmem_xfree, heap_arena, 0, VM_SLEEP);
+	contig_mem_slab_arena = vmem_xcreate("contig_mem_slab_arena", NULL, 0,
+	    CONTIG_MEM_SLAB_ARENA_QUANTUM, contig_vmem_xalloc_aligned_wrapper,
+	    vmem_xfree, heap_arena, 0, VM_SLEEP | VMC_XALIGN);
 
-	contig_mem_arena = vmem_create("contig_mem_arena", NULL, 0,
-	    QUANTUM_SIZE, contig_mem_span_alloc, contig_mem_span_free,
-	    contig_mem_slab_arena, 0, VM_SLEEP | VM_BESTFIT);
+	contig_mem_arena = vmem_xcreate("contig_mem_arena", NULL, 0,
+	    CONTIG_MEM_ARENA_QUANTUM, contig_mem_span_xalloc,
+	    contig_mem_span_free, contig_mem_slab_arena, 0,
+	    VM_SLEEP | VM_BESTFIT | VMC_XALIGN);
 
+	contig_mem_reloc_arena = vmem_xcreate("contig_mem_reloc_arena", NULL, 0,
+	    CONTIG_MEM_ARENA_QUANTUM, contig_mem_reloc_span_xalloc,
+	    contig_mem_span_free, contig_mem_slab_arena, 0,
+	    VM_SLEEP | VM_BESTFIT | VMC_XALIGN);
+
+	if (vmem_add(contig_mem_arena, prealloc_buf, prealloc_size,
+	    VM_SLEEP) == NULL)
+		cmn_err(CE_PANIC, "Failed to pre-populate contig_mem_arena");
 }
 
+/*
+ * In calculating how much memory to pre-allocate, we include a small
+ * amount per-CPU to account for per-CPU buffers in line with measured
+ * values for different size systems. contig_mem_prealloc_base is the
+ * base fixed amount to be preallocated before considering per-CPU
+ * requirements and memory size. We take the minimum of
+ * contig_mem_prealloc_base and a small percentage of physical memory
+ * to prevent allocating too much on smaller systems.
+ */
+#define	PREALLOC_PER_CPU	(256 * 1024)		/* 256K */
+#define	PREALLOC_PERCENT	(4)			/* 4% */
+#define	PREALLOC_MIN		(16 * 1024 * 1024)	/* 16M */
+size_t contig_mem_prealloc_base = 0;
+
+/*
+ * Called at boot-time allowing pre-allocation of contiguous memory.
+ * The argument 'alloc_base' is the requested base address for the
+ * allocation and originates in startup_memlist.
+ */
+caddr_t
+contig_mem_prealloc(caddr_t alloc_base, pgcnt_t npages)
+{
+	prealloc_size = MIN((PREALLOC_PER_CPU * ncpu_guest_max) +
+	    contig_mem_prealloc_base, (ptob(npages) * PREALLOC_PERCENT) / 100);
+	prealloc_size = MAX(prealloc_size, PREALLOC_MIN);
+	prealloc_size = P2ROUNDUP(prealloc_size, MMU_PAGESIZE4M);
+
+	alloc_base = (caddr_t)roundup((uintptr_t)alloc_base, MMU_PAGESIZE4M);
+	prealloc_buf = alloc_base;
+	alloc_base += prealloc_size;
+
+	return (alloc_base);
+}
+
 static uint_t sp_color_stride = 16;
 static uint_t sp_color_mask = 0x1f;
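To make the sizing policy in contig_mem_prealloc() above concrete, here is a small user-space sketch that mirrors the arithmetic under assumed inputs: 64 maximum guest CPUs, 32 GB of memory, and the 196 MB Niagara-2 base defined earlier in this patch. The input values are assumptions chosen for illustration, not measurements.

/*
 * Illustrative only: user-space arithmetic mirroring contig_mem_prealloc().
 * PREALLOC_* values come from the patch; the CPU count, memory size, and
 * base are assumptions.
 */
#include <stdio.h>
#include <stdint.h>

#define	MB(x)			((uint64_t)(x) << 20)
#define	PREALLOC_PER_CPU	(256 * 1024)		/* 256K, from the patch */
#define	PREALLOC_PERCENT	4			/* 4%, from the patch */
#define	PREALLOC_MIN		MB(16)			/* 16M, from the patch */
#define	ROUNDUP_4M(x)		(((x) + MB(4) - 1) & ~(MB(4) - 1))

int
main(void)
{
	uint64_t ncpu_guest_max = 64;		/* assumed */
	uint64_t physmem = MB(32 * 1024);	/* assumed: 32 GB */
	uint64_t base = MB(196);		/* NIAGARA2_PREALLOC_BASE */
	uint64_t sz;

	sz = PREALLOC_PER_CPU * ncpu_guest_max + base;	/* 16M + 196M = 212M */
	if (sz > physmem * PREALLOC_PERCENT / 100)	/* cap at 4% of memory */
		sz = physmem * PREALLOC_PERCENT / 100;
	if (sz < PREALLOC_MIN)				/* floor of 16M */
		sz = PREALLOC_MIN;
	sz = ROUNDUP_4M(sz);				/* 4M page multiple */

	(void) printf("prealloc_size = %llu MB\n",
	    (unsigned long long)(sz >> 20));	/* prints 212 on these inputs */
	return (0);
}

With a smaller assumed configuration, say 8 CPUs and 4 GB of memory, the 4% cap becomes the limiting term (163.84 MB), and rounding up to a 4 MB multiple yields a 164 MB reservation.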