diff options
author | kchow <none@none> | 2007-10-28 10:20:15 -0700 |
---|---|---|
committer | kchow <none@none> | 2007-10-28 10:20:15 -0700 |
commit | 02bc52be7430b2f7fafe1a2c981bff49ef11d6fa (patch) | |
tree | 14721ff92a74ba4be3174a552670ce7850c47633 /usr/src | |
parent | f0ed2251df2f0c744e0d198d9b79ae5ed937de7b (diff) | |
download | illumos-joyent-02bc52be7430b2f7fafe1a2c981bff49ef11d6fa.tar.gz |
6453272 ctfmerge uses the largest pagesize from getpagesizes() which can be bad on systems with giant pages
6543997 divide by 0 panic in page_geti_contig_pages during 1g page testing
6587615 1g pagesize support on AMD family 0x10 processors
6588824 throttling large page coalescing needs to be revisited for gigantic pages
6613824 legacy applications (java) should be prevented from inadvertently using 1g pagesize
--HG--
rename : usr/src/lib/libc/common/sys/getpagesizes.s => deleted_files/usr/src/lib/libc/common/sys/getpagesizes.s
Diffstat (limited to 'usr/src')
29 files changed, 225 insertions, 120 deletions
diff --git a/usr/src/lib/common/inc/c_synonyms.h b/usr/src/lib/common/inc/c_synonyms.h index 8b8a610531..a4cbd44847 100644 --- a/usr/src/lib/common/inc/c_synonyms.h +++ b/usr/src/lib/common/inc/c_synonyms.h @@ -359,6 +359,8 @@ extern "C" { #define getopt _getopt #define getopt_long _getopt_long #define getopt_long_only _getopt_long_only +#define getpagesizes _getpagesizes +#define getpagesizes2 _getpagesizes2 #define getpass _getpass #define getpeerucred _getpeerucred #define getpflags _getpflags diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile index 0539645b34..646d7f06c0 100644 --- a/usr/src/lib/libc/amd64/Makefile +++ b/usr/src/lib/libc/amd64/Makefile @@ -206,7 +206,6 @@ COMSYSOBJS= \ gethrtime.o \ getitimer.o \ getmsg.o \ - getpagesizes.o \ getpid.o \ getpmsg.o \ getppid.o \ @@ -798,6 +797,7 @@ PORTSYS= \ fsmisc.o \ fstatat.o \ fsync.o \ + getpagesizes.o \ getpeerucred.o \ inst_sync.o \ issetugid.o \ diff --git a/usr/src/lib/libc/common/sys/getpagesizes.s b/usr/src/lib/libc/common/sys/getpagesizes.s deleted file mode 100644 index d70cede69d..0000000000 --- a/usr/src/lib/libc/common/sys/getpagesizes.s +++ /dev/null @@ -1,45 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2001 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - -.ident "%Z%%M% %I% %E% SMI" - -/* C library -- getpagesizes */ -/* uint_t getpagesizes(size_t buf[], uint_t nelem) */ - - .file "getpagesizes.s" - -#include <sys/asm_linkage.h> - - ANSI_PRAGMA_WEAK(getpagesizes,function) - -#include "SYS.h" - - SYSCALL(getpagesizes) - RET - SET_SIZE(getpagesizes) diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com index 5fb4ffea84..5e07b84f54 100644 --- a/usr/src/lib/libc/i386/Makefile.com +++ b/usr/src/lib/libc/i386/Makefile.com @@ -231,7 +231,6 @@ COMSYSOBJS= \ gethrtime.o \ getitimer.o \ getmsg.o \ - getpagesizes.o \ getpid.o \ getpmsg.o \ getppid.o \ @@ -838,6 +837,7 @@ PORTSYS= \ fsmisc.o \ fstatat.o \ fsync.o \ + getpagesizes.o \ getpeerucred.o \ inst_sync.o \ issetugid.o \ diff --git a/usr/src/lib/libc/inc/synonyms.h b/usr/src/lib/libc/inc/synonyms.h index 4b4ce263f0..e7e69a3ce7 100644 --- a/usr/src/lib/libc/inc/synonyms.h +++ b/usr/src/lib/libc/inc/synonyms.h @@ -419,6 +419,7 @@ extern "C" { #define getopt_long _getopt_long #define getopt_long_only _getopt_long_only #define getpagesizes _getpagesizes +#define getpagesizes2 _getpagesizes2 #define getpass _getpass #define getpassphrase _getpassphrase #define getpeerucred _getpeerucred diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers index 717c4d9186..8444970351 100644 --- a/usr/src/lib/libc/port/mapfile-vers +++ b/usr/src/lib/libc/port/mapfile-vers @@ -69,6 +69,8 @@ SUNW_1.23 { # SunOS 5.11 (Solaris 11) forkx; fsetattr; getattrat; + getpagesizes2; + _getpagesizes2; 
htonl; htons; lio_listio; @@ -515,6 +517,7 @@ SUNW_1.21 { # SunOS 5.9 (Solaris 9) gethomelgroup; _gethomelgroup = NODYNSORT; getpagesizes; + _getpagesizes; getrctl; _getrctl; issetugid; diff --git a/usr/src/lib/libc/port/sys/getpagesizes.c b/usr/src/lib/libc/port/sys/getpagesizes.c new file mode 100644 index 0000000000..86c5944fd9 --- /dev/null +++ b/usr/src/lib/libc/port/sys/getpagesizes.c @@ -0,0 +1,60 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#pragma weak getpagesizes = _getpagesizes +#pragma weak getpagesizes2 = _getpagesizes2 + +#include "synonyms.h" +#include <sys/types.h> +#include <unistd.h> +#include <sys/syscall.h> + +/* + * mman.h contains "#pragma redefine_extname getpagesizes getpagesizes2". + * Applications that are still calling getpagesizes() instead of + * getpagesizes2() are 'legacy' applications that have not been recompiled + * since the #pragma redefine_extname change. 
+ * + * Depending on the platform, 'legacy' applications may not be given the full + * set of supported page sizes to prevent them from inadvertantly using 'new' + * large pagesizes that might cause application failure or low system memory + * conditions. + * + * The first parameter to the SYS_getpagesizes syscall is effectively + * a 'legacy' boolean flag used as such in the kernel. + */ +int +getpagesizes(size_t pagesize[], int nelem) +{ + return (syscall(SYS_getpagesizes, 1, pagesize, nelem)); +} + +int +getpagesizes2(size_t pagesize[], int nelem) +{ + return (syscall(SYS_getpagesizes, 0, pagesize, nelem)); +} diff --git a/usr/src/lib/libc/sparc/Makefile b/usr/src/lib/libc/sparc/Makefile index 37e0818dc6..2089d2920a 100644 --- a/usr/src/lib/libc/sparc/Makefile +++ b/usr/src/lib/libc/sparc/Makefile @@ -248,7 +248,6 @@ COMSYSOBJS= \ gethrtime.o \ getitimer.o \ getmsg.o \ - getpagesizes.o \ getpid.o \ getpmsg.o \ getppid.o \ @@ -864,6 +863,7 @@ PORTSYS= \ fsmisc.o \ fstatat.o \ fsync.o \ + getpagesizes.o \ getpeerucred.o \ inst_sync.o \ issetugid.o \ diff --git a/usr/src/lib/libc/sparcv9/Makefile b/usr/src/lib/libc/sparcv9/Makefile index c837e39361..e71aef0883 100644 --- a/usr/src/lib/libc/sparcv9/Makefile +++ b/usr/src/lib/libc/sparcv9/Makefile @@ -231,7 +231,6 @@ COMSYSOBJS= \ gethrtime.o \ getitimer.o \ getmsg.o \ - getpagesizes.o \ getpid.o \ getpmsg.o \ getppid.o \ @@ -809,6 +808,7 @@ PORTSYS= \ fsmisc.o \ fstatat.o \ fsync.o \ + getpagesizes.o \ getpeerucred.o \ inst_sync.o \ issetugid.o \ diff --git a/usr/src/tools/ctf/cvt/ctfmerge.c b/usr/src/tools/ctf/cvt/ctfmerge.c index 70ca22c7fe..2def4904a6 100644 --- a/usr/src/tools/ctf/cvt/ctfmerge.c +++ b/usr/src/tools/ctf/cvt/ctfmerge.c @@ -206,6 +206,8 @@ static char *outfile = NULL; static char *tmpname = NULL; static int dynsym; int debug_level = DEBUG_LEVEL; +static size_t maxpgsize = 0x400000; + void usage(void) @@ -228,7 +230,7 @@ static void bigheap(void) { size_t big, *size; - int sizes, i; + int sizes; 
struct memcntl_mha mha; /* @@ -237,12 +239,16 @@ bigheap(void) if ((sizes = getpagesizes(NULL, 0)) == -1) return; - if ((size = alloca(sizeof (size_t) * sizes)) == NULL) + if (sizes == 1 || (size = alloca(sizeof (size_t) * sizes)) == NULL) return; - if (getpagesizes(size, sizes) == -1 || sizes == 1) + if (getpagesizes(size, sizes) == -1) return; + while (size[sizes - 1] > maxpgsize) + sizes--; + + /* set big to the largest allowed page size */ big = size[sizes - 1]; if (big & (big - 1)) { /* @@ -259,21 +265,13 @@ bigheap(void) return; /* - * Finally, set our heap to use the largest page size for which the - * MC_HAT_ADVISE doesn't return EAGAIN. + * set the preferred page size for the heap */ mha.mha_cmd = MHA_MAPSIZE_BSSBRK; mha.mha_flags = 0; + mha.mha_pagesize = big; - for (i = sizes - 1; i >= 0; i--) { - mha.mha_pagesize = size[i]; - - if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) != -1) - break; - - if (errno != EAGAIN) - break; - } + (void) memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0); } static void diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c index 38c3270f9b..b6d097b585 100644 --- a/usr/src/uts/common/os/sysent.c +++ b/usr/src/uts/common/os/sysent.c @@ -520,7 +520,7 @@ struct sysent sysent[NSYSCALL] = /* 70 */ SYSENT_CI("tasksys", tasksys, 5), /* 71 */ SYSENT_LOADABLE(), /* acctctl */ /* 72 */ SYSENT_LOADABLE(), /* exacct */ - /* 73 */ SYSENT_CI("getpagesizes", getpagesizes, 2), + /* 73 */ SYSENT_CI("getpagesizes", getpagesizes, 3), /* 74 */ SYSENT_CI("rctlsys", rctlsys, 6), /* 75 */ SYSENT_2CI("sidsys", sidsys, 4), /* 76 */ IF_LP64( @@ -921,7 +921,7 @@ struct sysent sysent32[NSYSCALL] = /* 70 */ SYSENT_CI("tasksys", tasksys, 5), /* 71 */ SYSENT_LOADABLE32(), /* acctctl */ /* 72 */ SYSENT_LOADABLE32(), /* exacct */ - /* 73 */ SYSENT_CI("getpagesizes", getpagesizes32, 2), + /* 73 */ SYSENT_CI("getpagesizes", getpagesizes32, 3), /* 74 */ SYSENT_CI("rctlsys", rctlsys, 6), /* 75 */ SYSENT_2CI("sidsys", sidsys, 
4), /* 76 */ SYSENT_CI("fsat", fsat32, 6), diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h index 5132833ed0..14b1f52efc 100644 --- a/usr/src/uts/common/sys/mman.h +++ b/usr/src/uts/common/sys/mman.h @@ -143,6 +143,12 @@ extern "C" { #endif #endif /* _LP64 && _LARGEFILE64_SOURCE */ +#ifdef __PRAGMA_REDEFINE_EXTNAME +#pragma redefine_extname getpagesizes getpagesizes2 +#else +#define getpagesizes getpagesizes2 +#endif + /* * Except for old binaries mmap() will return the resultant * address of mapping on success and (caddr_t)-1 on error. @@ -174,6 +180,7 @@ extern int memcntl(caddr_t, size_t, int, caddr_t, int, int); extern int madvise(caddr_t, size_t, int); #if !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__) extern int getpagesizes(size_t *, int); +extern int getpagesizes2(size_t *, int); /* guard visibility of uint64_t */ #if defined(_INT64_TYPE) extern int meminfo(const uint64_t *, int, const uint_t *, int, uint64_t *, @@ -205,6 +212,7 @@ extern int memcntl(); extern int msync(); extern int madvise(); extern int getpagesizes(); +extern int getpagesizes2(); extern int mlock(); extern int mlockall(); extern int munlock(); diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h index 9fbc05f054..b04d0cf876 100644 --- a/usr/src/uts/common/sys/syscall.h +++ b/usr/src/uts/common/sys/syscall.h @@ -195,6 +195,11 @@ extern "C" { * wracct(...) :: exacct(2, ...) */ #define SYS_getpagesizes 73 + /* + * subcodes: + * getpagesizes2(...) :: getpagesizes(0, ...) + * getpagesizes(...) :: getpagesizes(1, ...) 
legacy + */ #define SYS_rctlsys 74 /* * subcodes: diff --git a/usr/src/uts/common/syscall/getpagesizes.c b/usr/src/uts/common/syscall/getpagesizes.c index d53e9a9936..cfef91eb6c 100644 --- a/usr/src/uts/common/syscall/getpagesizes.c +++ b/usr/src/uts/common/syscall/getpagesizes.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -35,9 +34,9 @@ * Return supported page sizes. */ int -getpagesizes(size_t *buf, int nelem) +getpagesizes(int legacy, size_t *buf, int nelem) { - int i, pagesizes = page_num_user_pagesizes(); + int i, pagesizes = page_num_user_pagesizes(legacy); size_t *pgsza; if (nelem < 0) { @@ -74,9 +73,9 @@ getpagesizes(size_t *buf, int nelem) * a 32-bit address space. 
*/ int -getpagesizes32(size32_t *buf, int nelem) +getpagesizes32(int legacy, size32_t *buf, int nelem) { - int i, pagesizes = page_num_user_pagesizes(); + int i, pagesizes = page_num_user_pagesizes(legacy); size32_t *pgsza32; size_t pgsz; int rc; @@ -116,7 +115,8 @@ getpagesizes32(size32_t *buf, int nelem) } rc = nelem; done: - kmem_free(pgsza32, sizeof (*pgsza32) * page_num_user_pagesizes()); + kmem_free(pgsza32, sizeof (*pgsza32) * + page_num_user_pagesizes(legacy)); return (rc); } #endif diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index 45a3811eaf..89525bd8f7 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -834,7 +834,7 @@ extern uint_t colorequiv; extern uchar_t colorequivszc[]; uint_t page_num_pagesizes(void); -uint_t page_num_user_pagesizes(void); +uint_t page_num_user_pagesizes(int); size_t page_get_pagesize(uint_t); size_t page_get_user_pagesize(uint_t n); pgcnt_t page_get_pagecnt(uint_t); diff --git a/usr/src/uts/common/vm/vm_pagelist.c b/usr/src/uts/common/vm/vm_pagelist.c index d45b8cd0fe..889b8ad690 100644 --- a/usr/src/uts/common/vm/vm_pagelist.c +++ b/usr/src/uts/common/vm/vm_pagelist.c @@ -419,10 +419,18 @@ page_szc_user_filtered(size_t pagesize) * Return how many page sizes are available for the user to use. This is * what the hardware supports and not based upon how the OS implements the * support of different page sizes. + * + * If legacy is non-zero, return the number of pagesizes available to legacy + * applications. The number of legacy page sizes might be less than the + * exported user page sizes. This is to prevent legacy applications that + * use the largest page size returned from getpagesizes(3c) from inadvertantly + * using the 'new' large pagesizes. 
*/ uint_t -page_num_user_pagesizes(void) +page_num_user_pagesizes(int legacy) { + if (legacy) + return (mmu_legacy_page_sizes); return (mmu_exported_page_sizes); } @@ -3311,7 +3319,6 @@ trimkcage(struct memseg *mseg, pfn_t *lo, pfn_t *hi, pfn_t pfnlo, pfn_t pfnhi) * 'pfnflag' specifies the subset of the pfn range to search. */ - static page_t * page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags, pfn_t pfnlo, pfn_t pfnhi, pgcnt_t pfnflag) @@ -3330,7 +3337,9 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags, ASSERT(szc != 0 || (flags & PGI_PGCPSZC0)); - if ((pfnhi - pfnlo) + 1 < szcpgcnt) + pfnlo = P2ROUNDUP(pfnlo, szcpgcnt); + + if ((pfnhi - pfnlo) + 1 < szcpgcnt || pfnlo >= pfnhi) return (NULL); ASSERT(szc < mmu_page_sizes); @@ -3368,15 +3377,16 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags, pgcnt_t szcpages; int slotlen; - pfnlo = P2ROUNDUP(pfnlo, szcpgcnt); - pfnhi = pfnhi & ~(szcpgcnt - 1); - + pfnhi = P2ALIGN((pfnhi + 1), szcpgcnt) - 1; szcpages = ((pfnhi - pfnlo) + 1) / szcpgcnt; slotlen = howmany(szcpages, slots); + /* skip if 'slotid' slot is empty */ + if (slotid * slotlen >= szcpages) + return (NULL); pfnlo = pfnlo + (((slotid * slotlen) % szcpages) * szcpgcnt); ASSERT(pfnlo < pfnhi); if (pfnhi > pfnlo + (slotlen * szcpgcnt)) - pfnhi = pfnlo + (slotlen * szcpgcnt); + pfnhi = pfnlo + (slotlen * szcpgcnt) - 1; } memsegs_lock(0); @@ -3406,8 +3416,9 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags, /* round to szcpgcnt boundaries */ lo = P2ROUNDUP(lo, szcpgcnt); + MEM_NODE_ITERATOR_INIT(lo, mnode, &it); - hi = hi & ~(szcpgcnt - 1); + hi = P2ALIGN((hi + 1), szcpgcnt) - 1; if (hi <= lo) continue; @@ -3449,7 +3460,7 @@ page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags, ASSERT(randpp->p_pagenum == randpfn); pp = randpp; - endpp = mseg->pages + (hi - mseg->pages_base); + endpp = mseg->pages + (hi - mseg->pages_base) + 1; ASSERT(randpp + szcpgcnt <= endpp); 
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 6ee6b2c737..7373bf45c1 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -1025,6 +1025,12 @@ cpuid_pass1(cpu_t *cpu) if (cp->cp_edx & CPUID_AMD_EDX_NX) feature |= X86_NX; +#if defined(__amd64) + /* 1 GB large page - enable only for 64 bit kernel */ + if (cp->cp_edx & CPUID_AMD_EDX_1GPG) + feature |= X86_1GPG; +#endif + if ((cpi->cpi_vendor == X86_VENDOR_AMD) && (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) && (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index 1730b1ce41..f1065b49e8 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -1823,6 +1823,8 @@ startup_vm(void) /* * disable automatic large pages for small memory systems or * when the disable flag is set. + * + * Do not yet consider page sizes larger than 2m/4m. */ if (!auto_lpg_disable && mmu.max_page_level > 0) { max_uheap_lpsize = LEVEL_SIZE(1); @@ -1837,9 +1839,7 @@ startup_vm(void) use_brk_lpg = 0; use_stk_lpg = 0; } - if (mmu.max_page_level > 0) { - mcntl0_lpsize = LEVEL_SIZE(1); - } + mcntl0_lpsize = LEVEL_SIZE(mmu.umax_page_level); PRM_POINT("Calling hat_init_finish()..."); hat_init_finish(); diff --git a/usr/src/uts/i86pc/sys/machparam.h b/usr/src/uts/i86pc/sys/machparam.h index 218142c05c..2406a9a898 100644 --- a/usr/src/uts/i86pc/sys/machparam.h +++ b/usr/src/uts/i86pc/sys/machparam.h @@ -77,7 +77,7 @@ extern "C" { */ /* supported page sizes */ -#define MMU_PAGE_SIZES 2 +#define MMU_PAGE_SIZES 3 /* * MMU_PAGES* describes the physical page size used by the mapping hardware. 
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c index 217cb9aa0b..57b54e166b 100644 --- a/usr/src/uts/i86pc/vm/hat_i86.c +++ b/usr/src/uts/i86pc/vm/hat_i86.c @@ -130,6 +130,14 @@ static int num_kernel_ranges; uint_t use_boot_reserve = 1; /* cleared after early boot process */ uint_t can_steal_post_boot = 0; /* set late in boot to enable stealing */ +/* export 1g page size to user applications if set */ +int enable_1gpg; + +#ifdef DEBUG +uint_t map1gcnt; +#endif + + /* * A cpuset for all cpus. This is used for kernel address cross calls, since * the kernel addresses apply to all cpus. @@ -525,12 +533,25 @@ mmu_init(void) * Initialize parameters based on the 64 or 32 bit kernels and * for the 32 bit kernel decide if we should use PAE. */ - if (kbm_largepage_support) - mmu.max_page_level = 1; - else + if (kbm_largepage_support) { + if (x86_feature & X86_1GPG) { + mmu.max_page_level = 2; + mmu.umax_page_level = (enable_1gpg) ? 2 : 1; + } else { + mmu.max_page_level = 1; + mmu.umax_page_level = 1; + } + } else { mmu.max_page_level = 0; + mmu.umax_page_level = 0; + } mmu_page_sizes = mmu.max_page_level + 1; - mmu_exported_page_sizes = mmu_page_sizes; + mmu_exported_page_sizes = mmu.umax_page_level + 1; + + /* restrict legacy applications from using pagesizes 1g and above */ + mmu_legacy_page_sizes = + (mmu_exported_page_sizes > 2) ? 
2 : mmu_exported_page_sizes; + #if defined(__amd64) @@ -1296,6 +1317,13 @@ hati_pte_map( } else { ASSERT(flags & HAT_LOAD_NOCONSIST); } +#if defined(__amd64) + if (ht->ht_flags & HTABLE_VLP) { + cpu_t *cpu = CPU; + x86pte_t *vlpptep = cpu->cpu_hat_info->hci_vlp_l2ptes; + VLP_COPY(hat->hat_vlp_ptes, vlpptep); + } +#endif HTABLE_INC(ht->ht_valid_cnt); PGCNT_INC(hat, l); return (rv); @@ -1616,8 +1644,13 @@ hat_memload_array( ASSERT(pages[pgindx] + i == pages[pgindx + i]); } - if (i == mmu_btop(pgsize)) + if (i == mmu_btop(pgsize)) { +#ifdef DEBUG + if (level == 2) + map1gcnt++; +#endif break; + } } } @@ -1720,8 +1753,13 @@ hat_devload( break; if (IS_P2ALIGNED(va, pgsize) && (eva - va) >= pgsize && - IS_P2ALIGNED(pfn, mmu_btop(pgsize))) + IS_P2ALIGNED(pfn, mmu_btop(pgsize))) { +#ifdef DEBUG + if (level == 2) + map1gcnt++; +#endif break; + } } /* diff --git a/usr/src/uts/i86pc/vm/hat_pte.h b/usr/src/uts/i86pc/vm/hat_pte.h index cc54b4b7a6..8e5686f4ff 100644 --- a/usr/src/uts/i86pc/vm/hat_pte.h +++ b/usr/src/uts/i86pc/vm/hat_pte.h @@ -139,7 +139,7 @@ extern "C" { * ... */ #define MAX_NUM_LEVEL 4 -#define MAX_PAGE_LEVEL 1 /* for now.. sigh */ +#define MAX_PAGE_LEVEL 2 typedef int8_t level_t; #define LEVEL_SHIFT(l) (mmu.level_shift[l]) #define LEVEL_SIZE(l) (mmu.level_size[l]) @@ -173,6 +173,7 @@ struct hat_mmu_info { uint_t num_level; /* number of page table levels in use */ uint_t max_level; /* just num_level - 1 */ uint_t max_page_level; /* maximum level at which we can map a page */ + uint_t umax_page_level; /* max user page map level */ uint_t ptes_per_table; /* # of entries in lower level page tables */ uint_t top_level_count; /* # of entries in top most level page table */ diff --git a/usr/src/uts/i86pc/vm/htable.c b/usr/src/uts/i86pc/vm/htable.c index b5cc9074e5..d2dff9c55d 100644 --- a/usr/src/uts/i86pc/vm/htable.c +++ b/usr/src/uts/i86pc/vm/htable.c @@ -1655,7 +1655,7 @@ htable_walk( * Find the level of the largest pagesize used by this HAT. 
*/ if (hat->hat_ism_pgcnt > 0) { - max_mapped_level = mmu.max_page_level; + max_mapped_level = mmu.umax_page_level; } else { max_mapped_level = 0; for (l = 1; l <= mmu.max_page_level; ++l) @@ -2175,7 +2175,7 @@ x86pte_inval( x86pte_t found; ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); - ASSERT(ht->ht_level != VLP_LEVEL); + ASSERT(ht->ht_level <= mmu.max_page_level); if (pte_ptr != NULL) ptep = pte_ptr; @@ -2241,7 +2241,7 @@ x86pte_update( ASSERT(new != 0); ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); - ASSERT(ht->ht_level != VLP_LEVEL); + ASSERT(ht->ht_level <= mmu.max_page_level); ptep = x86pte_access_pagetable(ht, entry); XPV_ALLOW_PAGETABLE_UPDATES(); diff --git a/usr/src/uts/i86pc/vm/i86_mmu.c b/usr/src/uts/i86pc/vm/i86_mmu.c index 5b6aed8b37..b1cfcaeccf 100644 --- a/usr/src/uts/i86pc/vm/i86_mmu.c +++ b/usr/src/uts/i86pc/vm/i86_mmu.c @@ -337,11 +337,13 @@ hat_kern_alloc( paddr = pmem->address; psize = pmem->size; while (psize >= MMU_PAGESIZE) { - if ((paddr & LEVEL_OFFSET(lpagel)) == 0 && - psize > LEVEL_SIZE(lpagel)) - l = lpagel; - else - l = 0; + /* find the largest page size */ + for (l = lpagel; l > 0; l--) { + if ((paddr & LEVEL_OFFSET(l)) == 0 && + psize > LEVEL_SIZE(l)) + break; + } + #if defined(__xpv) /* * Create read/only mappings to avoid @@ -410,10 +412,12 @@ hat_kern_alloc( * increment table_cnt. We can stop at the 1st level where * they are in the same htable. */ - if (size == MMU_PAGESIZE) - start_level = 0; - else - start_level = 1; + start_level = 0; + while (start_level <= mmu.max_page_level) { + if (size == LEVEL_SIZE(start_level)) + break; + start_level++; + } for (l = start_level; l < mmu.max_level; ++l) { if (va >> LEVEL_SHIFT(l + 1) == @@ -422,7 +426,8 @@ hat_kern_alloc( ++table_cnt; } last_va = va; - va = (va & LEVEL_MASK(1)) + LEVEL_SIZE(1); + l = (start_level == 0) ? 
1 : start_level; + va = (va & LEVEL_MASK(l)) + LEVEL_SIZE(l); } /* diff --git a/usr/src/uts/i86pc/vm/kboot_mmu.c b/usr/src/uts/i86pc/vm/kboot_mmu.c index ae96231f46..b87887a25f 100644 --- a/usr/src/uts/i86pc/vm/kboot_mmu.c +++ b/usr/src/uts/i86pc/vm/kboot_mmu.c @@ -167,7 +167,7 @@ kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel) panic("kbm_map() called too late"); pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE; - if (level == 1) + if (level >= 1) pteval |= PT_PAGESIZE; if (kbm_pge_support && is_kernel) pteval |= PT_GLOBAL; diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h index 43bbd19d6a..fd3f938ab7 100644 --- a/usr/src/uts/i86pc/vm/vm_dep.h +++ b/usr/src/uts/i86pc/vm/vm_dep.h @@ -323,6 +323,12 @@ extern void chk_lpg(page_t *, uchar_t); */ extern uint_t mmu_page_sizes; extern uint_t mmu_exported_page_sizes; +/* + * page sizes that legacy applications can see via getpagesizes(3c). + * Used to prevent legacy applications from inadvertantly using the + * 'new' large pagesizes (1g and above). + */ +extern uint_t mmu_legacy_page_sizes; /* For x86, userszc is the same as the kernel's szc */ #define USERSZC_2_SZC(userszc) (userszc) diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c index b97f459245..fcb0e24b5d 100644 --- a/usr/src/uts/i86pc/vm/vm_machdep.c +++ b/usr/src/uts/i86pc/vm/vm_machdep.c @@ -283,6 +283,9 @@ uint_t mmu_page_sizes; /* How many page sizes the users can see */ uint_t mmu_exported_page_sizes; +/* page sizes that legacy applications can see */ +uint_t mmu_legacy_page_sizes; + /* * Number of pages in 1 GB. Don't enable automatic large pages if we have * fewer than this many pages. 
@@ -383,7 +386,7 @@ map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) /* * use the pages size that best fits len */ - for (l = mmu.max_page_level; l > 0; --l) { + for (l = mmu.umax_page_level; l > 0; --l) { if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) { continue; } else { @@ -399,13 +402,12 @@ map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) } return (pgsz); - /* - * for ISM use the 1st large page size. - */ case MAPPGSZ_ISM: - if (mmu.max_page_level == 0) - return (MMU_PAGESIZE); - return (LEVEL_SIZE(1)); + for (l = mmu.umax_page_level; l > 0; --l) { + if (len >= LEVEL_SIZE(l)) + return (LEVEL_SIZE(l)); + } + return (LEVEL_SIZE(0)); } return (pgsz); } @@ -425,7 +427,7 @@ map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize, return (0); } - for (i = mmu_page_sizes - 1; i > 0; i--) { + for (i = mmu_exported_page_sizes - 1; i > 0; i--) { pgsz = page_get_pagesize(i); if (pgsz > max_lpsize) { continue; @@ -719,7 +721,7 @@ map_addr_proc( */ align_amount = ELF_386_MAXPGSZ; } else { - int l = mmu.max_page_level; + int l = mmu.umax_page_level; while (l && len < LEVEL_SIZE(l)) --l; @@ -1482,6 +1484,8 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) memranges += i; nranges -= i; + ASSERT(mmu_page_sizes <= MMU_PAGE_SIZES); + ASSERT(ISP2(l2_sz)); ASSERT(ISP2(l2_linesz)); ASSERT(l2_sz > MMU_PAGESIZE); diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 78df3a90c8..8a8f34e79c 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -147,7 +147,7 @@ extern "C" { #define CPUID_AMD_EDX_MMX 0x00800000 /* MMX instructions */ #define CPUID_AMD_EDX_FXSR 0x01000000 /* fxsave and fxrstor */ #define CPUID_AMD_EDX_FFXSR 0x02000000 /* fast fxsave/fxrstor */ - /* 0x04000000 - reserved */ +#define CPUID_AMD_EDX_1GPG 0x04000000 /* 1GB page */ #define CPUID_AMD_EDX_TSCP 0x08000000 /* rdtscp instruction */ /* 0x10000000 - 
reserved */ #define CPUID_AMD_EDX_LM 0x20000000 /* AMD: long mode */ @@ -329,6 +329,7 @@ extern "C" { #define X86_SSSE3 0x02000000 #define X86_SSE4_1 0x04000000 #define X86_SSE4_2 0x08000000 +#define X86_1GPG 0x10000000 /* * flags to patch tsc_read routine. diff --git a/usr/src/uts/sun4/io/trapstat.c b/usr/src/uts/sun4/io/trapstat.c index 73521a87c6..2258505b5a 100644 --- a/usr/src/uts/sun4/io/trapstat.c +++ b/usr/src/uts/sun4/io/trapstat.c @@ -2350,7 +2350,7 @@ trapstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) tstat_devi = devi; tstat_pgszs = page_num_pagesizes(); - tstat_user_pgszs = page_num_user_pagesizes(); + tstat_user_pgszs = page_num_user_pagesizes(0); tstat_data_t_size = sizeof (tstat_data_t) + (tstat_pgszs - 1) * sizeof (tstat_pgszdata_t); tstat_data_t_exported_size = sizeof (tstat_data_t) + diff --git a/usr/src/uts/sun4/vm/vm_dep.h b/usr/src/uts/sun4/vm/vm_dep.h index 357f9ba0a3..87f4b1619d 100644 --- a/usr/src/uts/sun4/vm/vm_dep.h +++ b/usr/src/uts/sun4/vm/vm_dep.h @@ -789,6 +789,7 @@ extern uint_t mmu_exported_page_sizes; extern uint_t szc_2_userszc[]; extern uint_t userszc_2_szc[]; +#define mmu_legacy_page_sizes mmu_exported_page_sizes #define USERSZC_2_SZC(userszc) (userszc_2_szc[userszc]) #define SZC_2_USERSZC(szc) (szc_2_userszc[szc]) |