author    | susans <none@none> | 2006-10-26 16:44:53 -0700
committer | susans <none@none> | 2006-10-26 16:44:53 -0700
commit    | ec25b48f5e0576a68280c5e549673a266f0be346 (patch)
tree      | 0809083d34488bf4261bc5614b23c9d61a8d1601
parent    | d7d10855241d89119833d1122507da070aaa6a9a (diff)
download  | illumos-gate-ec25b48f5e0576a68280c5e549673a266f0be346.tar.gz
6254029 memcntl() MC_HAT_ADVISE with page size 0 may cause segment page sizes to be demoted
6325885 map_pgszstk() uses p->p_brkpageszc rather than p->p_stkpageszc
6371967 assign large pages to anon segment created using mmap /dev/zero
6483208 unify and cleanup OOB (out of the box) large pagesize selection code
6483216 use intermediate pagesizes to map the beginning of bss/heap and stack when it may help performance
6483226 bss size is not properly taken into account by LP OOB policy at exec() time
6483230 grow_internal() doesn't properly align stack bottom for large pages
6483231 memcntl.c: ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz));
6483233 provide a mechanism to enable the use of 32M text pages on OPL by default
6485171 memcntl() shouldn't silently fail when stack space is unavailable with requested pagesize
28 files changed, 1096 insertions, 952 deletions
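Several of the bugs above (6254029, 6485171) concern memcntl(2) MC_HAT_ADVISE, the interface a process uses to request a preferred page size for its heap, stack, or an address range. The sketch below is a hypothetical user-level illustration and is not part of this changeset; the 4M page size is an arbitrary example. With this fix, a pagesize of 0 on an explicit range applies the default out-of-the-box large page policy instead of demoting the segment.

```c
/*
 * Hypothetical user-level sketch (not part of this changeset): advise the
 * kernel of a preferred large page size via memcntl(MC_HAT_ADVISE).
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	struct memcntl_mha mha;

	/* Request 4M pages for the heap (bss + brk); addr and len must be 0. */
	mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
	mha.mha_flags = 0;
	mha.mha_pagesize = 4 * 1024 * 1024;	/* illustrative size only */
	if (memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0) != 0)
		perror("MC_HAT_ADVISE heap");

	/*
	 * With this changeset, mha_pagesize == 0 applied to an explicit
	 * address range (MHA_MAPSIZE_VA) selects the default out-of-the-box
	 * large page policy instead of demoting the segment (6254029).
	 */
	return (0);
}
```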
diff --git a/usr/src/uts/common/exec/aout/aout.c b/usr/src/uts/common/exec/aout/aout.c index 4e814b339b..178862e150 100644 --- a/usr/src/uts/common/exec/aout/aout.c +++ b/usr/src/uts/common/exec/aout/aout.c @@ -226,6 +226,7 @@ aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args, edp.ux_bsize, edp.ux_doffset, dataprot, pagedata, 0)) goto done; + exenv.ex_bssbase = (caddr_t)edp.ux_datorg; exenv.ex_brkbase = (caddr_t)edp.ux_datorg; exenv.ex_brksize = edp.ux_dsize + edp.ux_bsize; exenv.ex_magic = edp.ux_mag; diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c index 6508cdae85..53905c1013 100644 --- a/usr/src/uts/common/exec/elf/elf.c +++ b/usr/src/uts/common/exec/elf/elf.c @@ -1096,6 +1096,7 @@ mapelfexec( off_t offset; int hsize = ehdr->e_phentsize; caddr_t mintmp = (caddr_t)-1; + extern int use_brk_lpg; if (ehdr->e_type == ET_DYN) { /* @@ -1145,47 +1146,41 @@ mapelfexec( page = 0; } + /* + * Set the heap pagesize for OOB when the bss size + * is known and use_brk_lpg is not 0. + */ + if (brksize != NULL && use_brk_lpg && + zfodsz != 0 && phdr == dataphdrp && + (prot & PROT_WRITE)) { + size_t tlen = P2NPHASE((uintptr_t)addr + + phdr->p_filesz, PAGESIZE); + + if (zfodsz > tlen) { + curproc->p_brkpageszc = + page_szc(map_pgsz(MAPPGSZ_HEAP, + curproc, addr + phdr->p_filesz + + tlen, zfodsz - tlen, 0)); + } + } + if (curproc->p_brkpageszc != 0 && phdr == dataphdrp && (prot & PROT_WRITE)) { - /* - * segvn only uses large pages for segments - * that have the requested large page size - * aligned base and size. To insure the part - * of bss that starts at heap large page size - * boundary gets mapped by large pages create - * 2 bss segvn segments which is accomplished - * by calling execmap twice. First execmap - * will create the bss segvn segment that is - * before the large page boundary and it will - * be mapped with base pages. If bss start is - * already large page aligned only 1 bss - * segment will be created. The second bss - * segment's size is large page size aligned - * so that segvn uses large pages for that - * segment and it also makes the heap that - * starts right after bss to start at large - * page boundary. 
- */ uint_t szc = curproc->p_brkpageszc; size_t pgsz = page_get_pagesize(szc); - caddr_t zaddr = addr + phdr->p_filesz; - size_t zlen = P2NPHASE((uintptr_t)zaddr, pgsz); + caddr_t ebss = addr + phdr->p_memsz; + size_t extra_zfodsz; ASSERT(pgsz > PAGESIZE); + extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz); + if (error = execmap(vp, addr, phdr->p_filesz, - zlen, phdr->p_offset, prot, page, szc)) + zfodsz + extra_zfodsz, phdr->p_offset, + prot, page, szc)) goto bad; - if (zfodsz > zlen) { - zfodsz -= zlen; - zaddr += zlen; - zlen = P2ROUNDUP(zfodsz, pgsz); - if (error = execmap(vp, zaddr, 0, zlen, - phdr->p_offset, prot, page, szc)) - goto bad; - } if (brksize != NULL) - *brksize = zlen - zfodsz; + *brksize = extra_zfodsz; } else { if (error = execmap(vp, addr, phdr->p_filesz, zfodsz, phdr->p_offset, prot, page, 0)) diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c index 657d87300f..92e1c8402d 100644 --- a/usr/src/uts/common/os/exec.c +++ b/usr/src/uts/common/os/exec.c @@ -89,7 +89,6 @@ uint_t auxv_hwcap = 0; /* auxv AT_SUN_HWCAP value; determined on the fly */ uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */ #endif -int exec_lpg_disable = 0; #define PSUIDFLAGS (SNOCD|SUGID) /* @@ -1114,7 +1113,23 @@ execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen, error = ENOMEM; goto bad; } - crargs.szc = szc; + if (szc > 0) { + /* + * ASSERT alignment because the mapelfexec() + * caller for the szc > 0 case extended zfod + * so it's end is pgsz aligned. + */ + size_t pgsz = page_get_pagesize(szc); + ASSERT(IS_P2ALIGNED(zfodbase + zfodlen, pgsz)); + + if (IS_P2ALIGNED(zfodbase, pgsz)) { + crargs.szc = szc; + } else { + crargs.szc = AS_MAP_HEAP; + } + } else { + crargs.szc = AS_MAP_NO_LPOOB; + } if (error = as_map(p->p_as, (caddr_t)zfodbase, zfodlen, segvn_create, &crargs)) goto bad; @@ -1555,11 +1570,6 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up) return (0); } -#ifdef DEBUG -int mpss_brkpgszsel = 0; -int mpss_stkpgszsel = 0; -#endif - /* * Initialize a new user stack with the specified arguments and environment. 
* The initial user stack layout is as follows: @@ -1614,6 +1624,7 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) rctl_entity_p_t e; struct as *as; + extern int use_stk_lpg; args->from_model = p->p_model; if (p->p_model == DATAMODEL_NATIVE) { @@ -1751,7 +1762,9 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) p->p_brkbase = NULL; p->p_brksize = 0; + p->p_brkpageszc = 0; p->p_stksize = 0; + p->p_stkpageszc = 0; p->p_model = args->to_model; p->p_usrstack = usrstack; p->p_stkprot = args->stk_prot; @@ -1766,52 +1779,15 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp) e.rcep_t = RCENTITY_PROCESS; rctl_set_reset(p->p_rctls, p, &e); - if (exec_lpg_disable == 0) { -#ifdef DEBUG - uint_t pgsizes = page_num_pagesizes(); - uint_t szc; -#endif - p->p_brkpageszc = args->brkpageszc; - p->p_stkpageszc = args->stkpageszc; - - if (p->p_brkpageszc == 0) { - p->p_brkpageszc = page_szc(map_pgsz(MAPPGSZ_HEAP, - p, 0, 0, NULL)); - } - if (p->p_stkpageszc == 0) { - p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, - p, 0, 0, NULL)); - } - -#ifdef DEBUG - if (mpss_brkpgszsel != 0) { - if (mpss_brkpgszsel == -1) { - szc = ((uint_t)gethrtime() >> 8) % pgsizes; - } else { - szc = mpss_brkpgszsel % pgsizes; - } - p->p_brkpageszc = szc; - } - - if (mpss_stkpgszsel != 0) { - if (mpss_stkpgszsel == -1) { - szc = ((uint_t)gethrtime() >> 7) % pgsizes; - } else { - szc = mpss_stkpgszsel % pgsizes; - } - p->p_stkpageszc = szc; - } - -#endif - mutex_enter(&p->p_lock); - p->p_flag |= SAUTOLPG; /* kernel controls page sizes */ - mutex_exit(&p->p_lock); - - } else { - p->p_brkpageszc = 0; - p->p_stkpageszc = 0; + /* Too early to call map_pgsz for the heap */ + if (use_stk_lpg) { + p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0)); } + mutex_enter(&p->p_lock); + p->p_flag |= SAUTOLPG; /* kernel controls page sizes */ + mutex_exit(&p->p_lock); + exec_set_sp(size); as = as_alloc(); diff --git a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c index c1e3326c67..115a1312fa 100644 --- a/usr/src/uts/common/os/grow.c +++ b/usr/src/uts/common/os/grow.c @@ -60,7 +60,6 @@ int use_brk_lpg = 1; int use_stk_lpg = 1; -int use_zmap_lpg = 1; static int brk_lpg(caddr_t nva); static int grow_lpg(caddr_t sp); @@ -96,12 +95,11 @@ brk_lpg(caddr_t nva) { struct proc *p = curproc; size_t pgsz, len; - caddr_t addr; + caddr_t addr, brkend; caddr_t bssbase = p->p_bssbase; caddr_t brkbase = p->p_brkbase; int oszc, szc; int err; - int remap = 0; oszc = p->p_brkpageszc; @@ -115,7 +113,7 @@ brk_lpg(caddr_t nva) len = nva - bssbase; - pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, &remap); + pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0); szc = page_szc(pgsz); /* @@ -133,28 +131,6 @@ brk_lpg(caddr_t nva) return (err); } - if (remap == 0) { - /* - * Map from the current brk end up to the new page size - * alignment using the current page size. - */ - addr = brkbase + p->p_brksize; - addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - if (addr < nva) { - err = brk_internal(addr, oszc); - /* - * In failure case, try again if oszc is not base page - * size, then return err. - */ - if (err != 0) { - if (oszc != 0) { - err = brk_internal(nva, 0); - } - return (err); - } - } - } - err = brk_internal(nva, szc); /* If using szc failed, map with base page size and return. 
*/ if (err != 0) { @@ -164,16 +140,18 @@ brk_lpg(caddr_t nva) return (err); } - if (remap != 0) { - /* - * Round up brk base to a large page boundary and remap - * anything in the segment already faulted in beyond that - * point. - */ - addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); - len = (brkbase + p->p_brksize) - addr; - /* advisory, so ignore errors */ + /* + * Round up brk base to a large page boundary and remap + * anything in the segment already faulted in beyond that + * point. + */ + addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); + brkend = brkbase + p->p_brksize; + len = brkend - addr; + /* Check that len is not negative. Update page size code for heap. */ + if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) { (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE); + p->p_brkpageszc = szc; } ASSERT(err == 0); @@ -272,8 +250,26 @@ brk_internal(caddr_t nva, uint_t brkszc) /* * Add new zfod mapping to extend UNIX data segment + * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies + * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate + * page sizes if ova is not aligned to szc's pgsz. */ - crargs.szc = szc; + if (szc > 0) { + caddr_t rbss; + + rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, + pgsz); + if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) { + crargs.szc = p->p_brkpageszc ? p->p_brkpageszc : + AS_MAP_NO_LPOOB; + } else if (ova == rbss) { + crargs.szc = szc; + } else { + crargs.szc = AS_MAP_HEAP; + } + } else { + crargs.szc = AS_MAP_NO_LPOOB; + } crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP; error = as_map(as, ova, (size_t)(nva - ova), segvn_create, &crargs); @@ -288,7 +284,6 @@ brk_internal(caddr_t nva, uint_t brkszc) (void) as_unmap(as, nva, (size_t)(ova - nva)); } p->p_brksize = size; - p->p_brkpageszc = szc; return (0); } @@ -300,6 +295,9 @@ int grow(caddr_t sp) { struct proc *p = curproc; + struct as *as = p->p_as; + size_t oldsize = p->p_stksize; + size_t newsize; int err; /* @@ -307,13 +305,24 @@ grow(caddr_t sp) * This also serves as the lock protecting p_stksize * and p_stkpageszc. */ - as_rangelock(p->p_as); + as_rangelock(as); if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) { err = grow_lpg(sp); } else { err = grow_internal(sp, p->p_stkpageszc); } - as_rangeunlock(p->p_as); + as_rangeunlock(as); + + if (err == 0 && (newsize = p->p_stksize) > oldsize) { + ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE)); + ASSERT(IS_P2ALIGNED(newsize, PAGESIZE)); + /* + * Set up translations so the process doesn't have to fault in + * the stack pages we just gave it. + */ + (void) as_fault(as->a_hat, as, p->p_usrstack - newsize, + newsize - oldsize, F_INVAL, S_WRITE); + } return ((err == 0 ? 1 : 0)); } @@ -328,15 +337,15 @@ grow_lpg(caddr_t sp) struct proc *p = curproc; size_t pgsz; size_t len, newsize; - caddr_t addr, oldsp; + caddr_t addr, saddr; + caddr_t growend; int oszc, szc; int err; - int remap = 0; newsize = p->p_usrstack - sp; oszc = p->p_stkpageszc; - pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, &remap); + pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0); szc = page_szc(pgsz); /* @@ -357,30 +366,8 @@ grow_lpg(caddr_t sp) /* * We've grown sufficiently to switch to a new page size. - * If we're not going to remap the whole segment with the new - * page size, split the grow into two operations: map to the new - * page size alignment boundary with the existing page size, then - * map the rest with the new page size. + * So we are going to remap the whole segment with the new page size. 
*/ - err = 0; - if (remap == 0) { - oldsp = p->p_usrstack - p->p_stksize; - addr = (caddr_t)P2ALIGN((uintptr_t)oldsp, pgsz); - if (addr > sp) { - err = grow_internal(addr, oszc); - /* - * In this case, grow with oszc failed, so grow all the - * way to sp with base page size. - */ - if (err != 0) { - if (oszc != 0) { - err = grow_internal(sp, 0); - } - return (err); - } - } - } - err = grow_internal(sp, szc); /* The grow with szc failed, so fall back to base page size. */ if (err != 0) { @@ -390,22 +377,21 @@ grow_lpg(caddr_t sp) return (err); } - if (remap) { - /* - * Round up stack pointer to a large page boundary and remap - * any pgsz pages in the segment already faulted in beyond that - * point. - */ - addr = p->p_usrstack - p->p_stksize; - addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - len = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz) - addr; - /* advisory, so ignore errors */ + /* + * Round up stack pointer to a large page boundary and remap + * any pgsz pages in the segment already faulted in beyond that + * point. + */ + saddr = p->p_usrstack - p->p_stksize; + addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz); + growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz); + len = growend - addr; + /* Check that len is not negative. Update page size code for stack. */ + if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) { (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE); + p->p_stkpageszc = szc; } - /* Update page size code for stack. */ - p->p_stkpageszc = szc; - ASSERT(err == 0); return (err); /* should always be 0 */ } @@ -418,8 +404,7 @@ int grow_internal(caddr_t sp, uint_t growszc) { struct proc *p = curproc; - struct as *as = p->p_as; - size_t newsize = p->p_usrstack - sp; + size_t newsize; size_t oldsize; int error; size_t pgsz; @@ -427,6 +412,7 @@ grow_internal(caddr_t sp, uint_t growszc) struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); ASSERT(sp < p->p_usrstack); + sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE); /* * grow to growszc alignment but use current p->p_stkpageszc for @@ -437,7 +423,7 @@ grow_internal(caddr_t sp, uint_t growszc) if ((szc = growszc) != 0) { pgsz = page_get_pagesize(szc); ASSERT(pgsz > PAGESIZE); - newsize = P2ROUNDUP(newsize, pgsz); + newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz); if (newsize > (size_t)p->p_stk_ctl) { szc = 0; pgsz = PAGESIZE; @@ -445,6 +431,7 @@ grow_internal(caddr_t sp, uint_t growszc) } } else { pgsz = PAGESIZE; + newsize = p->p_usrstack - sp; } if (newsize > (size_t)p->p_stk_ctl) { @@ -455,7 +442,6 @@ grow_internal(caddr_t sp, uint_t growszc) } oldsize = p->p_stksize; - newsize = P2ROUNDUP(newsize, pgsz); ASSERT(P2PHASE(oldsize, PAGESIZE) == 0); if (newsize <= oldsize) { /* prevent the stack from shrinking */ @@ -466,13 +452,31 @@ grow_internal(caddr_t sp, uint_t growszc) crargs.prot &= ~PROT_EXEC; } /* - * extend stack with the p_stkpageszc. growszc is different than - * p_stkpageszc only on a memcntl to increase the stack pagesize. + * extend stack with the proposed new growszc, which is different + * than p_stkpageszc only on a memcntl to increase the stack pagesize. + * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via + * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes + * if not aligned to szc's pgsz. 
*/ - crargs.szc = p->p_stkpageszc; + if (szc > 0) { + caddr_t oldsp = p->p_usrstack - oldsize; + caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, + pgsz); + + if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) { + crargs.szc = p->p_stkpageszc ? p->p_stkpageszc : + AS_MAP_NO_LPOOB; + } else if (oldsp == austk) { + crargs.szc = szc; + } else { + crargs.szc = AS_MAP_STACK; + } + } else { + crargs.szc = AS_MAP_NO_LPOOB; + } crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN; - if ((error = as_map(as, p->p_usrstack - newsize, newsize - oldsize, + if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize, segvn_create, &crargs)) != 0) { if (error == EAGAIN) { cmn_err(CE_WARN, "Sorry, no swap space to grow stack " @@ -481,15 +485,6 @@ grow_internal(caddr_t sp, uint_t growszc) return (error); } p->p_stksize = newsize; - - - /* - * Set up translations so the process doesn't have to fault in - * the stack pages we just gave it. - */ - (void) as_fault(as->a_hat, as, - p->p_usrstack - newsize, newsize - oldsize, F_INVAL, S_WRITE); - return (0); } @@ -500,13 +495,7 @@ static int zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags, offset_t pos) { - struct segvn_crargs a, b; - struct proc *p = curproc; - int err; - size_t pgsz; - size_t l0, l1, l2, l3, l4; /* 0th through 5th chunks */ - caddr_t ruaddr, ruaddr0; /* rounded up addresses */ - extern size_t auto_lpg_va_default; + struct segvn_crargs vn_a; if (((PROT_ALL & uprot) != uprot)) return (EACCES); @@ -549,130 +538,18 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags, * Use the seg_vn segment driver; passing in the NULL amp * gives the desired "cloning" effect. */ - a.vp = NULL; - a.offset = 0; - a.type = flags & MAP_TYPE; - a.prot = uprot; - a.maxprot = PROT_ALL; - a.flags = flags & ~MAP_TYPE; - a.cred = CRED(); - a.amp = NULL; - a.szc = 0; - a.lgrp_mem_policy_flags = 0; - - /* - * Call arch-specific map_pgsz routine to pick best page size to map - * this segment, and break the mapping up into parts if required. - * - * The parts work like this: - * - * addr --------- - * | | l0 - * --------- - * | | l1 - * --------- - * | | l2 - * --------- - * | | l3 - * --------- - * | | l4 - * --------- - * addr+len - * - * Starting from the middle, l2 is the number of bytes mapped by the - * selected large page. l1 and l3 are mapped by auto_lpg_va_default - * page size pages, and l0 and l4 are mapped by base page size pages. - * If auto_lpg_va_default is the base page size, then l0 == l4 == 0. - * If the requested address or length are aligned to the selected large - * page size, l1 or l3 may also be 0. 
- */ - if (use_zmap_lpg && a.type == MAP_PRIVATE) { - - pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL); - if (pgsz <= PAGESIZE || len < pgsz) { - return (as_map(as, *addrp, len, segvn_create, &a)); - } - - ruaddr = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, pgsz); - if (auto_lpg_va_default != MMU_PAGESIZE) { - ruaddr0 = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, - auto_lpg_va_default); - l0 = ruaddr0 - *addrp; - } else { - l0 = 0; - ruaddr0 = *addrp; - } - l1 = ruaddr - ruaddr0; - l3 = P2PHASE(len - l0 - l1, pgsz); - if (auto_lpg_va_default == MMU_PAGESIZE) { - l4 = 0; - } else { - l4 = P2PHASE(l3, auto_lpg_va_default); - l3 -= l4; - } - l2 = len - l0 - l1 - l3 - l4; - - if (l0) { - b = a; - err = as_map(as, *addrp, l0, segvn_create, &b); - if (err) { - return (err); - } - } - - if (l1) { - b = a; - b.szc = page_szc(auto_lpg_va_default); - err = as_map(as, ruaddr0, l1, segvn_create, &b); - if (err) { - goto error1; - } - } - - if (l2) { - b = a; - b.szc = page_szc(pgsz); - err = as_map(as, ruaddr, l2, segvn_create, &b); - if (err) { - goto error2; - } - } - - if (l3) { - b = a; - b.szc = page_szc(auto_lpg_va_default); - err = as_map(as, ruaddr + l2, l3, segvn_create, &b); - if (err) { - goto error3; - } - } - if (l4) { - err = as_map(as, ruaddr + l2 + l3, l4, segvn_create, - &a); - if (err) { -error3: - if (l3) { - (void) as_unmap(as, ruaddr + l2, l3); - } -error2: - if (l2) { - (void) as_unmap(as, ruaddr, l2); - } -error1: - if (l1) { - (void) as_unmap(as, ruaddr0, l1); - } - if (l0) { - (void) as_unmap(as, *addrp, l0); - } - return (err); - } - } - - return (0); - } - - return (as_map(as, *addrp, len, segvn_create, &a)); + vn_a.vp = NULL; + vn_a.offset = 0; + vn_a.type = flags & MAP_TYPE; + vn_a.prot = uprot; + vn_a.maxprot = PROT_ALL; + vn_a.flags = flags & ~MAP_TYPE; + vn_a.cred = CRED(); + vn_a.amp = NULL; + vn_a.szc = 0; + vn_a.lgrp_mem_policy_flags = 0; + + return (as_map(as, *addrp, len, segvn_create, &vn_a)); } static int diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c index 5c03ab7803..5650eb3b2e 100644 --- a/usr/src/uts/common/os/shm.c +++ b/usr/src/uts/common/os/shm.c @@ -341,8 +341,7 @@ shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp) * [D]ISM segment, then use the previously selected page size. 
*/ if (!isspt(sp)) { - share_size = map_pgsz(MAPPGSZ_ISM, - pp, addr, size, NULL); + share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0); if (share_size == 0) { as_rangeunlock(as); error = EINVAL; diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h index a5eaf18edd..de8a940db7 100644 --- a/usr/src/uts/common/sys/exec.h +++ b/usr/src/uts/common/sys/exec.h @@ -101,8 +101,6 @@ typedef struct uarg { size_t from_ptrsize; size_t ncargs; struct execsw *execswp; - uint_t stkpageszc; - uint_t brkpageszc; uintptr_t entry; uintptr_t thrptr; char *emulator; diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h index 1f0aea0235..f00941f17e 100644 --- a/usr/src/uts/common/sys/vmsystm.h +++ b/usr/src/uts/common/sys/vmsystm.h @@ -105,6 +105,14 @@ extern pgcnt_t pages_before_pager; /* XXX */ #define MAPPGSZ_HEAP 0x04 #define MAPPGSZ_ISM 0x08 +/* + * Flags for map_pgszcvec + */ +#define MAPPGSZC_SHM 0x01 +#define MAPPGSZC_PRIVM 0x02 +#define MAPPGSZC_STACK 0x04 +#define MAPPGSZC_HEAP 0x08 + struct as; struct page; struct anon; @@ -118,10 +126,10 @@ extern int valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir); extern int valid_usr_range(caddr_t, size_t, uint_t, struct as *, caddr_t); extern int useracc(void *, size_t, int); -extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr, - size_t len, int *remap); -extern uint_t map_execseg_pgszcvec(int, caddr_t, size_t); -extern uint_t map_shm_pgszcvec(caddr_t, size_t, uintptr_t); +extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, + int memcntl); +extern uint_t map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, + int type, int memcntl); extern void map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags); extern int map_addr_vacalign_check(caddr_t, u_offset_t); diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c index 6bdf5a1cc2..5bff588641 100644 --- a/usr/src/uts/common/syscall/memcntl.c +++ b/usr/src/uts/common/syscall/memcntl.c @@ -199,31 +199,36 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) else type = MAPPGSZ_STK; - pgsz = map_pgsz(type, p, 0, 0, NULL); + pgsz = map_pgsz(type, p, 0, 0, 1); } } else { /* + * addr and len must be valid for range specified. + */ + if (valid_usr_range(addr, len, 0, as, + as->a_userlimit) != RANGE_OKAY) { + return (set_errno(ENOMEM)); + } + /* * Note that we don't disable automatic large page * selection for anon segments based on use of * memcntl(). */ if (pgsz == 0) { - pgsz = map_pgsz(MAPPGSZ_VA, p, addr, len, - NULL); + error = as_set_default_lpsize(as, addr, len); + if (error) { + (void) set_errno(error); + } + return (error); } /* * addr and len must be prefered page size aligned - * and valid for range specified. */ if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { return (set_errno(EINVAL)); } - if (valid_usr_range(addr, len, 0, as, - as->a_userlimit) != RANGE_OKAY) { - return (set_errno(ENOMEM)); - } } szc = mem_getpgszc(pgsz); @@ -257,10 +262,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) return (set_errno(error)); } } + /* + * It is possible for brk_internal to silently fail to + * promote the heap size, so don't panic or ASSERT. 
+ */ + if (!IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)) { + as_rangeunlock(as); + return (set_errno(ENOMEM)); + } oszc = p->p_brkpageszc; p->p_brkpageszc = szc; - ASSERT(IS_P2ALIGNED(p->p_brkbase + p->p_brksize, pgsz)); addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz); len = (p->p_brkbase + p->p_brksize) - addr; @@ -292,17 +304,24 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) } if (szc > p->p_stkpageszc) { - error = grow_internal(p->p_usrstack - - p->p_stksize, szc); + error = grow_internal(p->p_usrstack - + p->p_stksize, szc); if (error) { as_rangeunlock(as); return (set_errno(error)); } } + /* + * It is possible for grow_internal to silently fail to + * promote the stack size, so don't panic or ASSERT. + */ + if (!IS_P2ALIGNED(p->p_usrstack - p->p_stksize, pgsz)) { + as_rangeunlock(as); + return (set_errno(ENOMEM)); + } oszc = p->p_stkpageszc; p->p_stkpageszc = szc; - ASSERT(IS_P2ALIGNED(p->p_usrstack, pgsz)); addr = p->p_usrstack - p->p_stksize; len = p->p_stksize; diff --git a/usr/src/uts/common/vm/as.h b/usr/src/uts/common/vm/as.h index f1c7ea3cfa..6272f3aa91 100644 --- a/usr/src/uts/common/vm/as.h +++ b/usr/src/uts/common/vm/as.h @@ -153,6 +153,13 @@ struct as { (((as)->a_userlimit > (caddr_t)UINT32_MAX) ? 1 : 0) /* + * Flags for as_map/as_map_ansegs + */ +#define AS_MAP_NO_LPOOB ((uint_t)-1) +#define AS_MAP_HEAP ((uint_t)-2) +#define AS_MAP_STACK ((uint_t)-3) + +/* * The as_callback is the basic structure which supports the ability to * inform clients of specific events pertaining to address space management. * A user calls as_add_callback to register an address space callback @@ -274,6 +281,7 @@ void as_pagereclaim(struct as *as, struct page **pp, caddr_t addr, size_t size, enum seg_rw rw); int as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc, boolean_t wait); +int as_set_default_lpsize(struct as *as, caddr_t addr, size_t size); void as_setwatch(struct as *as); void as_clearwatch(struct as *as); int as_getmemid(struct as *, caddr_t, memid_t *); diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h index fcf856bb9b..dd0aaed47d 100644 --- a/usr/src/uts/common/vm/hat.h +++ b/usr/src/uts/common/vm/hat.h @@ -345,7 +345,7 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t); * hat layer data structures. This flag forces hat layer * to tap its reserves in order to prevent infinite * recursion. - * HAT_LOAD_AUTOLPG Get MMU specific disable_auto_large_pages + * HAT_LOAD_TEXT A flag to hat_memload() to indicate loading text pages. */ /* @@ -362,7 +362,15 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t); #define HAT_RELOAD_SHARE 0x100 #define HAT_NO_KALLOC 0x200 #define HAT_LOAD_TEXT 0x400 -#define HAT_LOAD_AUTOLPG 0x800 + +/* + * Flags for initializing disable_*large_pages. 
+ * + * HAT_AUTO_TEXT Get MMU specific disable_auto_text_large_pages + * HAT_AUTO_DATA Get MMU specific disable_auto_data_large_pages + */ +#define HAT_AUTO_TEXT 0x800 +#define HAT_AUTO_DATA 0x1000 /* * Attributes for hat_memload/hat_devload/hat_*attr diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index cf1de64089..f48db44acc 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -395,7 +395,7 @@ segvn_create(struct seg *seg, void *argsp) a->flags &= ~MAP_NORESERVE; if (a->szc != 0) { - if (segvn_lpg_disable != 0 || + if (segvn_lpg_disable != 0 || (a->szc == AS_MAP_NO_LPOOB) || (a->amp != NULL && a->type == MAP_PRIVATE) || (a->flags & MAP_NORESERVE) || seg->s_as == &kas) { a->szc = 0; @@ -5270,8 +5270,9 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) err = segvn_demote_range(seg, addr, len, SDR_END, 0); } else { - uint_t szcvec = map_shm_pgszcvec(seg->s_base, - pgsz, (uintptr_t)seg->s_base); + uint_t szcvec = map_pgszcvec(seg->s_base, + pgsz, (uintptr_t)seg->s_base, + (svd->flags & MAP_TEXT), MAPPGSZC_SHM, 0); err = segvn_demote_range(seg, addr, len, SDR_END, szcvec); } @@ -6267,7 +6268,8 @@ segvn_gettype(struct seg *seg, caddr_t addr) ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); - return (svd->type | (svd->flags & MAP_NORESERVE)); + return (svd->type | (svd->flags & (MAP_NORESERVE | MAP_TEXT | + MAP_INITDATA))); } /*ARGSUSED*/ diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h index 10cd0f1835..1ef18ee142 100644 --- a/usr/src/uts/common/vm/seg_vn.h +++ b/usr/src/uts/common/vm/seg_vn.h @@ -137,16 +137,18 @@ typedef struct segvn_data { #define SEGVN_ZFOD_ARGS(prot, max) \ { NULL, NULL, 0, MAP_PRIVATE, prot, max, 0, NULL, 0, 0 } -#define AS_MAP_VNSEGS_USELPGS(crfp, argsp) \ +#define AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp) \ ((crfp) == (int (*)())segvn_create && \ (((struct segvn_crargs *)(argsp))->flags & \ (MAP_TEXT | MAP_INITDATA)) && \ - ((struct segvn_crargs *)(argsp))->vp != NULL && \ - ((struct segvn_crargs *)(argsp))->amp == NULL) + ((struct segvn_crargs *)(argsp))->szc == 0 && \ + ((struct segvn_crargs *)(argsp))->vp != NULL) -#define AS_MAP_SHAMP(crfp, argsp) \ +#define AS_MAP_CHECK_ANON_LPOOB(crfp, argsp) \ ((crfp) == (int (*)())segvn_create && \ - ((struct segvn_crargs *)(argsp))->type == MAP_SHARED && \ + (((struct segvn_crargs *)(argsp))->szc == 0 || \ + ((struct segvn_crargs *)(argsp))->szc == AS_MAP_HEAP || \ + ((struct segvn_crargs *)(argsp))->szc == AS_MAP_STACK) && \ ((struct segvn_crargs *)(argsp))->vp == NULL) extern void segvn_init(void); diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c index 4e807fd670..f0e09e3ee3 100644 --- a/usr/src/uts/common/vm/vm_as.c +++ b/usr/src/uts/common/vm/vm_as.c @@ -1573,8 +1573,10 @@ static int as_map_vnsegs(struct as *as, caddr_t addr, size_t size, int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) { - int text = vn_a->flags & MAP_TEXT; - uint_t szcvec = map_execseg_pgszcvec(text, addr, size); + uint_t mapflags = vn_a->flags & (MAP_TEXT | MAP_INITDATA); + int type = (vn_a->type == MAP_SHARED) ? 
MAPPGSZC_SHM : MAPPGSZC_PRIVM; + uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags, + type, 0); int error; struct seg *seg; struct vattr va; @@ -1616,7 +1618,8 @@ again: save_size = size; size = va.va_size - (vn_a->offset & PAGEMASK); size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t); - szcvec = map_execseg_pgszcvec(text, addr, size); + szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, mapflags, + type, 0); if (szcvec <= 1) { size = save_size; goto again; @@ -1637,14 +1640,32 @@ again: return (0); } +/* + * as_map_ansegs: shared or private anonymous memory. Note that the flags + * passed to map_pgszvec cannot be MAP_INITDATA, for anon. + */ static int -as_map_sham(struct as *as, caddr_t addr, size_t size, +as_map_ansegs(struct as *as, caddr_t addr, size_t size, int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) { - uint_t szcvec = map_shm_pgszcvec(addr, size, - vn_a->amp == NULL ? (uintptr_t)addr : - (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE)); - + uint_t szcvec; + uchar_t type; + + ASSERT(vn_a->type == MAP_SHARED || vn_a->type == MAP_PRIVATE); + if (vn_a->type == MAP_SHARED) { + type = MAPPGSZC_SHM; + } else if (vn_a->type == MAP_PRIVATE) { + if (vn_a->szc == AS_MAP_HEAP) { + type = MAPPGSZC_HEAP; + } else if (vn_a->szc == AS_MAP_STACK) { + type = MAPPGSZC_STACK; + } else { + type = MAPPGSZC_PRIVM; + } + } + szcvec = map_pgszcvec(addr, size, vn_a->amp == NULL ? + (uintptr_t)addr : (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE), + (vn_a->flags & MAP_TEXT), type, 0); ASSERT(AS_WRITE_HELD(as, &as->a_lock)); ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); ASSERT(IS_P2ALIGNED(size, PAGESIZE)); @@ -1669,6 +1690,7 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(), caddr_t raddr; /* rounded down addr */ size_t rsize; /* rounded up size */ int error; + int unmap = 0; struct proc *p = curproc; raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); @@ -1695,15 +1717,19 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(), return (ENOMEM); } - if (AS_MAP_VNSEGS_USELPGS(crfp, argsp) || AS_MAP_SHAMP(crfp, argsp)) { - int unmap = 0; - if (AS_MAP_SHAMP(crfp, argsp)) { - error = as_map_sham(as, raddr, rsize, crfp, - (struct segvn_crargs *)argsp, &unmap); - } else { - error = as_map_vnsegs(as, raddr, rsize, crfp, - (struct segvn_crargs *)argsp, &unmap); + if (AS_MAP_CHECK_VNODE_LPOOB(crfp, argsp)) { + error = as_map_vnsegs(as, raddr, rsize, crfp, + (struct segvn_crargs *)argsp, &unmap); + if (error != 0) { + AS_LOCK_EXIT(as, &as->a_lock); + if (unmap) { + (void) as_unmap(as, addr, size); + } + return (error); } + } else if (AS_MAP_CHECK_ANON_LPOOB(crfp, argsp)) { + error = as_map_ansegs(as, raddr, rsize, crfp, + (struct segvn_crargs *)argsp, &unmap); if (error != 0) { AS_LOCK_EXIT(as, &as->a_lock); if (unmap) { @@ -2741,6 +2767,377 @@ setpgsz_top: } /* + * as_iset3_default_lpsize() just calls SEGOP_SETPAGESIZE() on all segments + * in its chunk where s_szc is less than the szc we want to set. 
+ */ +static int +as_iset3_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, + int *retry) +{ + struct seg *seg; + size_t ssize; + int error; + + seg = as_segat(as, raddr); + if (seg == NULL) { + panic("as_iset3_default_lpsize: no seg"); + } + + for (; rsize != 0; rsize -= ssize, raddr += ssize) { + if (raddr >= seg->s_base + seg->s_size) { + seg = AS_SEGNEXT(as, seg); + if (seg == NULL || raddr != seg->s_base) { + panic("as_iset3_default_lpsize: as changed"); + } + } + if ((raddr + rsize) > (seg->s_base + seg->s_size)) { + ssize = seg->s_base + seg->s_size - raddr; + } else { + ssize = rsize; + } + + if (szc > seg->s_szc) { + error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc); + /* Only retry on EINVAL segments that have no vnode. */ + if (error == EINVAL) { + vnode_t *vp = NULL; + if ((SEGOP_GETTYPE(seg, raddr) & MAP_SHARED) && + (SEGOP_GETVP(seg, raddr, &vp) != 0 || + vp == NULL)) { + *retry = 1; + } else { + *retry = 0; + } + } + if (error) { + return (error); + } + } + } + return (0); +} + +/* + * as_iset2_default_lpsize() calls as_iset3_default_lpsize() to set the + * pagesize on each segment in its range, but if any fails with EINVAL, + * then it reduces the pagesizes to the next size in the bitmap and + * retries as_iset3_default_lpsize(). The reason why the code retries + * smaller allowed sizes on EINVAL is because (a) the anon offset may not + * match the bigger sizes, and (b) it's hard to get this offset (to begin + * with) to pass to map_pgszcvec(). + */ +static int +as_iset2_default_lpsize(struct as *as, caddr_t addr, size_t size, uint_t szc, + uint_t szcvec) +{ + int error; + int retry; + + for (;;) { + error = as_iset3_default_lpsize(as, addr, size, szc, &retry); + if (error == EINVAL && retry) { + szcvec &= ~(1 << szc); + if (szcvec <= 1) { + return (EINVAL); + } + szc = highbit(szcvec) - 1; + } else { + return (error); + } + } +} + +/* + * as_iset1_default_lpsize() breaks its chunk into areas where existing + * segments have a smaller szc than we want to set. For each such area, + * it calls as_iset2_default_lpsize() + */ +static int +as_iset1_default_lpsize(struct as *as, caddr_t raddr, size_t rsize, uint_t szc, + uint_t szcvec) +{ + struct seg *seg; + size_t ssize; + caddr_t setaddr = raddr; + size_t setsize = 0; + int set; + int error; + + ASSERT(AS_WRITE_HELD(as, &as->a_lock)); + + seg = as_segat(as, raddr); + if (seg == NULL) { + panic("as_iset1_default_lpsize: no seg"); + } + if (seg->s_szc < szc) { + set = 1; + } else { + set = 0; + } + + for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { + if (raddr >= seg->s_base + seg->s_size) { + seg = AS_SEGNEXT(as, seg); + if (seg == NULL || raddr != seg->s_base) { + panic("as_iset1_default_lpsize: as changed"); + } + if (seg->s_szc >= szc && set) { + ASSERT(setsize != 0); + error = as_iset2_default_lpsize(as, + setaddr, setsize, szc, szcvec); + if (error) { + return (error); + } + set = 0; + } else if (seg->s_szc < szc && !set) { + setaddr = raddr; + setsize = 0; + set = 1; + } + } + if ((raddr + rsize) > (seg->s_base + seg->s_size)) { + ssize = seg->s_base + seg->s_size - raddr; + } else { + ssize = rsize; + } + } + error = 0; + if (set) { + ASSERT(setsize != 0); + error = as_iset2_default_lpsize(as, setaddr, setsize, + szc, szcvec); + } + return (error); +} + +/* + * as_iset_default_lpsize() breaks its chunk according to the size code bitmap + * returned by map_pgszcvec() (similar to as_map_segvn_segs()), and passes each + * chunk to as_iset1_default_lpsize(). 
+ */ +static int +as_iset_default_lpsize(struct as *as, caddr_t addr, size_t size, int flags, + int type) +{ + int rtype = (type & MAP_SHARED) ? MAPPGSZC_SHM : MAPPGSZC_PRIVM; + uint_t szcvec = map_pgszcvec(addr, size, (uintptr_t)addr, + flags, rtype, 1); + uint_t szc; + uint_t nszc; + int error; + caddr_t a; + caddr_t eaddr; + size_t segsize; + size_t pgsz; + uint_t save_szcvec; + + ASSERT(AS_WRITE_HELD(as, &as->a_lock)); + ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); + ASSERT(IS_P2ALIGNED(size, PAGESIZE)); + + szcvec &= ~1; + if (szcvec <= 1) { /* skip if base page size */ + return (0); + } + + /* Get the pagesize of the first larger page size. */ + szc = lowbit(szcvec) - 1; + pgsz = page_get_pagesize(szc); + eaddr = addr + size; + addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + eaddr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); + + save_szcvec = szcvec; + szcvec >>= (szc + 1); + nszc = szc; + while (szcvec) { + if ((szcvec & 0x1) == 0) { + nszc++; + szcvec >>= 1; + continue; + } + nszc++; + pgsz = page_get_pagesize(nszc); + a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + if (a != addr) { + ASSERT(szc > 0); + ASSERT(a < eaddr); + segsize = a - addr; + error = as_iset1_default_lpsize(as, addr, segsize, szc, + save_szcvec); + if (error) { + return (error); + } + addr = a; + } + szc = nszc; + szcvec >>= 1; + } + + ASSERT(addr < eaddr); + szcvec = save_szcvec; + while (szcvec) { + a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); + ASSERT(a >= addr); + if (a != addr) { + ASSERT(szc > 0); + segsize = a - addr; + error = as_iset1_default_lpsize(as, addr, segsize, szc, + save_szcvec); + if (error) { + return (error); + } + addr = a; + } + szcvec &= ~(1 << szc); + if (szcvec) { + szc = highbit(szcvec) - 1; + pgsz = page_get_pagesize(szc); + } + } + ASSERT(addr == eaddr); + + return (0); +} + +/* + * Set the default large page size for the range. Called via memcntl with + * page size set to 0. as_set_default_lpsize breaks the range down into + * chunks with the same type/flags, ignores-non segvn segments, and passes + * each chunk to as_iset_default_lpsize(). 
+ */ +int +as_set_default_lpsize(struct as *as, caddr_t addr, size_t size) +{ + struct seg *seg; + caddr_t raddr; + size_t rsize; + size_t ssize; + int rtype, rflags; + int stype, sflags; + int error; + caddr_t setaddr; + size_t setsize; + int segvn; + + if (size == 0) + return (0); + + AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); +again: + error = 0; + + raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); + rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - + (size_t)raddr; + + if (raddr + rsize < raddr) { /* check for wraparound */ + AS_LOCK_EXIT(as, &as->a_lock); + return (ENOMEM); + } + as_clearwatchprot(as, raddr, rsize); + seg = as_segat(as, raddr); + if (seg == NULL) { + as_setwatch(as); + AS_LOCK_EXIT(as, &as->a_lock); + return (ENOMEM); + } + if (seg->s_ops == &segvn_ops) { + rtype = SEGOP_GETTYPE(seg, addr); + rflags = rtype & (MAP_TEXT | MAP_INITDATA); + rtype = rtype & (MAP_SHARED | MAP_PRIVATE); + segvn = 1; + } else { + segvn = 0; + } + setaddr = raddr; + setsize = 0; + + for (; rsize != 0; rsize -= ssize, raddr += ssize, setsize += ssize) { + if (raddr >= (seg->s_base + seg->s_size)) { + seg = AS_SEGNEXT(as, seg); + if (seg == NULL || raddr != seg->s_base) { + error = ENOMEM; + break; + } + if (seg->s_ops == &segvn_ops) { + stype = SEGOP_GETTYPE(seg, raddr); + sflags = stype & (MAP_TEXT | MAP_INITDATA); + stype &= (MAP_SHARED | MAP_PRIVATE); + if (segvn && (rflags != sflags || + rtype != stype)) { + /* + * The next segment is also segvn but + * has different flags and/or type. + */ + ASSERT(setsize != 0); + error = as_iset_default_lpsize(as, + setaddr, setsize, rflags, rtype); + if (error) { + break; + } + rflags = sflags; + rtype = stype; + setaddr = raddr; + setsize = 0; + } else if (!segvn) { + rflags = sflags; + rtype = stype; + setaddr = raddr; + setsize = 0; + segvn = 1; + } + } else if (segvn) { + /* The next segment is not segvn. */ + ASSERT(setsize != 0); + error = as_iset_default_lpsize(as, + setaddr, setsize, rflags, rtype); + if (error) { + break; + } + segvn = 0; + } + } + if ((raddr + rsize) > (seg->s_base + seg->s_size)) { + ssize = seg->s_base + seg->s_size - raddr; + } else { + ssize = rsize; + } + } + if (error == 0 && segvn) { + /* The last chunk when rsize == 0. */ + ASSERT(setsize != 0); + error = as_iset_default_lpsize(as, setaddr, setsize, + rflags, rtype); + } + + if (error == IE_RETRY) { + goto again; + } else if (error == IE_NOMEM) { + error = EAGAIN; + } else if (error == ENOTSUP) { + error = EINVAL; + } else if (error == EAGAIN) { + mutex_enter(&as->a_contents); + if (AS_ISUNMAPWAIT(as) == 0) { + cv_broadcast(&as->a_cv); + } + AS_SETUNMAPWAIT(as); + AS_LOCK_EXIT(as, &as->a_lock); + while (AS_ISUNMAPWAIT(as)) { + cv_wait(&as->a_cv, &as->a_contents); + } + mutex_exit(&as->a_contents); + AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); + goto again; + } + + as_setwatch(as); + AS_LOCK_EXIT(as, &as->a_lock); + return (error); +} + +/* * Setup all of the uninitialized watched pages that we can. 
*/ void diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index f050d93b2a..6aa7b40ecb 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -1475,8 +1475,7 @@ startup_vm(void) extern void hat_kern_setup(void); pgcnt_t pages_left; - extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; - extern pgcnt_t auto_lpg_min_physmem; + extern int use_brk_lpg, use_stk_lpg; PRM_POINT("startup_vm() starting..."); @@ -1729,11 +1728,21 @@ startup_vm(void) * disable automatic large pages for small memory systems or * when the disable flag is set. */ - if (physmem < auto_lpg_min_physmem || auto_lpg_disable) { - exec_lpg_disable = 1; + if (!auto_lpg_disable && mmu.max_page_level > 0) { + max_uheap_lpsize = LEVEL_SIZE(1); + max_ustack_lpsize = LEVEL_SIZE(1); + max_privmap_lpsize = LEVEL_SIZE(1); + max_uidata_lpsize = LEVEL_SIZE(1); + max_utext_lpsize = LEVEL_SIZE(1); + max_shm_lpsize = LEVEL_SIZE(1); + } + if (physmem < privm_lpg_min_physmem || mmu.max_page_level == 0 || + auto_lpg_disable) { use_brk_lpg = 0; use_stk_lpg = 0; - use_zmap_lpg = 0; + } + if (mmu.max_page_level > 0) { + mcntl0_lpsize = LEVEL_SIZE(1); } PRM_POINT("Calling hat_init_finish()..."); diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h index 5b4bbb41d5..5435580746 100644 --- a/usr/src/uts/i86pc/vm/vm_dep.h +++ b/usr/src/uts/i86pc/vm/vm_dep.h @@ -569,6 +569,29 @@ extern int l2cache_sz, l2cache_linesz, l2cache_assoc; #define PGI_MT_RANGE (PGI_MT_RANGE0 | PGI_MT_RANGE16M | PGI_MT_RANGE4G) /* + * Maximum and default values for user heap, stack, private and shared + * anonymous memory, and user text and initialized data. + * Used by map_pgsz*() routines. + */ +extern size_t max_uheap_lpsize; +extern size_t default_uheap_lpsize; +extern size_t max_ustack_lpsize; +extern size_t default_ustack_lpsize; +extern size_t max_privmap_lpsize; +extern size_t max_uidata_lpsize; +extern size_t max_utext_lpsize; +extern size_t max_shm_lpsize; +extern size_t mcntl0_lpsize; + +/* + * Sanity control. Don't use large pages regardless of user + * settings if there's less than priv or shm_lpg_min_physmem memory installed. + * The units for this variable are 8K pages. + */ +extern pgcnt_t privm_lpg_min_physmem; +extern pgcnt_t shm_lpg_min_physmem; + +/* * hash as and addr to get a bin. */ diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c index a239b5b177..988e36654f 100644 --- a/usr/src/uts/i86pc/vm/vm_machdep.c +++ b/usr/src/uts/i86pc/vm/vm_machdep.c @@ -55,6 +55,7 @@ #include <sys/exec.h> #include <sys/exechdr.h> #include <sys/debug.h> +#include <sys/vmsystm.h> #include <vm/hat.h> #include <vm/as.h> @@ -122,39 +123,80 @@ uint_t mmu_page_sizes; /* How many page sizes the users can see */ uint_t mmu_exported_page_sizes; -size_t auto_lpg_va_default = MMU_PAGESIZE; /* used by zmap() */ /* * Number of pages in 1 GB. Don't enable automatic large pages if we have * fewer than this many pages. */ -pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); +pgcnt_t shm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); +pgcnt_t privm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); + +/* + * Maximum and default segment size tunables for user private + * and shared anon memory, and user text and initialized data. + * These can be patched via /etc/system to allow large pages + * to be used for mapping application private and shared anon memory. 
+ */ +size_t mcntl0_lpsize = MMU_PAGESIZE; +size_t max_uheap_lpsize = MMU_PAGESIZE; +size_t default_uheap_lpsize = MMU_PAGESIZE; +size_t max_ustack_lpsize = MMU_PAGESIZE; +size_t default_ustack_lpsize = MMU_PAGESIZE; +size_t max_privmap_lpsize = MMU_PAGESIZE; +size_t max_uidata_lpsize = MMU_PAGESIZE; +size_t max_utext_lpsize = MMU_PAGESIZE; +size_t max_shm_lpsize = MMU_PAGESIZE; /* * Return the optimum page size for a given mapping */ /*ARGSUSED*/ size_t -map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) +map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) { - level_t l; + level_t l = 0; + size_t pgsz = MMU_PAGESIZE; + size_t max_lpsize; + uint_t mszc; - if (remap) - *remap = 0; + ASSERT(maptype != MAPPGSZ_VA); - switch (maptype) { + if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) { + return (MMU_PAGESIZE); + } - case MAPPGSZ_STK: + switch (maptype) { case MAPPGSZ_HEAP: - case MAPPGSZ_VA: + case MAPPGSZ_STK: + max_lpsize = memcntl ? mcntl0_lpsize : (maptype == + MAPPGSZ_HEAP ? max_uheap_lpsize : max_ustack_lpsize); + if (max_lpsize == MMU_PAGESIZE) { + return (MMU_PAGESIZE); + } + if (len == 0) { + len = (maptype == MAPPGSZ_HEAP) ? p->p_brkbase + + p->p_brksize - p->p_bssbase : p->p_stksize; + } + len = (maptype == MAPPGSZ_HEAP) ? MAX(len, + default_uheap_lpsize) : MAX(len, default_ustack_lpsize); + /* * use the pages size that best fits len */ for (l = mmu.max_page_level; l > 0; --l) { - if (len < LEVEL_SIZE(l)) + if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) { continue; + } else { + pgsz = LEVEL_SIZE(l); + } break; } - return (LEVEL_SIZE(l)); + + mszc = (maptype == MAPPGSZ_HEAP ? p->p_brkpageszc : + p->p_stkpageszc); + if (addr == 0 && (pgsz < hw_page_array[mszc].hp_size)) { + pgsz = hw_page_array[mszc].hp_size; + } + return (pgsz); /* * for ISM use the 1st large page size. @@ -164,65 +206,96 @@ map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) return (MMU_PAGESIZE); return (LEVEL_SIZE(1)); } - return (0); + return (pgsz); } -/* - * This can be patched via /etc/system to allow large pages - * to be used for mapping application and libraries text segments. - */ -int use_text_largepages = 0; -int use_shm_largepages = 0; - -/* - * Return a bit vector of large page size codes that - * can be used to map [addr, addr + len) region. - */ - -/*ARGSUSED*/ -uint_t -map_execseg_pgszcvec(int text, caddr_t addr, size_t len) +static uint_t +map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize, + size_t min_physmem) { + caddr_t eaddr = addr + size; + uint_t szcvec = 0; + caddr_t raddr; + caddr_t readdr; size_t pgsz; - caddr_t a; - - if (!text || !use_text_largepages || - mmu.max_page_level == 0) - return (0); + int i; - pgsz = LEVEL_SIZE(1); - a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - if (a < addr || a >= addr + len) { + if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) { return (0); } - len -= (a - addr); - if (len < pgsz) { - return (0); + + for (i = mmu_page_sizes - 1; i > 0; i--) { + pgsz = page_get_pagesize(i); + if (pgsz > max_lpsize) { + continue; + } + raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz); + if (raddr < addr || raddr >= readdr) { + continue; + } + if (P2PHASE((uintptr_t)addr ^ off, pgsz)) { + continue; + } + /* + * Set szcvec to the remaining page sizes. 
+ */ + szcvec = ((1 << (i + 1)) - 1) & ~1; + break; } - return (1 << 1); + return (szcvec); } +/* + * Return a bit vector of large page size codes that + * can be used to map [addr, addr + len) region. + */ +/*ARGSUSED*/ uint_t -map_shm_pgszcvec(caddr_t addr, size_t len, uintptr_t off) +map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type, + int memcntl) { - size_t pgsz; - caddr_t a; + size_t max_lpsize = mcntl0_lpsize; - if (!use_shm_largepages || mmu.max_page_level == 0) { + if (mmu.max_page_level == 0) return (0); - } - pgsz = LEVEL_SIZE(1); - a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); - if (a < addr || a >= addr + len || - P2PHASE((uintptr_t)addr ^ off, pgsz)) { - return (0); - } - len -= (a - addr); - if (len < pgsz) { - return (0); + if (flags & MAP_TEXT) { + if (!memcntl) + max_lpsize = max_utext_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + shm_lpg_min_physmem)); + + } else if (flags & MAP_INITDATA) { + if (!memcntl) + max_lpsize = max_uidata_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_SHM) { + if (!memcntl) + max_lpsize = max_shm_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + shm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_HEAP) { + if (!memcntl) + max_lpsize = max_uheap_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_STACK) { + if (!memcntl) + max_lpsize = max_ustack_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); + + } else { + if (!memcntl) + max_lpsize = max_privmap_lpsize; + return (map_szcvec(addr, size, off, max_lpsize, + privm_lpg_min_physmem)); } - return (1 << 1); } /* diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c index d374745fca..7ab7c28ca6 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c @@ -139,14 +139,21 @@ int sfmmu_allow_nc_trans = 0; #define LARGE_PAGES_OFF 0x1 /* - * WARNING: 512K pages MUST be disabled for ISM/DISM. If not - * a process would page fault indefinitely if it tried to - * access a 512K page. + * The disable_large_pages and disable_ism_large_pages variables control + * hat_memload_array and the page sizes to be used by ISM and the kernel. + * + * The disable_auto_data_large_pages and disable_auto_text_large_pages variables + * are only used to control which OOB pages to use at upper VM segment creation + * time, and are set in hat_init_pagesizes and used in the map_pgsz* routines. + * Their values may come from platform or CPU specific code to disable page + * sizes that should not be used. + * + * WARNING: 512K pages are currently not supported for ISM/DISM. 
*/ -int disable_ism_large_pages = (1 << TTE512K); -int disable_large_pages = 0; -int disable_auto_large_pages = 0; -int disable_shm_large_pages = 0; +uint_t disable_large_pages = 0; +uint_t disable_ism_large_pages = (1 << TTE512K); +uint_t disable_auto_data_large_pages = 0; +uint_t disable_auto_text_large_pages = 0; /* * Private sfmmu data structures for hat management @@ -891,17 +898,12 @@ hat_init_pagesizes() mmu_exported_page_sizes = 0; for (i = TTE8K; i < max_mmu_page_sizes; i++) { - extern int disable_text_largepages; - extern int disable_initdata_largepages; szc_2_userszc[i] = (uint_t)-1; userszc_2_szc[i] = (uint_t)-1; if ((mmu_exported_pagesize_mask & (1 << i)) == 0) { disable_large_pages |= (1 << i); - disable_ism_large_pages |= (1 << i); - disable_text_largepages |= (1 << i); - disable_initdata_largepages |= (1 << i); } else { szc_2_userszc[i] = mmu_exported_page_sizes; userszc_2_szc[mmu_exported_page_sizes] = i; @@ -909,7 +911,9 @@ hat_init_pagesizes() } } - disable_auto_large_pages = disable_large_pages; + disable_ism_large_pages |= disable_large_pages; + disable_auto_data_large_pages = disable_large_pages; + disable_auto_text_large_pages = disable_large_pages; /* * Initialize mmu-specific large page sizes. @@ -918,11 +922,11 @@ hat_init_pagesizes() disable_large_pages |= mmu_large_pages_disabled(HAT_LOAD); disable_ism_large_pages |= mmu_large_pages_disabled(HAT_LOAD_SHARE); - disable_auto_large_pages |= - mmu_large_pages_disabled(HAT_LOAD_AUTOLPG); + disable_auto_data_large_pages |= + mmu_large_pages_disabled(HAT_AUTO_DATA); + disable_auto_text_large_pages |= + mmu_large_pages_disabled(HAT_AUTO_TEXT); } - - disable_shm_large_pages = disable_auto_large_pages; } /* @@ -1993,7 +1997,7 @@ hat_memload_array(struct hat *hat, caddr_t addr, size_t len, pgcnt_t numpg, npgs; tte_t tte; page_t *pp; - int large_pages_disable; + uint_t large_pages_disable; ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET)); diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.h b/usr/src/uts/sfmmu/vm/hat_sfmmu.h index 526367186a..bab9f5db7d 100644 --- a/usr/src/uts/sfmmu/vm/hat_sfmmu.h +++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.h @@ -1782,7 +1782,7 @@ extern struct hme_blk *sfmmu_hmetohblk(struct sf_hment *); #pragma weak mmu_set_ctx_page_sizes #pragma weak mmu_check_page_sizes -extern int mmu_large_pages_disabled(uint_t); +extern uint_t mmu_large_pages_disabled(uint_t); extern void mmu_set_ctx_page_sizes(sfmmu_t *); extern void mmu_check_page_sizes(sfmmu_t *, uint64_t *); @@ -1822,6 +1822,11 @@ extern caddr_t utsb4m_vabase; extern vmem_t *kmem_tsb_default_arena[]; extern int tsb_lgrp_affinity; +extern uint_t disable_large_pages; +extern uint_t disable_ism_large_pages; +extern uint_t disable_auto_data_large_pages; +extern uint_t disable_auto_text_large_pages; + /* kpm externals */ extern pfn_t sfmmu_kpm_vatopfn(caddr_t); extern void sfmmu_kpm_patch_tlbm(void); diff --git a/usr/src/uts/sun4/os/startup.c b/usr/src/uts/sun4/os/startup.c index e90382f441..40f02671b0 100644 --- a/usr/src/uts/sun4/os/startup.c +++ b/usr/src/uts/sun4/os/startup.c @@ -1878,7 +1878,7 @@ startup_vm(void) pgcnt_t max_phys_segkp; int mnode; - extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; + extern int use_brk_lpg, use_stk_lpg; /* * get prom's mappings, create hments for them and switch @@ -1974,12 +1974,12 @@ startup_vm(void) avmem = (uint64_t)freemem << PAGESHIFT; cmn_err(CE_CONT, "?avail mem = %lld\n", (unsigned long long)avmem); - /* For small memory systems disable automatic large pages. 
*/ - if (physmem < auto_lpg_min_physmem) { - exec_lpg_disable = 1; + /* + * For small memory systems disable automatic large pages. + */ + if (physmem < privm_lpg_min_physmem) { use_brk_lpg = 0; use_stk_lpg = 0; - use_zmap_lpg = 0; } /* diff --git a/usr/src/uts/sun4/vm/vm_dep.c b/usr/src/uts/sun4/vm/vm_dep.c index 0c9e7324c2..aa86c2e9af 100644 --- a/usr/src/uts/sun4/vm/vm_dep.c +++ b/usr/src/uts/sun4/vm/vm_dep.c @@ -97,8 +97,6 @@ plcnt_t plcnt; /* page list count */ caddr_t errata57_limit; #endif -extern int disable_auto_large_pages; /* used by map_pgsz*() routines */ - extern void page_relocate_hash(page_t *, page_t *); /* @@ -467,89 +465,56 @@ getexinfo( } } -#define MAP_PGSZ_COMMON(pgsz, n, upper, lower, len) \ - for ((n) = (upper); (n) > (lower); (n)--) { \ - if (disable_auto_large_pages & (1 << (n))) \ - continue; \ - if (hw_page_array[(n)].hp_size <= (len)) { \ - (pgsz) = hw_page_array[(n)].hp_size; \ - break; \ - } \ - } - - -/*ARGSUSED*/ -static size_t -map_pgszva(struct proc *p, caddr_t addr, size_t len) +/* + * Return non 0 value if the address may cause a VAC alias with KPM mappings. + * KPM selects an address such that it's equal offset modulo shm_alignment and + * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping. + */ +int +map_addr_vacalign_check(caddr_t addr, u_offset_t off) { - size_t pgsz = MMU_PAGESIZE; - int n, upper; - - /* - * Select the best fit page size within the constraints of - * auto_lpg_{min,max}szc. - * - * Note that we also take the heap size into account when - * deciding if we've crossed the threshold at which we should - * increase the page size. This isn't perfect since the heap - * may not have reached its full size yet, but it's better than - * not considering it at all. - */ - len += p->p_brksize; - if (ptob(auto_lpg_tlb_threshold) <= len) { - - upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc); - - /* - * Use auto_lpg_minszc - 1 as the limit so we never drop - * below auto_lpg_minszc. We don't have a size code to refer - * to like we have for bss and stack, so we assume 0. - * auto_lpg_minszc should always be >= 0. Using - * auto_lpg_minszc cuts off the loop. - */ - MAP_PGSZ_COMMON(pgsz, n, upper, auto_lpg_minszc - 1, len); + if (vac) { + return (((uintptr_t)addr ^ off) & shm_alignment - 1); + } else { + return (0); } - - return (pgsz); } +/* + * Sanity control. Don't use large pages regardless of user + * settings if there's less than priv or shm_lpg_min_physmem memory installed. + * The units for this variable is 8K pages. + */ +pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */ +pgcnt_t privm_lpg_min_physmem = 131072; /* 1GB */ + static size_t map_pgszheap(struct proc *p, caddr_t addr, size_t len) { - size_t pgsz; - int n, upper, lower; + size_t pgsz = MMU_PAGESIZE; + int szc; /* * If len is zero, retrieve from proc and don't demote the page size. + * Use atleast the default pagesize. */ if (len == 0) { - len = p->p_brksize; + len = p->p_brkbase + p->p_brksize - p->p_bssbase; } + len = MAX(len, default_uheap_lpsize); - /* - * Still zero? Then we don't have a heap yet, so pick the default - * heap size. - */ - if (len == 0) { - pgsz = auto_lpg_heap_default; - } else { - pgsz = hw_page_array[p->p_brkpageszc].hp_size; - } - - if ((pgsz * auto_lpg_tlb_threshold) <= len) { - /* - * We're past the threshold, so select the best fit - * page size within the constraints of - * auto_lpg_{min,max}szc and the minimum required - * alignment. 
- */ - upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc); - lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc); - MAP_PGSZ_COMMON(pgsz, n, upper, lower, len); + for (szc = mmu_page_sizes - 1; szc >= 0; szc--) { + pgsz = hw_page_array[szc].hp_size; + if ((disable_auto_data_large_pages & (1 << szc)) || + pgsz > max_uheap_lpsize) + continue; + if (len >= pgsz) { + break; + } } /* - * If addr == 0 we were called by memcntl() or exec_args() when the + * If addr == 0 we were called by memcntl() when the * size code is 0. Don't set pgsz less than current size. */ if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) { @@ -562,36 +527,26 @@ map_pgszheap(struct proc *p, caddr_t addr, size_t len) static size_t map_pgszstk(struct proc *p, caddr_t addr, size_t len) { - size_t pgsz; - int n, upper, lower; + size_t pgsz = MMU_PAGESIZE; + int szc; /* * If len is zero, retrieve from proc and don't demote the page size. + * Use atleast the default pagesize. */ if (len == 0) { len = p->p_stksize; } + len = MAX(len, default_ustack_lpsize); - /* - * Still zero? Then we don't have a heap yet, so pick the default - * stack size. - */ - if (len == 0) { - pgsz = auto_lpg_stack_default; - } else { - pgsz = hw_page_array[p->p_stkpageszc].hp_size; - } - - if ((pgsz * auto_lpg_tlb_threshold) <= len) { - /* - * We're past the threshold, so select the best fit - * page size within the constraints of - * auto_lpg_{min,max}szc and the minimum required - * alignment. - */ - upper = MIN(mmu_page_sizes - 1, auto_lpg_maxszc); - lower = MAX(auto_lpg_minszc - 1, p->p_brkpageszc); - MAP_PGSZ_COMMON(pgsz, n, upper, lower, len); + for (szc = mmu_page_sizes - 1; szc >= 0; szc--) { + pgsz = hw_page_array[szc].hp_size; + if ((disable_auto_data_large_pages & (1 << szc)) || + pgsz > max_ustack_lpsize) + continue; + if (len >= pgsz) { + break; + } } /* @@ -610,7 +565,6 @@ map_pgszism(caddr_t addr, size_t len) { uint_t szc; size_t pgsz; - extern int disable_ism_large_pages; for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) { if (disable_ism_large_pages & (1 << szc)) @@ -620,234 +574,69 @@ map_pgszism(caddr_t addr, size_t len) if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz)) return (pgsz); } + return (DEFAULT_ISM_PAGESIZE); } /* * Suggest a page size to be used to map a segment of type maptype and length * len. Returns a page size (not a size code). - * If remap is non-NULL, fill in a value suggesting whether or not to remap - * this segment. */ +/* ARGSUSED */ size_t -map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) +map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl) { - size_t pgsz = 0; + size_t pgsz = MMU_PAGESIZE; + + ASSERT(maptype != MAPPGSZ_VA); - if (remap != NULL) - *remap = (len > auto_lpg_remap_threshold); + if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) { + return (MMU_PAGESIZE); + } switch (maptype) { case MAPPGSZ_ISM: pgsz = map_pgszism(addr, len); break; - case MAPPGSZ_VA: - pgsz = map_pgszva(p, addr, len); - break; - case MAPPGSZ_STK: - pgsz = map_pgszstk(p, addr, len); + if (max_ustack_lpsize > MMU_PAGESIZE) { + pgsz = map_pgszstk(p, addr, len); + } break; case MAPPGSZ_HEAP: - pgsz = map_pgszheap(p, addr, len); + if (max_uheap_lpsize > MMU_PAGESIZE) { + pgsz = map_pgszheap(p, addr, len); + } break; } return (pgsz); } -/* - * Return non 0 value if the address may cause a VAC alias with KPM mappings. 
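map_pgszism() above differs from the heap and stack cases in that the candidate page size must not only fit the segment length but also be naturally aligned at the segment's start address, otherwise the default ISM page size is used. A small sketch of that check, with an assumed page-size table and a local IS_P2ALIGNED, is:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Local stand-in for the kernel's IS_P2ALIGNED(); 'a' is a power of two. */
#define IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)

/* Assumed page-size table, ascending by size class (8K .. 32M). */
static const size_t pgsz_table[] = { 8192, 65536, 524288, 4194304, 33554432 };
#define N_SZC			(sizeof (pgsz_table) / sizeof (pgsz_table[0]))
#define MIN_ISM_SZC		3		/* only consider 4M and up */
#define DEFAULT_ISM_PGSZ	4194304UL

static size_t
pick_ism_pgsz(uintptr_t addr, size_t len, unsigned disable_mask)
{
	int szc;

	for (szc = (int)N_SZC - 1; szc >= MIN_ISM_SZC; szc--) {
		size_t pgsz = pgsz_table[szc];

		if (disable_mask & (1U << szc))
			continue;
		/* Must fit and start on a boundary of this page size. */
		if (len >= pgsz && IS_P2ALIGNED(addr, pgsz))
			return (pgsz);
	}
	return (DEFAULT_ISM_PGSZ);
}

int
main(void)
{
	/* 64MB segment: 32M-aligned start gets 32M, 4M-aligned start gets 4M. */
	printf("%zu\n", pick_ism_pgsz(0x2000000, 64UL << 20, 0));
	printf("%zu\n", pick_ism_pgsz(0x0400000, 64UL << 20, 0));
	return (0);
}
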
- * KPM selects an address such that it's equal offset modulo shm_alignment and - * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping. - */ -int -map_addr_vacalign_check(caddr_t addr, u_offset_t off) -{ - if (vac) { - return (((uintptr_t)addr ^ off) & shm_alignment - 1); - } else { - return (0); - } -} - -/* - * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m - * can be set in platform or CPU specific code but user can change the - * default values via /etc/system. - * - * Initial values are defined in architecture specific mach_vm_dep.c file. - */ -extern int use_text_pgsz64k; -extern int use_text_pgsz4m; -extern int use_initdata_pgsz64k; - -/* - * disable_text_largepages and disable_initdata_largepages bitmaks are set in - * platform or CPU specific code to disable page sizes that should not be - * used. These variables normally shouldn't be changed via /etc/system. A - * particular page size for text or inititialized data will be used by default - * if both one of use_* variables is set to 1 AND this page size is not - * disabled in the corresponding disable_* bitmask variable. - * - * Initial values are defined in architecture specific mach_vm_dep.c file. - */ -extern int disable_text_largepages; -extern int disable_initdata_largepages; - -/* - * Minimum segment size tunables before 64K or 4M large pages - * should be used to map it. - * - * Initial values are defined in architecture specific mach_vm_dep.c file. - */ -extern size_t text_pgsz64k_minsize; -extern size_t text_pgsz4m_minsize; -extern size_t initdata_pgsz64k_minsize; - -/* - * Sanity control. Don't use large pages regardless of user - * settings if there's less than execseg_lpg_min_physmem memory installed. - * The units for this variable is 8K pages. 
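map_addr_vacalign_check(), which this change moves toward the top of vm_dep.c, reduces to a single bit test: on a machine with a virtual address cache the mapping address and the file offset must agree modulo shm_alignment, or the new mapping could alias a KPM mapping in the cache. A hedged sketch with an assumed shm_alignment value:

#include <stdio.h>
#include <stdint.h>

static int vac = 1;				/* assume a VAC machine */
static uintptr_t shm_alignment = 0x100000;	/* assumed 1MB VAC alignment */

/* Non-zero means addr and off are out of phase and could alias in the VAC. */
static int
vacalign_check(uintptr_t addr, uint64_t off)
{
	if (!vac)
		return (0);
	return (((addr ^ (uintptr_t)off) & (shm_alignment - 1)) != 0);
}

int
main(void)
{
	printf("%d\n", vacalign_check(0x100000, 0x300000));	/* same phase */
	printf("%d\n", vacalign_check(0x102000, 0x300000));	/* out of phase */
	return (0);
}
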
- */ -pgcnt_t execseg_lpg_min_physmem = 131072; /* 1GB */ - -extern int disable_shm_large_pages; -pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */ -extern size_t max_shm_lpsize; - /* assumes TTE8K...TTE4M == szc */ static uint_t -map_text_pgsz4m(caddr_t addr, size_t len) -{ - caddr_t a; - - if (len < text_pgsz4m_minsize) { - return (0); - } - - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < MMU_PAGESIZE4M) { - return (0); - } - - return (1 << TTE4M); -} - -static uint_t -map_text_pgsz64k(caddr_t addr, size_t len) -{ - caddr_t a; - size_t svlen = len; - - if (len < text_pgsz64k_minsize) { - return (0); - } - - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < MMU_PAGESIZE64K) { - return (0); - } - if (!use_text_pgsz4m || - disable_text_largepages & (1 << TTE4M)) { - return (1 << TTE64K); - } - if (svlen < text_pgsz4m_minsize) { - return (1 << TTE64K); - } - addr = a; - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE4M, uintptr_t); - if (a < addr || a >= addr + len) { - return (1 << TTE64K); - } - len -= (a - addr); - if (len < MMU_PAGESIZE4M) { - return (1 << TTE64K); - } - return ((1 << TTE4M) | (1 << TTE64K)); -} - -static uint_t -map_initdata_pgsz64k(caddr_t addr, size_t len) -{ - caddr_t a; - - if (len < initdata_pgsz64k_minsize) { - return (0); - } - - a = (caddr_t)P2ROUNDUP_TYPED(addr, MMU_PAGESIZE64K, uintptr_t); - if (a < addr || a >= addr + len) { - return (0); - } - len -= (a - addr); - if (len < MMU_PAGESIZE64K) { - return (0); - } - return (1 << TTE64K); -} - -/* - * Return a bit vector of large page size codes that - * can be used to map [addr, addr + len) region. - */ -uint_t -map_execseg_pgszcvec(int text, caddr_t addr, size_t len) -{ - uint_t ret = 0; - - if (physmem < execseg_lpg_min_physmem) { - return (0); - } - - if (text) { - if (use_text_pgsz64k && - !(disable_text_largepages & (1 << TTE64K))) { - ret = map_text_pgsz64k(addr, len); - } else if (use_text_pgsz4m && - !(disable_text_largepages & (1 << TTE4M))) { - ret = map_text_pgsz4m(addr, len); - } - } else if (use_initdata_pgsz64k && - !(disable_initdata_largepages & (1 << TTE64K))) { - ret = map_initdata_pgsz64k(addr, len); - } - - return (ret); -} - -uint_t -map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off) +map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs, + size_t max_lpsize, size_t min_physmem) { caddr_t eaddr = addr + size; uint_t szcvec = 0; - int i; caddr_t raddr; caddr_t readdr; size_t pgsz; + int i; - if (physmem < shm_lpg_min_physmem || mmu_page_sizes <= 1 || - max_shm_lpsize <= MMU_PAGESIZE) { + if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) { return (0); } - for (i = mmu_page_sizes - 1; i > 0; i--) { - if (disable_shm_large_pages & (1 << i)) { + if (disable_lpgs & (1 << i)) { continue; } pgsz = page_get_pagesize(i); - if (pgsz > max_shm_lpsize) { + if (pgsz > max_lpsize) { continue; } raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); @@ -862,7 +651,7 @@ map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off) /* * And or in the remaining enabled page sizes. 
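Several hunks in this change lean on the power-of-two helpers P2ROUNDUP, P2ALIGN, P2PHASE and P2NPHASE. As a quick reference, here is a userland restatement of what they compute; these are simplified unsigned-only forms, whereas the kernel's versions in sys/sysmacros.h are written to preserve the operand type.

#include <stdio.h>
#include <stdint.h>

/* Simplified forms; 'align' must be a power of two. */
#define P2ALIGN(x, align)	((x) & -(align))		/* round down       */
#define P2ROUNDUP(x, align)	(-(-(x) & -(align)))		/* round up         */
#define P2PHASE(x, align)	((x) & ((align) - 1))		/* offset in page   */
#define P2NPHASE(x, align)	(-(x) & ((align) - 1))		/* gap to next page */

int
main(void)
{
	uint64_t addr = 0x12345;
	uint64_t pgsz = 0x2000;		/* 8K */

	printf("P2ALIGN   = %#llx\n", (unsigned long long)P2ALIGN(addr, pgsz));
	printf("P2ROUNDUP = %#llx\n", (unsigned long long)P2ROUNDUP(addr, pgsz));
	printf("P2PHASE   = %#llx\n", (unsigned long long)P2PHASE(addr, pgsz));
	printf("P2NPHASE  = %#llx\n", (unsigned long long)P2NPHASE(addr, pgsz));
	return (0);
}
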
*/ - szcvec |= P2PHASE(~disable_shm_large_pages, (1 << i)); + szcvec |= P2PHASE(~disable_lpgs, (1 << i)); szcvec &= ~1; /* no need to return 8K pagesize */ break; } @@ -870,6 +659,41 @@ map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off) } /* + * Return a bit vector of large page size codes that + * can be used to map [addr, addr + len) region. + */ +/* ARGSUSED */ +uint_t +map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type, + int memcntl) +{ + if (flags & MAP_TEXT) { + return (map_szcvec(addr, size, off, disable_auto_text_large_pages, + max_utext_lpsize, shm_lpg_min_physmem)); + + } else if (flags & MAP_INITDATA) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_uidata_lpsize, privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_SHM) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_shm_lpsize, shm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_HEAP) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_uheap_lpsize, privm_lpg_min_physmem)); + + } else if (type == MAPPGSZC_STACK) { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_ustack_lpsize, privm_lpg_min_physmem)); + + } else { + return (map_szcvec(addr, size, off, disable_auto_data_large_pages, + max_privmap_lpsize, privm_lpg_min_physmem)); + } +} + +/* * Anchored in the table below are counters used to keep track * of free contiguous physical memory. Each element of the table contains * the array of counters, the size of array which is allocated during @@ -1240,7 +1064,6 @@ get_segkmem_lpsize(size_t lpsize) size_t memtotal = physmem * PAGESIZE; size_t mmusz; uint_t szc; - extern int disable_large_pages; if (memtotal < segkmem_lpminphysmem) return (PAGESIZE); diff --git a/usr/src/uts/sun4/vm/vm_dep.h b/usr/src/uts/sun4/vm/vm_dep.h index da773362ca..296f8a8735 100644 --- a/usr/src/uts/sun4/vm/vm_dep.h +++ b/usr/src/uts/sun4/vm/vm_dep.h @@ -423,18 +423,33 @@ extern int vac_size; extern int vac_shift; /* - * Auto large page selection support variables. Some CPU - * implementations may differ from the defaults and will need - * to change these. + * Maximum and default values for user heap, stack, private and shared + * anonymous memory, and user text and initialized data. + * + * Initial values are defined in architecture specific mach_vm_dep.c file. + * Used by map_pgsz*() routines. + */ +extern size_t max_uheap_lpsize; +extern size_t default_uheap_lpsize; +extern size_t max_ustack_lpsize; +extern size_t default_ustack_lpsize; +extern size_t max_privmap_lpsize; +extern size_t max_uidata_lpsize; +extern size_t max_utext_lpsize; +extern size_t max_shm_lpsize; + +/* + * For adjusting the default lpsize, for DTLB-limited page sizes. + */ +extern void adjust_data_maxlpsize(size_t ismpagesize); + +/* + * Sanity control. Don't use large pages regardless of user + * settings if there's less than priv or shm_lpg_min_physmem memory installed. + * The units for this variable are 8K pages. */ -extern int auto_lpg_tlb_threshold; -extern int auto_lpg_minszc; -extern int auto_lpg_maxszc; -extern size_t auto_lpg_heap_default; -extern size_t auto_lpg_stack_default; -extern size_t auto_lpg_va_default; -extern size_t auto_lpg_remap_threshold; -extern pgcnt_t auto_lpg_min_physmem; +extern pgcnt_t privm_lpg_min_physmem; +extern pgcnt_t shm_lpg_min_physmem; /* * AS_2_BIN macro controls the page coloring policy. 
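The consolidated map_szcvec()/map_pgszcvec() pair above replaces the separate text, initdata and shm helpers with one routine: find the largest enabled size class whose naturally aligned sub-range still fits inside [addr, addr + size), then OR in every smaller enabled size so segvn can use intermediate sizes at the edges. The sketch below is a userland reconstruction in that spirit; the page-size table, the offset-phase test, and the limits are simplified assumptions, not the kernel's exact qualification logic.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define P2ALIGN(x, a)	((x) & -(a))
#define P2ROUNDUP(x, a)	(-(-(x) & -(a)))
#define P2PHASE(x, a)	((x) & ((a) - 1))

/* Stand-in page-size table, ascending by size class. */
static const size_t pgsz_table[] = { 8192, 65536, 524288, 4194304 };
#define N_SZC	(sizeof (pgsz_table) / sizeof (pgsz_table[0]))

/*
 * Return a bit vector of size classes usable for [addr, addr + size),
 * honoring a per-class disable mask and a maximum large page size.
 */
static unsigned
szcvec(uintptr_t addr, size_t size, uintptr_t off,
    unsigned disable_lpgs, size_t max_lpsize)
{
	uintptr_t eaddr = addr + size;
	unsigned vec = 0;
	int i;

	if (max_lpsize <= pgsz_table[0])
		return (0);

	for (i = (int)N_SZC - 1; i > 0; i--) {
		size_t pgsz = pgsz_table[i];
		uintptr_t raddr, readdr;

		if ((disable_lpgs & (1U << i)) || pgsz > max_lpsize)
			continue;
		raddr = P2ROUNDUP(addr, pgsz);
		readdr = P2ALIGN(eaddr, pgsz);
		/*
		 * Need at least one whole page of this size inside the
		 * range, and the file offset must be in phase with it.
		 */
		if (raddr < readdr &&
		    P2PHASE(off, pgsz) == P2PHASE(raddr, pgsz)) {
			vec = 1U << i;
			/* OR in the remaining (smaller) enabled sizes. */
			vec |= P2PHASE(~disable_lpgs, 1U << i);
			vec &= ~1U;	/* base pages need no bit */
			break;
		}
	}
	return (vec);
}

int
main(void)
{
	/* Example: 9MB private mapping at a 4M-aligned address, offset 0. */
	printf("szcvec = %#x\n", szcvec(0x800000, 9UL << 20, 0, 0, 4194304));
	return (0);
}
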
diff --git a/usr/src/uts/sun4u/cpu/opl_olympus.c b/usr/src/uts/sun4u/cpu/opl_olympus.c index 521b53442f..7f267676b0 100644 --- a/usr/src/uts/sun4u/cpu/opl_olympus.c +++ b/usr/src/uts/sun4u/cpu/opl_olympus.c @@ -494,8 +494,6 @@ cpu_setup(void) extern int at_flags; extern int disable_delay_tlb_flush, delay_tlb_flush; extern int cpc_has_overflow_intr; - extern int disable_text_largepages; - extern int use_text_pgsz4m; uint64_t cpu0_log; extern uint64_t opl_cpu0_err_log; @@ -590,16 +588,6 @@ cpu_setup(void) * fpRAS. */ fpras_implemented = 0; - - /* - * Enable 4M pages to be used for mapping user text by default. Don't - * use large pages for initialized data segments since we may not know - * at exec() time what should be the preferred large page size for DTLB - * programming. - */ - use_text_pgsz4m = 1; - disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | - (1 << TTE32M) | (1 << TTE256M); } /* @@ -700,11 +688,14 @@ send_one_mondo(int cpuid) * */ int init_mmu_page_sizes = 0; -static int mmu_disable_ism_large_pages = ((1 << TTE64K) | + +static uint_t mmu_disable_large_pages = 0; +static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int mmu_disable_auto_large_pages = ((1 << TTE64K) | +static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int mmu_disable_large_pages = 0; +static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | + (1 << TTE512K)); /* * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support. @@ -721,7 +712,6 @@ mmu_init_mmu_page_sizes(int32_t not_used) mmu_page_sizes = MMU_PAGE_SIZES; mmu_hashcnt = MAX_HASHCNT; mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE; - auto_lpg_maxszc = TTE4M; mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M) | (1 << TTE256M); @@ -747,19 +737,30 @@ static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = { /* * The function returns the mmu-specific values for the * hat's disable_large_pages, disable_ism_large_pages, and - * disable_auto_large_pages variables. + * disable_auto_data_large_pages and + * disable_text_data_large_pages variables. 
*/ -int +uint_t mmu_large_pages_disabled(uint_t flag) { - int pages_disable = 0; + uint_t pages_disable = 0; + extern int use_text_pgsz64K; + extern int use_text_pgsz512K; if (flag == HAT_LOAD) { pages_disable = mmu_disable_large_pages; } else if (flag == HAT_LOAD_SHARE) { pages_disable = mmu_disable_ism_large_pages; - } else if (flag == HAT_LOAD_AUTOLPG) { - pages_disable = mmu_disable_auto_large_pages; + } else if (flag == HAT_AUTO_DATA) { + pages_disable = mmu_disable_auto_data_large_pages; + } else if (flag == HAT_AUTO_TEXT) { + pages_disable = mmu_disable_auto_text_large_pages; + if (use_text_pgsz512K) { + pages_disable &= ~(1 << TTE512K); + } + if (use_text_pgsz64K) { + pages_disable &= ~(1 << TTE64K); + } } return (pages_disable); } @@ -779,23 +780,22 @@ mmu_init_large_pages(size_t ism_pagesize) case MMU_PAGESIZE4M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - mmu_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE4M; break; case MMU_PAGESIZE32M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE256M)); - mmu_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE32M; + adjust_data_maxlpsize(ism_pagesize); break; case MMU_PAGESIZE256M: mmu_disable_ism_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M)); - mmu_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); - auto_lpg_maxszc = TTE256M; + adjust_data_maxlpsize(ism_pagesize); break; default: cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", diff --git a/usr/src/uts/sun4u/cpu/spitfire.c b/usr/src/uts/sun4u/cpu/spitfire.c index 3ff8f04b0f..96bc99cdb0 100644 --- a/usr/src/uts/sun4u/cpu/spitfire.c +++ b/usr/src/uts/sun4u/cpu/spitfire.c @@ -34,6 +34,7 @@ #include <sys/elf_SPARC.h> #include <vm/hat_sfmmu.h> #include <vm/page.h> +#include <vm/vm_dep.h> #include <sys/cpuvar.h> #include <sys/spitregs.h> #include <sys/async.h> @@ -431,9 +432,6 @@ cpu_setup(void) #if defined(SF_ERRATA_57) extern caddr_t errata57_limit; #endif - extern int disable_text_largepages; - extern int disable_initdata_largepages; - cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT); at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1; @@ -514,14 +512,10 @@ cpu_setup(void) #endif /* - * Allow only 8K, 64K and 4M pages for text by default. - * Allow only 8K and 64K page for initialized data segments by - * default. + * Disable text by default. + * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c. */ - disable_text_largepages = (1 << TTE512K) | (1 << TTE32M) | - (1 << TTE256M); - disable_initdata_largepages = (1 << TTE512K) | (1 << TTE4M) | - (1 << TTE32M) | (1 << TTE256M); + max_utext_lpsize = MMU_PAGESIZE; } static int @@ -4490,27 +4484,6 @@ cpu_faulted_exit(struct cpu *cp) { } -static int mmu_disable_ism_large_pages = ((1 << TTE512K) | - (1 << TTE32M) | (1 << TTE256M)); -static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); - -/* - * The function returns the US_II mmu-specific values for the - * hat's disable_large_pages and disable_ism_large_pages variables. 
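The per-CPU mmu_large_pages_disabled() rewritten above now keys off four flags instead of three, and for the text case the use_text_pgsz512K/use_text_pgsz64K tunables can re-enable individual sizes by clearing their bits. A hedged userland sketch of that dispatch (flag values and masks assumed for illustration) is:

#include <stdio.h>

/* Illustrative flag values and size-class bits (assumed). */
#define HAT_LOAD	0x01
#define HAT_LOAD_SHARE	0x02
#define HAT_AUTO_DATA	0x04
#define HAT_AUTO_TEXT	0x08

enum { SZC_8K, SZC_64K, SZC_512K, SZC_4M, SZC_32M, SZC_256M };

static unsigned disable_lp = 1U << SZC_256M;
static unsigned disable_ism_lp = (1U << SZC_64K) | (1U << SZC_512K) |
    (1U << SZC_32M) | (1U << SZC_256M);
static unsigned disable_auto_data_lp = (1U << SZC_64K) | (1U << SZC_512K) |
    (1U << SZC_32M) | (1U << SZC_256M);
static unsigned disable_auto_text_lp = (1U << SZC_64K) | (1U << SZC_512K);

static int use_text_pgsz64K = 0;
static int use_text_pgsz512K = 1;	/* operator re-enabled 512K text pages */

static unsigned
lp_disabled(unsigned flag)
{
	unsigned mask = 0;

	if (flag == HAT_LOAD) {
		mask = disable_lp;
	} else if (flag == HAT_LOAD_SHARE) {
		mask = disable_ism_lp;
	} else if (flag == HAT_AUTO_DATA) {
		mask = disable_auto_data_lp;
	} else if (flag == HAT_AUTO_TEXT) {
		mask = disable_auto_text_lp;
		if (use_text_pgsz512K)
			mask &= ~(1U << SZC_512K);
		if (use_text_pgsz64K)
			mask &= ~(1U << SZC_64K);
	}
	return (mask);
}

int
main(void)
{
	printf("text disable mask = %#x\n", lp_disabled(HAT_AUTO_TEXT));
	return (0);
}
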
- */ -int -mmu_large_pages_disabled(uint_t flag) -{ - int pages_disable = 0; - - if (flag == HAT_LOAD) { - pages_disable = mmu_disable_large_pages; - } else if (flag == HAT_LOAD_SHARE) { - pages_disable = mmu_disable_ism_large_pages; - } - return (pages_disable); -} - /*ARGSUSED*/ void mmu_init_kernel_pgsz(struct hat *hat) diff --git a/usr/src/uts/sun4u/cpu/us3_cheetah.c b/usr/src/uts/sun4u/cpu/us3_cheetah.c index 9a74d72be1..217f285ccc 100644 --- a/usr/src/uts/sun4u/cpu/us3_cheetah.c +++ b/usr/src/uts/sun4u/cpu/us3_cheetah.c @@ -69,6 +69,11 @@ #endif /* CHEETAHPLUS_ERRATUM_25 */ /* + * Note that 'Cheetah PRM' refers to: + * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III + */ + +/* * Setup trap handlers. */ void @@ -122,10 +127,6 @@ cpu_fiximp(pnode_t dnode) "ecache-associativity", &ecache_associativity, CH_ECACHE_NWAY }; - extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg; - extern size_t max_shm_lpsize; - - for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) *prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval); @@ -143,11 +144,12 @@ cpu_fiximp(pnode_t dnode) /* * Cheetah's large page support has problems with large numbers of * large pages, so just disable large pages out-of-the-box. + * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c. */ - exec_lpg_disable = 1; - use_brk_lpg = 0; - use_stk_lpg = 0; - use_zmap_lpg = 0; + max_uheap_lpsize = MMU_PAGESIZE; + max_ustack_lpsize = MMU_PAGESIZE; + max_privmap_lpsize = MMU_PAGESIZE; + max_utext_lpsize = MMU_PAGESIZE; max_shm_lpsize = MMU_PAGESIZE; } diff --git a/usr/src/uts/sun4u/cpu/us3_common.c b/usr/src/uts/sun4u/cpu/us3_common.c index beee35f4aa..4904bff814 100644 --- a/usr/src/uts/sun4u/cpu/us3_common.c +++ b/usr/src/uts/sun4u/cpu/us3_common.c @@ -475,8 +475,6 @@ cpu_setup(void) extern int at_flags; extern int disable_delay_tlb_flush, delay_tlb_flush; extern int cpc_has_overflow_intr; - extern int disable_text_largepages; - extern int use_text_pgsz4m; /* * Setup chip-specific trap handlers. @@ -574,16 +572,6 @@ cpu_setup(void) fpras_implemented = 1; /* - * Enable 4M pages to be used for mapping user text by default. Don't - * use large pages for initialized data segments since we may not know - * at exec() time what should be the preferred large page size for DTLB - * programming. - */ - use_text_pgsz4m = 1; - disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) | - (1 << TTE32M) | (1 << TTE256M); - - /* * Setup CE lookup table */ CE_INITDISPTBL_POPULATE(ce_disp_table); diff --git a/usr/src/uts/sun4u/cpu/us3_common_mmu.c b/usr/src/uts/sun4u/cpu/us3_common_mmu.c index f62f37151f..2509c54d5a 100644 --- a/usr/src/uts/sun4u/cpu/us3_common_mmu.c +++ b/usr/src/uts/sun4u/cpu/us3_common_mmu.c @@ -42,60 +42,58 @@ #include <sys/panic.h> /* - * Note that 'Cheetah PRM' refers to: - * SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III - */ - -/* * pan_disable_ism_large_pages and pan_disable_large_pages are the Panther- * specific versions of disable_ism_large_pages and disable_large_pages, * and feed back into those two hat variables at hat initialization time, * for Panther-only systems. * - * chpjag_disable_ism_large_pages is the Ch/Jaguar-specific version of - * disable_ism_large_pages. Ditto for chjag_disable_large_pages. + * chpjag_disable_large_pages is the Ch/Jaguar-specific version of + * disable_large_pages. Ditto for pan_disable_large_pages. + * Note that the Panther and Ch/Jaguar ITLB do not support 32M/256M pages. 
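The Cheetah change above no longer flips four separate disable switches; it simply clamps the relevant max_*_lpsize caps to the base page size, and the OOB selection paths (map_pgsz()/map_szcvec() earlier in this change) treat a cap of MMU_PAGESIZE as "no large pages". A minimal sketch of that effect, with an invented heap_pgsz() standing in for the real selection loop:

#include <stdio.h>
#include <stddef.h>

#define MMU_PAGESIZE	8192UL
#define MMU_PAGESIZE4M	4194304UL

/* A platform can clamp this cap to MMU_PAGESIZE to opt out of OOB large pages. */
static size_t max_uheap_lpsize = MMU_PAGESIZE4M;

/* Invented stand-in for the heap page-size selection shown earlier. */
static size_t
heap_pgsz(size_t len)
{
	if (max_uheap_lpsize <= MMU_PAGESIZE)
		return (MMU_PAGESIZE);
	return (len >= MMU_PAGESIZE4M ? MMU_PAGESIZE4M : MMU_PAGESIZE);
}

int
main(void)
{
	printf("default platform: %zu\n", heap_pgsz(16UL << 20));

	/* What the Cheetah cpu_fiximp() change amounts to for the heap. */
	max_uheap_lpsize = MMU_PAGESIZE;
	printf("large pages off:  %zu\n", heap_pgsz(16UL << 20));
	return (0);
}
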
*/ static int panther_only = 0; -static int pan_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int pan_disable_large_pages = (1 << TTE256M); -static int pan_disable_auto_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); +static uint_t pan_disable_large_pages = (1 << TTE256M); +static uint_t chjag_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); -static int chjag_disable_ism_large_pages = ((1 << TTE64K) | +static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); +static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); -static int chjag_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); -static int chjag_disable_auto_large_pages = ((1 << TTE64K) | +static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); /* - * The function returns the USIII-IV mmu-specific values for the + * The function returns the USIII+(i)-IV+ mmu-specific values for the * hat's disable_large_pages and disable_ism_large_pages variables. * Currently the hat's disable_large_pages and disable_ism_large_pages * already contain the generic sparc 4 page size info, and the return * values are or'd with those values. */ -int +uint_t mmu_large_pages_disabled(uint_t flag) { - int pages_disable = 0; + uint_t pages_disable = 0; + extern int use_text_pgsz64K; + extern int use_text_pgsz512K; - if (panther_only) { - if (flag == HAT_LOAD) { + if (flag == HAT_LOAD) { + if (panther_only) { pages_disable = pan_disable_large_pages; - } else if (flag == HAT_LOAD_SHARE) { - pages_disable = pan_disable_ism_large_pages; - } else if (flag == HAT_LOAD_AUTOLPG) { - pages_disable = pan_disable_auto_large_pages; - } - } else { - if (flag == HAT_LOAD) { + } else { pages_disable = chjag_disable_large_pages; - } else if (flag == HAT_LOAD_SHARE) { - pages_disable = chjag_disable_ism_large_pages; - } else if (flag == HAT_LOAD_AUTOLPG) { - pages_disable = chjag_disable_auto_large_pages; + } + } else if (flag == HAT_LOAD_SHARE) { + pages_disable = mmu_disable_ism_large_pages; + } else if (flag == HAT_AUTO_DATA) { + pages_disable = mmu_disable_auto_data_large_pages; + } else if (flag == HAT_AUTO_TEXT) { + pages_disable = mmu_disable_auto_text_large_pages; + if (use_text_pgsz512K) { + pages_disable &= ~(1 << TTE512K); + } + if (use_text_pgsz64K) { + pages_disable &= ~(1 << TTE64K); } } return (pages_disable); @@ -141,7 +139,7 @@ int init_mmu_page_sizes = 0; * since it would be bad form to panic due * to a user typo. * - * The function re-initializes the pan_disable_ism_large_pages and + * The function re-initializes the disable_ism_large_pages and * pan_disable_large_pages variables, which are closely related. * Aka, if 32M is the desired [D]ISM page sizes, then 256M cannot be allowed * for non-ISM large page usage, or DTLB conflict will occur. 
Please see the @@ -151,37 +149,37 @@ void mmu_init_large_pages(size_t ism_pagesize) { if (cpu_impl_dual_pgsz == 0) { /* disable_dual_pgsz flag */ - pan_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); pan_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE4M; + mmu_disable_ism_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); return; } switch (ism_pagesize) { case MMU_PAGESIZE4M: - pan_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); pan_disable_large_pages = (1 << TTE256M); - pan_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_ism_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE4M; break; case MMU_PAGESIZE32M: - pan_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE256M)); pan_disable_large_pages = (1 << TTE256M); - pan_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_ism_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE256M)); + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M)); - auto_lpg_maxszc = TTE32M; + adjust_data_maxlpsize(ism_pagesize); break; case MMU_PAGESIZE256M: - pan_disable_ism_large_pages = ((1 << TTE64K) | - (1 << TTE512K) | (1 << TTE32M)); pan_disable_large_pages = (1 << TTE32M); - pan_disable_auto_large_pages = ((1 << TTE64K) | + mmu_disable_ism_large_pages = ((1 << TTE64K) | + (1 << TTE512K) | (1 << TTE32M)); + mmu_disable_auto_data_large_pages = ((1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M)); - auto_lpg_maxszc = TTE256M; + adjust_data_maxlpsize(ism_pagesize); break; default: cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx", @@ -211,7 +209,6 @@ mmu_init_mmu_page_sizes(int cinfo) (1 << TTE32M) | (1 << TTE256M); panther_dtlb_restrictions = 1; panther_only = 1; - auto_lpg_maxszc = TTE4M; } else if (npanther > 0) { panther_dtlb_restrictions = 1; } diff --git a/usr/src/uts/sun4u/vm/mach_vm_dep.c b/usr/src/uts/sun4u/vm/mach_vm_dep.c index 29f5541778..dc4995e263 100644 --- a/usr/src/uts/sun4u/vm/mach_vm_dep.c +++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c @@ -95,55 +95,42 @@ hw_pagesize_t hw_page_array[] = { }; /* - * use_text_pgsz64k, use_initdata_pgsz64k and use_text_pgsz4m - * can be set in platform or CPU specific code but user can change the - * default values via /etc/system. + * use_text_pgsz64k and use_text_pgsz512k allow the user to turn on these + * additional text page sizes for USIII-IV+ and OPL by changing the default + * values via /etc/system. */ - -int use_text_pgsz64k = 0; -int use_text_pgsz4m = 0; -int use_initdata_pgsz64k = 0; - -/* - * disable_text_largepages and disable_initdata_largepages bitmaks are set in - * platform or CPU specific code to disable page sizes that should not be - * used. These variables normally shouldn't be changed via /etc/system. A - * particular page size for text or inititialized data will be used by default - * if both one of use_* variables is set to 1 AND this page size is not - * disabled in the corresponding disable_* bitmask variable. 
- */ - -int disable_text_largepages = (1 << TTE4M) | (1 << TTE64K); -int disable_initdata_largepages = (1 << TTE64K); +int use_text_pgsz64K = 0; +int use_text_pgsz512K = 0; /* - * Minimum segment size tunables before 64K or 4M large pages - * should be used to map it. + * Maximum and default segment size tunables for user heap, stack, private + * and shared anonymous memory, and user text and initialized data. */ -size_t text_pgsz64k_minsize = MMU_PAGESIZE64K; -size_t text_pgsz4m_minsize = MMU_PAGESIZE4M; -size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K; +size_t max_uheap_lpsize = MMU_PAGESIZE4M; +size_t default_uheap_lpsize = MMU_PAGESIZE; +size_t max_ustack_lpsize = MMU_PAGESIZE4M; +size_t default_ustack_lpsize = MMU_PAGESIZE; +size_t max_privmap_lpsize = MMU_PAGESIZE4M; +size_t max_uidata_lpsize = MMU_PAGESIZE; +size_t max_utext_lpsize = MMU_PAGESIZE4M; +size_t max_shm_lpsize = MMU_PAGESIZE4M; -size_t max_shm_lpsize = ULONG_MAX; - -/* - * Platforms with smaller or larger TLBs may wish to change this. Most - * sun4u platforms can hold 1024 8K entries by default and most processes - * are observed to be < 6MB on these machines, so we decide to move up - * here to give ourselves some wiggle room for other, smaller segments. - */ -int auto_lpg_tlb_threshold = 768; -int auto_lpg_minszc = TTE4M; -int auto_lpg_maxszc = TTE4M; -size_t auto_lpg_heap_default = MMU_PAGESIZE; -size_t auto_lpg_stack_default = MMU_PAGESIZE; -size_t auto_lpg_va_default = MMU_PAGESIZE; -size_t auto_lpg_remap_threshold = 0; -/* - * Number of pages in 1 GB. Don't enable automatic large pages if we have - * fewer than this many pages. - */ -pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); +void +adjust_data_maxlpsize(size_t ismpagesize) +{ + if (max_uheap_lpsize == MMU_PAGESIZE4M) { + max_uheap_lpsize = ismpagesize; + } + if (max_ustack_lpsize == MMU_PAGESIZE4M) { + max_ustack_lpsize = ismpagesize; + } + if (max_privmap_lpsize == MMU_PAGESIZE4M) { + max_privmap_lpsize = ismpagesize; + } + if (max_shm_lpsize == MMU_PAGESIZE4M) { + max_shm_lpsize = ismpagesize; + } +} /* * map_addr_proc() is the routine called when the system is to diff --git a/usr/src/uts/sun4v/vm/mach_vm_dep.c b/usr/src/uts/sun4v/vm/mach_vm_dep.c index 44bf58dc1e..4dfa62d3c9 100644 --- a/usr/src/uts/sun4v/vm/mach_vm_dep.c +++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c @@ -91,63 +91,18 @@ hw_pagesize_t hw_page_array[] = { }; /* - * Enable usage of 64k/4M pages for text and 64k pages for initdata for - * all sun4v platforms. These variables can be overwritten by the platmod - * or the CPU module. User can also change the setting via /etc/system. + * Maximum and default segment size tunables for user heap, stack, private + * and shared anonymous memory, and user text and initialized data. */ - -int use_text_pgsz64k = 1; -int use_text_pgsz4m = 1; -int use_initdata_pgsz64k = 1; - -/* - * disable_text_largepages and disable_initdata_largepages bitmaks reflect - * both unconfigured and undesirable page sizes. Current implementation - * supports 64K and 4M page sizes for text and only 64K for data. Rest of - * the page sizes are not currently supported, hence disabled below. In - * future, when support is added for any other page size, it should be - * reflected below. - * - * Note that these bitmask can be set in platform or CPU specific code to - * disable page sizes that should not be used. These variables normally - * shouldn't be changed via /etc/system. 
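adjust_data_maxlpsize(), whose sun4u definition appears just above, raises the data-side caps when the system is configured for 32M or 256M ISM pages, but only for caps still sitting at their 4M defaults, so explicit /etc/system overrides survive. A userland restatement with an assumed starting state:

#include <stdio.h>
#include <stddef.h>

#define MMU_PAGESIZE4M	4194304UL
#define MMU_PAGESIZE32M	33554432UL

static size_t max_uheap_lpsize = MMU_PAGESIZE4M;
static size_t max_ustack_lpsize = MMU_PAGESIZE4M;
static size_t max_privmap_lpsize = MMU_PAGESIZE4M;
static size_t max_shm_lpsize = 8192;	/* pretend an operator lowered this one */

/* Raise only the caps that are still at the 4M default. */
static void
adjust_data_maxlpsize(size_t ismpagesize)
{
	if (max_uheap_lpsize == MMU_PAGESIZE4M)
		max_uheap_lpsize = ismpagesize;
	if (max_ustack_lpsize == MMU_PAGESIZE4M)
		max_ustack_lpsize = ismpagesize;
	if (max_privmap_lpsize == MMU_PAGESIZE4M)
		max_privmap_lpsize = ismpagesize;
	if (max_shm_lpsize == MMU_PAGESIZE4M)
		max_shm_lpsize = ismpagesize;
}

int
main(void)
{
	adjust_data_maxlpsize(MMU_PAGESIZE32M);
	printf("heap=%zu stack=%zu priv=%zu shm=%zu\n",
	    max_uheap_lpsize, max_ustack_lpsize,
	    max_privmap_lpsize, max_shm_lpsize);
	return (0);
}
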
- * - * These bitmasks are also updated within hat_init to reflect unsupported - * page sizes on a sun4v processor per mmu_exported_pagesize_mask global - * variable. - */ - -int disable_text_largepages = - (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M) | (1 << TTE2G) | - (1 << TTE16G); -int disable_initdata_largepages = - (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M) | (1 << TTE256M) | - (1 << TTE2G) | (1 << TTE16G); - -/* - * Minimum segment size tunables before 64K or 4M large pages - * should be used to map it. - */ -size_t text_pgsz64k_minsize = MMU_PAGESIZE64K; -size_t text_pgsz4m_minsize = MMU_PAGESIZE4M; -size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K; - +size_t max_uheap_lpsize = MMU_PAGESIZE64K; +size_t default_uheap_lpsize = MMU_PAGESIZE64K; +size_t max_ustack_lpsize = MMU_PAGESIZE64K; +size_t default_ustack_lpsize = MMU_PAGESIZE64K; +size_t max_privmap_lpsize = MMU_PAGESIZE64K; +size_t max_uidata_lpsize = MMU_PAGESIZE64K; +size_t max_utext_lpsize = MMU_PAGESIZE4M; size_t max_shm_lpsize = MMU_PAGESIZE4M; -/* Auto large page tunables. */ -int auto_lpg_tlb_threshold = 32; -int auto_lpg_minszc = TTE64K; -int auto_lpg_maxszc = TTE64K; -size_t auto_lpg_heap_default = MMU_PAGESIZE64K; -size_t auto_lpg_stack_default = MMU_PAGESIZE64K; -size_t auto_lpg_va_default = MMU_PAGESIZE64K; -size_t auto_lpg_remap_threshold = 0; /* always remap */ -/* - * Number of pages in 1 GB. Don't enable automatic large pages if we have - * fewer than this many pages. - */ -pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); - /* * map_addr_proc() is the routine called when the system is to * choose an address for the user. We will pick an address |