diff options
author | aguzovsk <none@none> | 2006-07-21 21:13:27 -0700 |
---|---|---|
committer | aguzovsk <none@none> | 2006-07-21 21:13:27 -0700 |
commit | 07b65a646252c0f0ec200acf82c256c5bf6883b1 (patch) | |
tree | 376eab52fa782b1e9b5fc1ef1ce725a0a7e10fe2 | |
parent | 33bc63788dbca06354c73b744c41d4781504e89b (diff) | |
download | illumos-gate-07b65a646252c0f0ec200acf82c256c5bf6883b1.tar.gz |
4614772 MPSS to be extended to shared memory
6413095 madvise() freeing up whole pages even when passed sizes smaller than a page
6416714 anon_map_getpages panic during stress test
6428347 multilple pagelocks/softlocks on the same large page should decrement availrmem just once
-rw-r--r-- | usr/src/uts/common/fs/swapfs/swap_vnops.c | 21 | ||||
-rw-r--r-- | usr/src/uts/common/os/grow.c | 9 | ||||
-rw-r--r-- | usr/src/uts/common/os/shm.c | 35 | ||||
-rw-r--r-- | usr/src/uts/common/sys/vmsystm.h | 8 | ||||
-rw-r--r-- | usr/src/uts/common/syscall/memcntl.c | 10 | ||||
-rw-r--r-- | usr/src/uts/common/vm/anon.h | 12 | ||||
-rw-r--r-- | usr/src/uts/common/vm/page.h | 6 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_spt.c | 93 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.c | 671 | ||||
-rw-r--r-- | usr/src/uts/common/vm/seg_vn.h | 11 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_anon.c | 311 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_as.c | 140 | ||||
-rw-r--r-- | usr/src/uts/common/vm/vm_page.c | 22 | ||||
-rw-r--r-- | usr/src/uts/i86pc/vm/vm_machdep.c | 24 | ||||
-rw-r--r-- | usr/src/uts/sfmmu/vm/hat_sfmmu.c | 2 | ||||
-rw-r--r-- | usr/src/uts/sun4/vm/vm_dep.c | 46 | ||||
-rw-r--r-- | usr/src/uts/sun4u/cpu/us3_cheetah.c | 9 | ||||
-rw-r--r-- | usr/src/uts/sun4u/vm/mach_vm_dep.c | 9 | ||||
-rw-r--r-- | usr/src/uts/sun4v/vm/mach_vm_dep.c | 2 |
19 files changed, 1111 insertions, 330 deletions
diff --git a/usr/src/uts/common/fs/swapfs/swap_vnops.c b/usr/src/uts/common/fs/swapfs/swap_vnops.c index efc547cee7..348392da2c 100644 --- a/usr/src/uts/common/fs/swapfs/swap_vnops.c +++ b/usr/src/uts/common/fs/swapfs/swap_vnops.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -68,8 +67,9 @@ static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len, struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr); int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len, - uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp, spgcnt_t *nreloc, - struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr); + uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp, + uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr, + enum seg_rw rw, struct cred *cr); static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off, size_t *lenp, int flags, struct cred *cr); @@ -316,6 +316,7 @@ swap_getconpage( page_t *pl[], size_t plsz, page_t *conpp, + uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr, @@ -362,18 +363,22 @@ swap_getconpage( */ if (pp != conpp) { ASSERT(rw != S_CREATE); + ASSERT(pszc != NULL); ASSERT(PAGE_SHARED(pp)); if (pp->p_szc < conpp->p_szc) { + *pszc = pp->p_szc; page_unlock(pp); err = -1; - } else if (pp->p_szc > conpp->p_szc) { + } else if (pp->p_szc > conpp->p_szc && + seg->s_szc > conpp->p_szc) { + *pszc = MIN(pp->p_szc, seg->s_szc); page_unlock(pp); err = -2; } else { pl[0] = pp; pl[1] = NULL; if (page_pptonum(pp) & - (page_get_pagecnt(pp->p_szc) - 1)) + (page_get_pagecnt(conpp->p_szc) - 1)) cmn_err(CE_PANIC, "swap_getconpage: no root"); } return (err); diff --git a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c index e14ed3f43e..c1e3326c67 100644 --- a/usr/src/uts/common/os/grow.c +++ b/usr/src/uts/common/os/grow.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -587,7 +586,7 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags, * If the requested address or length are aligned to the selected large * page size, l1 or l3 may also be 0. 
*/ - if (use_zmap_lpg) { + if (use_zmap_lpg && a.type == MAP_PRIVATE) { pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL); if (pgsz <= PAGESIZE || len < pgsz) { diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c index b39b801a08..a9ea0b59fc 100644 --- a/usr/src/uts/common/os/shm.c +++ b/usr/src/uts/common/os/shm.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -761,6 +760,10 @@ shmdt(caddr_t addr) mutex_exit(&pp->p_lock); return (EINVAL); } + if (sap->sa_addr != addr) { + mutex_exit(&pp->p_lock); + return (EINVAL); + } avl_remove(pp->p_segacct, sap); mutex_exit(&pp->p_lock); @@ -942,13 +945,21 @@ shm_sacompar(const void *x, const void *y) segacct_t *sa1 = (segacct_t *)x; segacct_t *sa2 = (segacct_t *)y; - if (sa1->sa_addr < sa2->sa_addr) + if (sa1->sa_addr < sa2->sa_addr) { return (-1); - if (sa1->sa_addr > sa2->sa_addr) + } else if (sa2->sa_len != 0) { + if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) { + return (1); + } else if (sa1->sa_len != 0) { + return (1); + } else { + return (0); + } + } else if (sa1->sa_addr > sa2->sa_addr) { return (1); - if ((sa1->sa_len == 0) || (sa2->sa_len == 0)) + } else { return (0); - return (1); + } } /* @@ -1240,7 +1251,13 @@ shm_rm_amp(struct anon_map *amp, uint_t lckflag) * Free up the anon_map. */ lgrp_shm_policy_fini(amp, NULL); - anon_free(amp->ahp, 0, amp->size); + if (amp->a_szc != 0) { + ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); + anon_shmap_free_pages(amp, 0, amp->size); + ANON_LOCK_EXIT(&->a_rwlock); + } else { + anon_free(amp->ahp, 0, amp->size); + } anon_unresv(amp->swresv); anonmap_free(amp); } diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h index 199753d799..1f0aea0235 100644 --- a/usr/src/uts/common/sys/vmsystm.h +++ b/usr/src/uts/common/sys/vmsystm.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -122,6 +121,7 @@ extern int useracc(void *, size_t, int); extern size_t map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap); extern uint_t map_execseg_pgszcvec(int, caddr_t, size_t); +extern uint_t map_shm_pgszcvec(caddr_t, size_t, uintptr_t); extern void map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags); extern int map_addr_vacalign_check(caddr_t, u_offset_t); diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c index 85f54e9944..6bdf5a1cc2 100644 --- a/usr/src/uts/common/syscall/memcntl.c +++ b/usr/src/uts/common/syscall/memcntl.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -340,6 +339,9 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask) } return (error); case MC_ADVISE: + if ((uintptr_t)arg == MADV_FREE) { + len &= PAGEMASK; + } switch ((uintptr_t)arg) { case MADV_WILLNEED: fc = as_faulta(as, addr, len); diff --git a/usr/src/uts/common/vm/anon.h b/usr/src/uts/common/vm/anon.h index 97a8b31ca4..294867ca01 100644 --- a/usr/src/uts/common/vm/anon.h +++ b/usr/src/uts/common/vm/anon.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -282,7 +281,7 @@ struct anon_map { struct anon_hdr *ahp; /* anon array header pointer, containing */ /* anon pointer array(s) */ size_t swresv; /* swap space reserved for this anon_map */ - uint_t refcnt; /* reference count on this structure */ + ulong_t refcnt; /* reference count on this structure */ ushort_t a_szc; /* max szc among shared processes */ void *locality; /* lgroup locality info */ }; @@ -362,7 +361,7 @@ extern void anon_disclaim(struct anon_map *, ulong_t, size_t, int); extern int anon_getpage(struct anon **, uint_t *, struct page **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); extern int swap_getconpage(struct vnode *, u_offset_t, size_t, - uint_t *, page_t *[], size_t, page_t *, + uint_t *, page_t *[], size_t, page_t *, uint_t *, spgcnt_t *, struct seg *, caddr_t, enum seg_rw, struct cred *); extern int anon_map_getpages(struct anon_map *, ulong_t, @@ -384,6 +383,7 @@ extern int anon_map_createpages(struct anon_map *, ulong_t, extern int anon_map_demotepages(struct anon_map *, ulong_t, struct seg *, caddr_t, uint_t, struct vpage [], struct cred *); +extern void anon_shmap_free_pages(struct anon_map *, ulong_t, size_t); extern int anon_resvmem(size_t, uint_t); extern void anon_unresv(size_t); extern struct anon_map *anonmap_alloc(size_t, size_t); diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index 38faf2afef..590675b8a9 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -497,7 +497,7 @@ typedef struct page { #if defined(_LP64) uint_t p_sharepad; /* pad for growing p_share */ #endif - uint_t p_msresv_1; /* reserved for future use */ + uint_t p_slckcnt; /* number of softlocks */ #if defined(__sparc) uint_t p_kpmref; /* number of kpm mapping sharers */ struct kpme *p_kpmelist; /* kpm specific mapping info */ @@ -516,6 +516,10 @@ typedef struct page { typedef page_t devpage_t; #define devpage page +#define PAGE_LOCK_MAXIMUM \ + ((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1) + +#define PAGE_SLOCK_MAXIMUM UINT_MAX /* * Page hash table is a power-of-two in size, externally chained diff --git a/usr/src/uts/common/vm/seg_spt.c b/usr/src/uts/common/vm/seg_spt.c index 444ac3c153..774a9c3b9f 100644 --- a/usr/src/uts/common/vm/seg_spt.c +++ b/usr/src/uts/common/vm/seg_spt.c @@ -376,6 +376,11 @@ segspt_create(struct seg *seg, caddr_t argsp) struct vnode *vp; page_t **ppa; uint_t hat_flags; + size_t pgsz; + pgcnt_t pgcnt; + caddr_t a; + pgcnt_t pidx; + size_t sz; /* * We are holding the a_lock on the underlying dummy as, @@ -419,7 +424,9 @@ segspt_create(struct seg *seg, caddr_t argsp) seg->s_szc = sptcargs->szc; ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); - amp->a_szc = seg->s_szc; + if (seg->s_szc > amp->a_szc) { + amp->a_szc = seg->s_szc; + } ANON_LOCK_EXIT(&->a_rwlock); /* @@ -506,8 +513,20 @@ segspt_create(struct seg *seg, caddr_t argsp) if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL)) hat_flags |= HAT_LOAD_LOCK; - hat_memload_array(seg->s_as->a_hat, addr, ptob(npages), - ppa, sptd->spt_prot, hat_flags); + /* + * Load translations one lare page at a time + * to make sure we don't create mappings bigger than + * segment's size code in case underlying pages + * are shared with segvn's segment that uses bigger + * size code than we do. 
+ */ + pgsz = page_get_pagesize(seg->s_szc); + pgcnt = page_get_pagecnt(seg->s_szc); + for (a = addr, pidx = 0; pidx < npages; a += pgsz, pidx += pgcnt) { + sz = MIN(pgsz, ptob(npages - pidx)); + hat_memload_array(seg->s_as->a_hat, a, sz, + &ppa[pidx], sptd->spt_prot, hat_flags); + } /* * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP, @@ -1701,13 +1720,17 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr, struct as *curspt = shmd->shm_sptas; struct spt_data *sptd = sptseg->s_data; pgcnt_t npages; - size_t share_sz, size; + size_t size; caddr_t segspt_addr, shm_addr; page_t **ppa; int i; ulong_t an_idx = 0; int err = 0; int dyn_ism_unmap = hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0); + size_t pgsz; + pgcnt_t pgcnt; + caddr_t a; + pgcnt_t pidx; #ifdef lint hat = hat; @@ -1740,9 +1763,10 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr, * layer by calling hat_memload_array() with differing page sizes * over a given virtual range. */ - share_sz = page_get_pagesize(sptseg->s_szc); - shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz); - size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_sz); + pgsz = page_get_pagesize(sptseg->s_szc); + pgcnt = page_get_pagecnt(sptseg->s_szc); + shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); + size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz); npages = btopr(size); /* @@ -1792,15 +1816,19 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr, goto dism_err; } AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); + a = segspt_addr; + pidx = 0; if (type == F_SOFTLOCK) { /* * Load up the translation keeping it * locked and don't unlock the page. */ - hat_memload_array(sptseg->s_as->a_hat, segspt_addr, - size, ppa, sptd->spt_prot, - HAT_LOAD_LOCK | HAT_LOAD_SHARE); + for (; pidx < npages; a += pgsz, pidx += pgcnt) { + hat_memload_array(sptseg->s_as->a_hat, + a, pgsz, &ppa[pidx], sptd->spt_prot, + HAT_LOAD_LOCK | HAT_LOAD_SHARE); + } } else { if (hat == seg->s_as->a_hat) { @@ -1812,9 +1840,13 @@ segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr, npages); /* CPU HAT */ - hat_memload_array(sptseg->s_as->a_hat, - segspt_addr, size, ppa, sptd->spt_prot, - HAT_LOAD_SHARE); + for (; pidx < npages; + a += pgsz, pidx += pgcnt) { + hat_memload_array(sptseg->s_as->a_hat, + a, pgsz, &ppa[pidx], + sptd->spt_prot, + HAT_LOAD_SHARE); + } } else { /* XHAT. Pass real address */ hat_memload_array(hat, shm_addr, @@ -1896,7 +1928,7 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr, struct as *curspt = shmd->shm_sptas; struct spt_data *sptd = sptseg->s_data; pgcnt_t npages; - size_t share_size, size; + size_t size; caddr_t sptseg_addr, shm_addr; page_t *pp, **ppa; int i; @@ -1906,6 +1938,11 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr, struct anon_map *amp; /* XXX - for locknest */ struct anon *ap = NULL; anon_sync_obj_t cookie; + size_t pgsz; + pgcnt_t pgcnt; + caddr_t a; + pgcnt_t pidx; + size_t sz; #ifdef lint hat = hat; @@ -1943,9 +1980,10 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr, * layer by calling hat_memload_array() with differing page sizes * over a given virtual range. 
*/ - share_size = page_get_pagesize(sptseg->s_szc); - shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size); - size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_size); + pgsz = page_get_pagesize(sptseg->s_szc); + pgcnt = page_get_pagecnt(sptseg->s_szc); + shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), pgsz); + size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), pgsz); npages = btopr(size); /* @@ -2045,14 +2083,19 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr, * underlying HAT layer. */ AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); + a = sptseg_addr; + pidx = 0; if (type == F_SOFTLOCK) { /* * Load up the translation keeping it * locked and don't unlock the page. */ - hat_memload_array(sptseg->s_as->a_hat, sptseg_addr, - ptob(npages), ppa, sptd->spt_prot, - HAT_LOAD_LOCK | HAT_LOAD_SHARE); + for (; pidx < npages; a += pgsz, pidx += pgcnt) { + sz = MIN(pgsz, ptob(npages - pidx)); + hat_memload_array(sptseg->s_as->a_hat, a, + sz, &ppa[pidx], sptd->spt_prot, + HAT_LOAD_LOCK | HAT_LOAD_SHARE); + } } else { if (hat == seg->s_as->a_hat) { @@ -2064,9 +2107,13 @@ segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr, npages); /* CPU HAT */ - hat_memload_array(sptseg->s_as->a_hat, - sptseg_addr, ptob(npages), ppa, - sptd->spt_prot, HAT_LOAD_SHARE); + for (; pidx < npages; + a += pgsz, pidx += pgcnt) { + sz = MIN(pgsz, ptob(npages - pidx)); + hat_memload_array(sptseg->s_as->a_hat, + a, sz, &ppa[pidx], + sptd->spt_prot, HAT_LOAD_SHARE); + } } else { /* XHAT. Pass real address */ hat_memload_array(hat, shm_addr, diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c index 2f8b6fbcd1..6c26b2d49c 100644 --- a/usr/src/uts/common/vm/seg_vn.c +++ b/usr/src/uts/common/vm/seg_vn.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -176,7 +175,7 @@ static faultcode_t segvn_fault_anonpages(struct hat *, struct seg *, caddr_t, caddr_t, enum fault_type, enum seg_rw, caddr_t, caddr_t, int); static faultcode_t segvn_faultpage(struct hat *, struct seg *, caddr_t, u_offset_t, struct vpage *, page_t **, uint_t, - enum fault_type, enum seg_rw, int); + enum fault_type, enum seg_rw, int, int); static void segvn_vpage(struct seg *); static void segvn_purge(struct seg *seg); @@ -185,12 +184,15 @@ static int segvn_reclaim(struct seg *, caddr_t, size_t, struct page **, static int sameprot(struct seg *, caddr_t, size_t); -static int segvn_demote_range(struct seg *, caddr_t, size_t, int); +static int segvn_demote_range(struct seg *, caddr_t, size_t, int, uint_t); static int segvn_clrszc(struct seg *); static struct seg *segvn_split_seg(struct seg *, caddr_t); static int segvn_claim_pages(struct seg *, struct vpage *, u_offset_t, ulong_t, uint_t); +static int segvn_pp_lock_anonpages(page_t *, int); +static void segvn_pp_unlock_anonpages(page_t *, int); + static struct kmem_cache *segvn_cache; #ifdef VM_STATS @@ -272,6 +274,7 @@ ulong_t segvn_vmpss_clrszc_err; ulong_t segvn_fltvnpages_clrszc_cnt; ulong_t segvn_fltvnpages_clrszc_err; ulong_t segvn_setpgsz_align_err; +ulong_t segvn_setpgsz_anon_align_err; ulong_t segvn_setpgsz_getattr_err; ulong_t segvn_setpgsz_eof_err; ulong_t segvn_faultvnmpss_align_err1; @@ -388,8 +391,8 @@ segvn_create(struct seg *seg, void *argsp) a->flags &= ~MAP_NORESERVE; if (a->szc != 0) { - if (segvn_lpg_disable != 0 || a->amp != NULL || - (a->type == MAP_SHARED && a->vp == NULL) || + if (segvn_lpg_disable != 0 || + (a->amp != NULL && a->type == MAP_PRIVATE) || (a->flags & MAP_NORESERVE) || seg->s_as == &kas) { a->szc = 0; } else { @@ -412,6 +415,12 @@ segvn_create(struct seg *seg, void *argsp) a->offset & PAGEMASK)) { a->szc = 0; } + } else if (a->amp != NULL) { + pgcnt_t anum = btopr(a->offset); + pgcnt_t pgcnt = page_get_pagecnt(a->szc); + if (!IS_P2ALIGNED(anum, pgcnt)) { + a->szc = 0; + } } } } @@ -657,6 +666,9 @@ segvn_create(struct seg *seg, void *argsp) */ ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); amp->refcnt++; + if (a->szc > amp->a_szc) { + amp->a_szc = a->szc; + } ANON_LOCK_EXIT(&->a_rwlock); svd->anon_index = anon_num; svd->swresv = 0; @@ -754,10 +766,10 @@ segvn_create(struct seg *seg, void *argsp) * Concatenate two existing segments, if possible. * Return 0 on success, -1 if two segments are not compatible * or -2 on memory allocation failure. - * If private == 1 then try and concat segments with private pages. + * If amp_cat == 1 then try and concat segments with anon maps */ static int -segvn_concat(struct seg *seg1, struct seg *seg2, int private) +segvn_concat(struct seg *seg1, struct seg *seg2, int amp_cat) { struct segvn_data *svd1 = seg1->s_data; struct segvn_data *svd2 = seg2->s_data; @@ -793,13 +805,23 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int private) /* * Fail early if we're not supposed to concatenate - * private pages. + * segments with non NULL amp. */ - if ((private == 0 || svd1->type != MAP_PRIVATE) && - (amp1 != NULL || amp2 != NULL)) { + if (amp_cat == 0 && (amp1 != NULL || amp2 != NULL)) { return (-1); } + if (svd1->vp == NULL && svd1->type == MAP_SHARED) { + if (amp1 != amp2) { + return (-1); + } + if (amp1 != NULL && svd1->anon_index + btop(seg1->s_size) != + svd2->anon_index) { + return (-1); + } + ASSERT(amp1 == NULL || amp1->refcnt >= 2); + } + /* * If either seg has vpages, create a new merged vpage array. 
*/ @@ -840,13 +862,24 @@ segvn_concat(struct seg *seg1, struct seg *seg2, int private) /* * If either segment has private pages, create a new merged anon - * array. + * array. If mergeing shared anon segments just decrement anon map's + * refcnt. */ - if (amp1 != NULL || amp2 != NULL) { + if (amp1 != NULL && svd1->type == MAP_SHARED) { + ASSERT(amp1 == amp2 && svd1->vp == NULL); + ANON_LOCK_ENTER(&1->a_rwlock, RW_WRITER); + ASSERT(amp1->refcnt >= 2); + amp1->refcnt--; + ANON_LOCK_EXIT(&1->a_rwlock); + svd2->amp = NULL; + } else if (amp1 != NULL || amp2 != NULL) { struct anon_hdr *nahp; struct anon_map *namp = NULL; - size_t asize = seg1->s_size + seg2->s_size; + size_t asize; + + ASSERT(svd1->type == MAP_PRIVATE); + asize = seg1->s_size + seg2->s_size; if ((nahp = anon_create(btop(asize), ANON_NOSLEEP)) == NULL) { if (nvpage != NULL) { kmem_free(nvpage, nvpsize); @@ -1442,7 +1475,7 @@ retry: if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(len, pgsz)) { ASSERT(seg->s_base != addr || seg->s_size != len); VM_STAT_ADD(segvnvmstats.demoterange[0]); - err = segvn_demote_range(seg, addr, len, SDR_END); + err = segvn_demote_range(seg, addr, len, SDR_END, 0); if (err == 0) { return (IE_RETRY); } @@ -1490,6 +1523,7 @@ retry: dpages = btop(len); npages = opages - dpages; amp = svd->amp; + ASSERT(amp == NULL || amp->a_szc >= seg->s_szc); /* * Check for beginning of segment @@ -1514,17 +1548,27 @@ retry: /* * Free up now unused parts of anon_map array. */ - if (seg->s_szc != 0) { - anon_free_pages(amp->ahp, - svd->anon_index, len, seg->s_szc); + if (amp->a_szc == seg->s_szc) { + if (seg->s_szc != 0) { + anon_free_pages(amp->ahp, + svd->anon_index, len, + seg->s_szc); + } else { + anon_free(amp->ahp, + svd->anon_index, + len); + } } else { - anon_free(amp->ahp, svd->anon_index, - len); + ASSERT(svd->type == MAP_SHARED); + ASSERT(amp->a_szc > seg->s_szc); + anon_shmap_free_pages(amp, + svd->anon_index, len); } /* - * Unreserve swap space for the unmapped chunk - * of this segment in case it's MAP_SHARED + * Unreserve swap space for the + * unmapped chunk of this segment in + * case it's MAP_SHARED */ if (svd->type == MAP_SHARED) { anon_unresv(len); @@ -1580,20 +1624,29 @@ retry: ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { /* - * Free up now unused parts of anon_map array + * Free up now unused parts of anon_map array. */ - if (seg->s_szc != 0) { - ulong_t an_idx = svd->anon_index + - npages; - anon_free_pages(amp->ahp, an_idx, - len, seg->s_szc); + ulong_t an_idx = svd->anon_index + npages; + if (amp->a_szc == seg->s_szc) { + if (seg->s_szc != 0) { + anon_free_pages(amp->ahp, + an_idx, len, + seg->s_szc); + } else { + anon_free(amp->ahp, an_idx, + len); + } } else { - anon_free(amp->ahp, - svd->anon_index + npages, len); + ASSERT(svd->type == MAP_SHARED); + ASSERT(amp->a_szc > seg->s_szc); + anon_shmap_free_pages(amp, + an_idx, len); } + /* - * Unreserve swap space for the unmapped chunk - * of this segment in case it's MAP_SHARED + * Unreserve swap space for the + * unmapped chunk of this segment in + * case it's MAP_SHARED */ if (svd->type == MAP_SHARED) { anon_unresv(len); @@ -1689,31 +1742,36 @@ retry: ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); if (amp->refcnt == 1 || svd->type == MAP_PRIVATE) { /* - * Free up now unused parts of anon_map array + * Free up now unused parts of anon_map array. 
*/ - if (seg->s_szc != 0) { - ulong_t an_idx = svd->anon_index + opages; - anon_free_pages(amp->ahp, an_idx, len, - seg->s_szc); + ulong_t an_idx = svd->anon_index + opages; + if (amp->a_szc == seg->s_szc) { + if (seg->s_szc != 0) { + anon_free_pages(amp->ahp, an_idx, len, + seg->s_szc); + } else { + anon_free(amp->ahp, an_idx, + len); + } } else { - anon_free(amp->ahp, svd->anon_index + opages, - len); + ASSERT(svd->type == MAP_SHARED); + ASSERT(amp->a_szc > seg->s_szc); + anon_shmap_free_pages(amp, an_idx, len); } /* - * Unreserve swap space for the unmapped chunk - * of this segment in case it's MAP_SHARED + * Unreserve swap space for the + * unmapped chunk of this segment in + * case it's MAP_SHARED */ if (svd->type == MAP_SHARED) { anon_unresv(len); amp->swresv -= len; } } - nsvd->anon_index = svd->anon_index + btop((uintptr_t)(nseg->s_base - seg->s_base)); if (svd->type == MAP_SHARED) { - ASSERT(seg->s_szc == 0); amp->refcnt++; nsvd->amp = amp; } else { @@ -1799,6 +1857,7 @@ segvn_free(struct seg *seg) * up all the anon slot pointers that we can. */ ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); + ASSERT(amp->a_szc >= seg->s_szc); if (--amp->refcnt == 0) { if (svd->type == MAP_PRIVATE) { /* @@ -1819,8 +1878,12 @@ segvn_free(struct seg *seg) * anon_map's worth of stuff and * release any swap reservation. */ - ASSERT(seg->s_szc == 0); - anon_free(amp->ahp, 0, amp->size); + if (amp->a_szc != 0) { + anon_shmap_free_pages(amp, 0, + amp->size); + } else { + anon_free(amp->ahp, 0, amp->size); + } if ((len = amp->swresv) != 0) { anon_unresv(len); TRACE_3(TR_FAC_VM, TR_ANON_PROC, @@ -1876,6 +1939,140 @@ segvn_free(struct seg *seg) kmem_cache_free(segvn_cache, svd); } +ulong_t segvn_lpglck_limit = 0; +/* + * Support routines used by segvn_pagelock() and softlock faults for anonymous + * pages to implement availrmem accounting in a way that makes sure the + * same memory is accounted just once for all softlock/pagelock purposes. + * This prevents a bug when availrmem is quickly incorrectly exausted from + * several pagelocks to different parts of the same large page since each + * pagelock has to decrement availrmem by the size of the entire large + * page. Note those pages are not COW shared until softunlock/pageunlock so + * we don't need to use cow style accounting here. We also need to make sure + * the entire large page is accounted even if softlock range is less than the + * entire large page because large anon pages can't be demoted when any of + * constituent pages is locked. The caller calls this routine for every page_t + * it locks. The very first page in the range may not be the root page of a + * large page. For all other pages it's guranteed we are going to visit the + * root of a particular large page before any other constituent page as we are + * locking sequential pages belonging to the same anon map. So we do all the + * locking when the root is encountered except for the very first page. Since + * softlocking is not supported (except S_READ_NOCOW special case) for vmpss + * segments and since vnode pages can be demoted without locking all + * constituent pages vnode pages don't come here. Unlocking relies on the + * fact that pagesize can't change whenever any of constituent large pages is + * locked at least SE_SHARED. This allows unlocking code to find the right + * root and decrement availrmem by the same amount it was incremented when the + * page was locked. 
+ */ +static int +segvn_pp_lock_anonpages(page_t *pp, int first) +{ + pgcnt_t pages; + pfn_t pfn; + uchar_t szc = pp->p_szc; + + ASSERT(PAGE_LOCKED(pp)); + ASSERT(pp->p_vnode != NULL); + ASSERT(IS_SWAPFSVP(pp->p_vnode)); + + /* + * pagesize won't change as long as any constituent page is locked. + */ + pages = page_get_pagecnt(pp->p_szc); + pfn = page_pptonum(pp); + + if (!first) { + if (!IS_P2ALIGNED(pfn, pages)) { +#ifdef DEBUG + pp = &pp[-(spgcnt_t)(pfn & (pages - 1))]; + pfn = page_pptonum(pp); + ASSERT(IS_P2ALIGNED(pfn, pages)); + ASSERT(pp->p_szc == szc); + ASSERT(pp->p_vnode != NULL); + ASSERT(IS_SWAPFSVP(pp->p_vnode)); + ASSERT(pp->p_slckcnt != 0); +#endif /* DEBUG */ + return (1); + } + } else if (!IS_P2ALIGNED(pfn, pages)) { + pp = &pp[-(spgcnt_t)(pfn & (pages - 1))]; +#ifdef DEBUG + pfn = page_pptonum(pp); + ASSERT(IS_P2ALIGNED(pfn, pages)); + ASSERT(pp->p_szc == szc); + ASSERT(pp->p_vnode != NULL); + ASSERT(IS_SWAPFSVP(pp->p_vnode)); +#endif /* DEBUG */ + } + + /* + * pp is a root page. + * We haven't locked this large page yet. + */ + page_struct_lock(pp); + if (pp->p_slckcnt != 0) { + if (pp->p_slckcnt < PAGE_SLOCK_MAXIMUM) { + pp->p_slckcnt++; + page_struct_unlock(pp); + return (1); + } + page_struct_unlock(pp); + segvn_lpglck_limit++; + return (0); + } + mutex_enter(&freemem_lock); + if (availrmem < tune.t_minarmem + pages) { + mutex_exit(&freemem_lock); + page_struct_unlock(pp); + return (0); + } + pp->p_slckcnt++; + availrmem -= pages; + mutex_exit(&freemem_lock); + page_struct_unlock(pp); + return (1); +} + +static void +segvn_pp_unlock_anonpages(page_t *pp, int first) +{ + pgcnt_t pages; + pfn_t pfn; + + ASSERT(PAGE_LOCKED(pp)); + ASSERT(pp->p_vnode != NULL); + ASSERT(IS_SWAPFSVP(pp->p_vnode)); + + /* + * pagesize won't change as long as any constituent page is locked. + */ + pages = page_get_pagecnt(pp->p_szc); + pfn = page_pptonum(pp); + + if (!first) { + if (!IS_P2ALIGNED(pfn, pages)) { + return; + } + } else if (!IS_P2ALIGNED(pfn, pages)) { + pp = &pp[-(spgcnt_t)(pfn & (pages - 1))]; +#ifdef DEBUG + pfn = page_pptonum(pp); + ASSERT(IS_P2ALIGNED(pfn, pages)); +#endif /* DEBUG */ + } + ASSERT(pp->p_vnode != NULL); + ASSERT(IS_SWAPFSVP(pp->p_vnode)); + ASSERT(pp->p_slckcnt != 0); + page_struct_lock(pp); + if (--pp->p_slckcnt == 0) { + mutex_enter(&freemem_lock); + availrmem += pages; + mutex_exit(&freemem_lock); + } + page_struct_unlock(pp); +} + /* * Do a F_SOFTUNLOCK call over the range requested. The range must have * already been F_SOFTLOCK'ed. 
@@ -1943,10 +2140,15 @@ segvn_softunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw) } TRACE_3(TR_FAC_VM, TR_SEGVN_FAULT, "segvn_fault:pp %p vp %p offset %llx", pp, vp, offset); + if (svd->vp == NULL) { + segvn_pp_unlock_anonpages(pp, adr == addr); + } page_unlock(pp); } mutex_enter(&freemem_lock); /* for availrmem */ - availrmem += btop(len); + if (svd->vp != NULL) { + availrmem += btop(len); + } segvn_pages_locked -= btop(len); svd->softlockcnt -= btop(len); mutex_exit(&freemem_lock); @@ -2028,7 +2230,8 @@ segvn_faultpage( uint_t vpprot, /* access allowed to object pages */ enum fault_type type, /* type of fault */ enum seg_rw rw, /* type of access at fault */ - int brkcow) /* we may need to break cow */ + int brkcow, /* we may need to break cow */ + int first) /* first page for this fault if 1 */ { struct segvn_data *svd = (struct segvn_data *)seg->s_data; page_t *pp, **ppp; @@ -2084,14 +2287,14 @@ segvn_faultpage( prot = svd->prot; } - if (type == F_SOFTLOCK) { + if (type == F_SOFTLOCK && svd->vp != NULL) { mutex_enter(&freemem_lock); if (availrmem <= tune.t_minarmem) { mutex_exit(&freemem_lock); return (FC_MAKE_ERR(ENOMEM)); /* out of real memory */ } else { - svd->softlockcnt++; availrmem--; + svd->softlockcnt++; segvn_pages_locked++; } mutex_exit(&freemem_lock); @@ -2134,6 +2337,21 @@ segvn_faultpage( */ (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP); + + ASSERT(pp->p_szc == 0); + if (type == F_SOFTLOCK) { + if (!segvn_pp_lock_anonpages(pp, first)) { + page_unlock(pp); + err = ENOMEM; + goto out; + } else { + mutex_enter(&freemem_lock); + svd->softlockcnt++; + segvn_pages_locked++; + mutex_exit(&freemem_lock); + } + } + if (enable_mbit_wa) { if (rw == S_WRITE) hat_setmod(pp); @@ -2263,6 +2481,23 @@ segvn_faultpage( * and return. 
*/ if (cow == 0) { + if (type == F_SOFTLOCK && svd->vp == NULL) { + + ASSERT(opp->p_szc == 0 || + (svd->type == MAP_SHARED && + amp != NULL && amp->a_szc != 0)); + + if (!segvn_pp_lock_anonpages(opp, first)) { + page_unlock(opp); + err = ENOMEM; + goto out; + } else { + mutex_enter(&freemem_lock); + svd->softlockcnt++; + segvn_pages_locked++; + mutex_exit(&freemem_lock); + } + } if (IS_VMODSORT(opp->p_vnode) || enable_mbit_wa) { if (rw == S_WRITE) hat_setmod(opp); @@ -2380,6 +2615,20 @@ segvn_faultpage( (void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP); + ASSERT(pp->p_szc == 0); + if (type == F_SOFTLOCK && svd->vp == NULL) { + if (!segvn_pp_lock_anonpages(pp, first)) { + page_unlock(pp); + err = ENOMEM; + goto out; + } else { + mutex_enter(&freemem_lock); + svd->softlockcnt++; + segvn_pages_locked++; + mutex_exit(&freemem_lock); + } + } + ASSERT(!IS_VMODSORT(pp->p_vnode)); if (enable_mbit_wa) { if (rw == S_WRITE) @@ -2406,7 +2655,7 @@ out: if (anon_lock) anon_array_exit(&cookie); - if (type == F_SOFTLOCK) { + if (type == F_SOFTLOCK && svd->vp != NULL) { mutex_enter(&freemem_lock); availrmem++; segvn_pages_locked--; @@ -3660,9 +3909,17 @@ segvn_fault_vnodepages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, } SEGVN_UPDATE_MODBITS(ppa, pages, rw, prot, vpprot); - for (i = 0; i < pages; i++) { - hat_memload(hat, a + (i << PAGESHIFT), - ppa[i], prot & vpprot, hat_flag); + if (upgrdfail && segvn_anypgsz_vnode) { + /* SOFTLOCK case */ + hat_memload_array(hat, a, pgsz, + ppa, prot & vpprot, hat_flag); + } else { + for (i = 0; i < pages; i++) { + hat_memload(hat, + a + (i << PAGESHIFT), + ppa[i], prot & vpprot, + hat_flag); + } } if (!(hat_flag & HAT_LOAD_LOCK)) { for (i = 0; i < pages; i++) { @@ -3942,16 +4199,18 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, faultcode_t err; int ierr; uint_t protchk, prot, vpprot; - int i; + ulong_t i; int hat_flag = (type == F_SOFTLOCK) ? 
HAT_LOAD_LOCK : HAT_LOAD; anon_sync_obj_t cookie; + int first = 1; + int adjszc_chk; + int purged = 0; ASSERT(szc != 0); ASSERT(amp != NULL); ASSERT(enable_mbit_wa == 0); /* no mbit simulations with large pages */ ASSERT(!(svd->flags & MAP_NORESERVE)); ASSERT(type != F_SOFTUNLOCK); - ASSERT(segtype == MAP_PRIVATE); ASSERT(IS_P2ALIGNED(a, maxpgsz)); ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); @@ -3988,6 +4247,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, ppa = kmem_alloc(ppasize, KM_SLEEP); ANON_LOCK_ENTER(&->a_rwlock, RW_READER); for (;;) { + adjszc_chk = 0; for (; a < lpgeaddr; a += pgsz, aindx += pages) { if (svd->pageprot != 0 && IS_P2ALIGNED(a, maxpgsz)) { VM_STAT_ADD(segvnvmstats.fltanpages[3]); @@ -3999,7 +4259,17 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, goto error; } } - if (type == F_SOFTLOCK) { + if (adjszc_chk && IS_P2ALIGNED(a, maxpgsz) && + pgsz < maxpgsz) { + ASSERT(a > lpgaddr); + szc = seg->s_szc; + pgsz = maxpgsz; + pages = btop(pgsz); + ASSERT(IS_P2ALIGNED(aindx, pages)); + lpgeaddr = (caddr_t)P2ROUNDUP((uintptr_t)eaddr, + pgsz); + } + if (type == F_SOFTLOCK && svd->vp != NULL) { mutex_enter(&freemem_lock); if (availrmem < tune.t_minarmem + pages) { mutex_exit(&freemem_lock); @@ -4020,7 +4290,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, if (ierr != 0) { anon_array_exit(&cookie); VM_STAT_ADD(segvnvmstats.fltanpages[4]); - if (type == F_SOFTLOCK) { + if (type == F_SOFTLOCK && svd->vp != NULL) { VM_STAT_ADD(segvnvmstats.fltanpages[5]); mutex_enter(&freemem_lock); availrmem += pages; @@ -4038,12 +4308,41 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, ASSERT(!IS_VMODSORT(ppa[0]->p_vnode)); + ASSERT(segtype == MAP_SHARED || + ppa[0]->p_szc <= szc); + ASSERT(segtype == MAP_PRIVATE || + ppa[0]->p_szc >= szc); + + if (type == F_SOFTLOCK && svd->vp == NULL) { + /* + * All pages in ppa array belong to the same + * large page. This means it's ok to call + * segvn_pp_lock_anonpages just for ppa[0]. + */ + if (!segvn_pp_lock_anonpages(ppa[0], first)) { + for (i = 0; i < pages; i++) { + page_unlock(ppa[i]); + } + err = FC_MAKE_ERR(ENOMEM); + goto error; + } + first = 0; + mutex_enter(&freemem_lock); + svd->softlockcnt += pages; + segvn_pages_locked += pages; + mutex_exit(&freemem_lock); + } + /* * Handle pages that have been marked for migration */ if (lgrp_optimizations()) page_migrate(seg, a, ppa, pages); + if (segtype == MAP_SHARED) { + vpprot |= PROT_WRITE; + } + hat_memload_array(hat, a, pgsz, ppa, prot & vpprot, hat_flag); @@ -4058,6 +4357,7 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, vpage += pages; anon_array_exit(&cookie); + adjszc_chk = 1; } if (a == lpgeaddr) break; @@ -4078,6 +4378,18 @@ segvn_fault_anonpages(struct hat *hat, struct seg *seg, caddr_t lpgaddr, * have relocated locked pages. */ ASSERT(ierr == -1 || ierr == -2); + /* + * For the very first relocation failure try to purge this + * segment's cache so that the relocator can obtain an + * exclusive lock on pages we want to relocate. 
+ */ + if (!purged && ierr == -1 && ppa_szc != (uint_t)-1 && + svd->softlockcnt != 0) { + purged = 1; + segvn_purge(seg); + continue; + } + if (segvn_anypgsz) { ASSERT(ierr == -2 || szc != 0); ASSERT(ierr == -1 || szc < seg->s_szc); @@ -4377,15 +4689,8 @@ top: if (seg->s_szc != 0) { pgsz = page_get_pagesize(seg->s_szc); ASSERT(SEGVN_LOCK_HELD(seg->s_as, &svd->lock)); - /* - * We may need to do relocations so purge seg_pcache to allow - * pages to be locked exclusively. - */ - if (svd->softlockcnt != 0) - segvn_purge(seg); CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr); if (svd->vp == NULL) { - ASSERT(svd->type == MAP_PRIVATE); err = segvn_fault_anonpages(hat, seg, lpgaddr, lpgeaddr, type, rw, addr, addr + len, brkcow); } else { @@ -4704,13 +5009,14 @@ slow: */ for (a = addr; a < addr + len; a += PAGESIZE, off += PAGESIZE) { err = segvn_faultpage(hat, seg, a, off, vpage, plp, vpprot, - type, rw, brkcow); + type, rw, brkcow, a == addr); if (err) { if (amp != NULL) ANON_LOCK_EXIT(&->a_rwlock); - if (type == F_SOFTLOCK && a > addr) + if (type == F_SOFTLOCK && a > addr) { segvn_softunlock(seg, addr, (a - addr), S_OTHER); + } SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); segvn_pagelist_rele(plp); if (pl_alloc_sz) @@ -4938,7 +5244,15 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) if (AS_READ_HELD(seg->s_as, &seg->s_as->a_lock)) return (IE_RETRY); VM_STAT_ADD(segvnvmstats.demoterange[1]); - err = segvn_demote_range(seg, addr, len, SDR_END); + if (svd->type == MAP_PRIVATE || svd->vp != NULL) { + err = segvn_demote_range(seg, addr, len, + SDR_END, 0); + } else { + uint_t szcvec = map_shm_pgszcvec(seg->s_base, + pgsz, (uintptr_t)seg->s_base); + err = segvn_demote_range(seg, addr, len, + SDR_END, szcvec); + } if (err == 0) return (IE_RETRY); if (err == ENOMEM) @@ -4993,7 +5307,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) return (0); /* all done */ } svd->prot = (uchar_t)prot; - } else { + } else if (svd->type == MAP_PRIVATE) { struct anon *ap = NULL; page_t *pp; u_offset_t offset, off; @@ -5026,10 +5340,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) */ for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) { - ASSERT(seg->s_szc == 0 || - (svd->vp != NULL || svd->type == MAP_PRIVATE)); - - if (seg->s_szc != 0 && svd->type == MAP_PRIVATE) { + if (seg->s_szc != 0) { if (amp != NULL) { anon_array_enter(amp, anon_idx, &cookie); @@ -5054,8 +5365,7 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) } if (VPP_ISPPLOCK(svp) && - (VPP_PROT(svp) != prot) && - (svd->type == MAP_PRIVATE)) { + VPP_PROT(svp) != prot) { if (amp == NULL || ap == NULL) { vp = svd->vp; @@ -5109,9 +5419,17 @@ segvn_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); return (IE_NOMEM); } + } else { + segvn_vpage(seg); + evp = &svd->vpage[seg_page(seg, addr + len)]; + for (svp = &svd->vpage[seg_page(seg, addr)]; svp < evp; svp++) { + VPP_SETPROT(svp, prot); + } } - if ((prot & PROT_WRITE) != 0 || (prot & ~PROT_USER) == PROT_NONE) { + if (((prot & PROT_WRITE) != 0 && + (svd->vp != NULL || svd->type == MAP_PRIVATE)) || + (prot & ~PROT_USER) == PROT_NONE) { /* * Either private or shared data with write access (in * which case we need to throw out all former translations @@ -5152,6 +5470,7 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) struct seg *nseg; caddr_t eaddr = addr + len, a; size_t pgsz = page_get_pagesize(szc); + pgcnt_t pgcnt = 
page_get_pagecnt(szc); int err; u_offset_t off = svd->offset + (uintptr_t)(addr - seg->s_base); extern struct vnode kvp; @@ -5178,8 +5497,16 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) return (EINVAL); } - if ((svd->vp == NULL && svd->type == MAP_SHARED) || - (svd->flags & MAP_NORESERVE) || seg->s_as == &kas || + if (amp != NULL && svd->type == MAP_SHARED) { + ulong_t an_idx = svd->anon_index + seg_page(seg, addr); + if (!IS_P2ALIGNED(an_idx, pgcnt)) { + + segvn_setpgsz_anon_align_err++; + return (EINVAL); + } + } + + if ((svd->flags & MAP_NORESERVE) || seg->s_as == &kas || szc > segvn_maxpgszc) { return (EINVAL); } @@ -5237,7 +5564,7 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) if (addr != seg->s_base || eaddr != (seg->s_base + seg->s_size)) { if (szc < seg->s_szc) { VM_STAT_ADD(segvnvmstats.demoterange[2]); - err = segvn_demote_range(seg, addr, len, SDR_RANGE); + err = segvn_demote_range(seg, addr, len, SDR_RANGE, 0); if (err == 0) { return (IE_RETRY); } @@ -5313,10 +5640,11 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) * new szc. */ if (amp != NULL) { - pgcnt_t pgcnt = pgsz >> PAGESHIFT; if (!IS_P2ALIGNED(svd->anon_index, pgcnt)) { struct anon_hdr *nahp; + ASSERT(svd->type == MAP_PRIVATE); + ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); ASSERT(amp->refcnt == 1); nahp = anon_create(btop(amp->size), ANON_NOSLEEP); @@ -5371,7 +5699,11 @@ segvn_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) if (amp != NULL) { ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); - amp->a_szc = szc; + if (svd->type == MAP_PRIVATE) { + amp->a_szc = szc; + } else if (szc > amp->a_szc) { + amp->a_szc = szc; + } ANON_LOCK_EXIT(&->a_rwlock); } @@ -5399,8 +5731,6 @@ segvn_clrszc(struct seg *seg) ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock) || SEGVN_WRITE_HELD(seg->s_as, &svd->lock)); - ASSERT(svd->type == MAP_PRIVATE || - (vp != NULL && svd->amp == NULL)); if (vp == NULL && amp == NULL) { seg->s_szc = 0; @@ -5415,7 +5745,7 @@ segvn_clrszc(struct seg *seg) hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP); - if (amp == NULL) { + if (amp == NULL || svd->type == MAP_SHARED) { seg->s_szc = 0; return (0); } @@ -5575,7 +5905,6 @@ segvn_split_seg(struct seg *seg, caddr_t addr) struct segvn_data *nsvd; ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); - ASSERT(svd->type == MAP_PRIVATE || svd->amp == NULL); ASSERT(addr >= seg->s_base); ASSERT(addr <= seg->s_base + seg->s_size); @@ -5628,7 +5957,7 @@ segvn_split_seg(struct seg *seg, caddr_t addr) bcopy(ovpage + seg_pages(seg), nsvd->vpage, nbytes); kmem_free(ovpage, bytes + nbytes); } - if (svd->amp != NULL) { + if (svd->amp != NULL && svd->type == MAP_PRIVATE) { struct anon_map *oamp = svd->amp, *namp; struct anon_hdr *nahp; @@ -5650,6 +5979,15 @@ segvn_split_seg(struct seg *seg, caddr_t addr) nsvd->amp = namp; nsvd->anon_index = 0; ANON_LOCK_EXIT(&oamp->a_rwlock); + } else if (svd->amp != NULL) { + pgcnt_t pgcnt = page_get_pagecnt(seg->s_szc); + ASSERT(svd->amp == nsvd->amp); + ASSERT(seg->s_szc <= svd->amp->a_szc); + nsvd->anon_index = svd->anon_index + seg_pages(seg); + ASSERT(IS_P2ALIGNED(nsvd->anon_index, pgcnt)); + ANON_LOCK_ENTER(&svd->amp->a_rwlock, RW_WRITER); + svd->amp->refcnt++; + ANON_LOCK_EXIT(&svd->amp->a_rwlock); } /* @@ -5681,7 +6019,6 @@ segvn_split_seg(struct seg *seg, caddr_t addr) return (nseg); } - /* * called on memory operations (unmap, setprot, setpagesize) for a subset * of a large page segment to either demote the 
memory range (SDR_RANGE) @@ -5690,7 +6027,12 @@ segvn_split_seg(struct seg *seg, caddr_t addr) * returns 0 on success. returns errno, including ENOMEM, on failure. */ static int -segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag) +segvn_demote_range( + struct seg *seg, + caddr_t addr, + size_t len, + int flag, + uint_t szcvec) { caddr_t eaddr = addr + len; caddr_t lpgaddr, lpgeaddr; @@ -5700,15 +6042,16 @@ segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag) size_t pgsz; struct segvn_data *svd = (struct segvn_data *)seg->s_data; int err; + uint_t szc = seg->s_szc; + uint_t tszcvec; ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); - ASSERT(seg->s_szc != 0); - pgsz = page_get_pagesize(seg->s_szc); + ASSERT(szc != 0); + pgsz = page_get_pagesize(szc); ASSERT(seg->s_base != addr || seg->s_size != len); ASSERT(addr >= seg->s_base && eaddr <= seg->s_base + seg->s_size); ASSERT(svd->softlockcnt == 0); - ASSERT(svd->type == MAP_PRIVATE || - (svd->vp != NULL && svd->amp == NULL)); + ASSERT(szcvec == 0 || (flag == SDR_END && svd->type == MAP_SHARED)); CALC_LPG_REGION(pgsz, seg, addr, len, lpgaddr, lpgeaddr); ASSERT(flag == SDR_RANGE || eaddr < lpgeaddr || addr > lpgaddr); @@ -5749,25 +6092,77 @@ segvn_demote_range(struct seg *seg, caddr_t addr, size_t len, int flag) } ASSERT(badseg1 != NULL); - ASSERT(badseg1->s_szc != 0); - ASSERT(page_get_pagesize(badseg1->s_szc) == pgsz); + ASSERT(badseg1->s_szc == szc); ASSERT(flag == SDR_RANGE || badseg1->s_size == pgsz || badseg1->s_size == 2 * pgsz); + ASSERT(sameprot(badseg1, badseg1->s_base, pgsz)); + ASSERT(badseg1->s_size == pgsz || + sameprot(badseg1, badseg1->s_base + pgsz, pgsz)); if (err = segvn_clrszc(badseg1)) { return (err); } ASSERT(badseg1->s_szc == 0); + if (szc > 1 && (tszcvec = P2PHASE(szcvec, 1 << szc)) > 1) { + uint_t tszc = highbit(tszcvec) - 1; + caddr_t ta = MAX(addr, badseg1->s_base); + caddr_t te; + size_t tpgsz = page_get_pagesize(tszc); + + ASSERT(svd->type == MAP_SHARED); + ASSERT(flag == SDR_END); + ASSERT(tszc < szc && tszc > 0); + + if (eaddr > badseg1->s_base + badseg1->s_size) { + te = badseg1->s_base + badseg1->s_size; + } else { + te = eaddr; + } + + ASSERT(ta <= te); + badseg1->s_szc = tszc; + if (!IS_P2ALIGNED(ta, tpgsz) || !IS_P2ALIGNED(te, tpgsz)) { + if (badseg2 != NULL) { + err = segvn_demote_range(badseg1, ta, te - ta, + SDR_END, tszcvec); + if (err != 0) { + return (err); + } + } else { + return (segvn_demote_range(badseg1, ta, + te - ta, SDR_END, tszcvec)); + } + } + } + if (badseg2 == NULL) return (0); - ASSERT(badseg2->s_szc != 0); - ASSERT(page_get_pagesize(badseg2->s_szc) == pgsz); + ASSERT(badseg2->s_szc == szc); ASSERT(badseg2->s_size == pgsz); ASSERT(sameprot(badseg2, badseg2->s_base, badseg2->s_size)); if (err = segvn_clrszc(badseg2)) { return (err); } ASSERT(badseg2->s_szc == 0); + + if (szc > 1 && (tszcvec = P2PHASE(szcvec, 1 << szc)) > 1) { + uint_t tszc = highbit(tszcvec) - 1; + size_t tpgsz = page_get_pagesize(tszc); + + ASSERT(svd->type == MAP_SHARED); + ASSERT(flag == SDR_END); + ASSERT(tszc < szc && tszc > 0); + ASSERT(badseg2->s_base > addr); + ASSERT(eaddr > badseg2->s_base); + ASSERT(eaddr < badseg2->s_base + badseg2->s_size); + + badseg2->s_szc = tszc; + if (!IS_P2ALIGNED(eaddr, tpgsz)) { + return (segvn_demote_range(badseg2, badseg2->s_base, + eaddr - badseg2->s_base, SDR_END, tszcvec)); + } + } + return (0); } @@ -7344,6 +7739,7 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, caddr_t a; size_t page; caddr_t lpgaddr, 
lpgeaddr; + pgcnt_t szc0_npages = 0; TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEGVN_START, "segvn_pagelock: start seg %p addr %p", seg, addr); @@ -7520,18 +7916,24 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, } } - mutex_enter(&freemem_lock); - if (availrmem < tune.t_minarmem + npages) { - mutex_exit(&freemem_lock); - mutex_exit(&svd->segp_slock); - error = ENOMEM; - goto out; - } else { - svd->softlockcnt += npages; + /* + * Avoid per page overhead of segvn_pp_lock_anonpages() for small + * pages. For large pages segvn_pp_lock_anonpages() only does real + * work once per large page. The tradeoff is that we may decrement + * availrmem more than once for the same page but this is ok + * for small pages. + */ + if (seg->s_szc == 0) { + mutex_enter(&freemem_lock); + if (availrmem < tune.t_minarmem + npages) { + mutex_exit(&freemem_lock); + mutex_exit(&svd->segp_slock); + error = ENOMEM; + goto out; + } availrmem -= npages; - segvn_pages_locked += npages; + mutex_exit(&freemem_lock); } - mutex_exit(&freemem_lock); pplist = kmem_alloc(sizeof (page_t *) * npages, KM_SLEEP); pl = pplist; @@ -7574,11 +7976,29 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, if (pp == NULL) { break; } + if (seg->s_szc != 0 || pp->p_szc != 0) { + if (!segvn_pp_lock_anonpages(pp, a == addr)) { + page_unlock(pp); + break; + } + } else { + szc0_npages++; + } *pplist++ = pp; } ANON_LOCK_EXIT(&->a_rwlock); + ASSERT(npages >= szc0_npages); + if (a >= addr + len) { + mutex_enter(&freemem_lock); + if (seg->s_szc == 0 && npages != szc0_npages) { + ASSERT(svd->type == MAP_SHARED && amp->a_szc > 0); + availrmem += (npages - szc0_npages); + } + svd->softlockcnt += npages; + segvn_pages_locked += npages; + mutex_exit(&freemem_lock); (void) seg_pinsert(seg, addr, len, pl, rw, SEGP_ASYNC_FLUSH, segvn_reclaim); mutex_exit(&svd->segp_slock); @@ -7589,31 +8009,24 @@ segvn_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, } mutex_exit(&svd->segp_slock); + if (seg->s_szc == 0) { + mutex_enter(&freemem_lock); + availrmem += npages; + mutex_exit(&freemem_lock); + } error = EFAULT; pplist = pl; np = ((uintptr_t)(a - addr)) >> PAGESHIFT; while (np > (uint_t)0) { + ASSERT(PAGE_LOCKED(*pplist)); + if (seg->s_szc != 0 || (*pplist)->p_szc != 0) { + segvn_pp_unlock_anonpages(*pplist, pplist == pl); + } page_unlock(*pplist); np--; pplist++; } kmem_free(pl, sizeof (page_t *) * npages); - mutex_enter(&freemem_lock); - svd->softlockcnt -= npages; - availrmem += npages; - segvn_pages_locked -= npages; - mutex_exit(&freemem_lock); - if (svd->softlockcnt <= 0) { - if (AS_ISUNMAPWAIT(seg->s_as)) { - mutex_enter(&seg->s_as->a_contents); - if (AS_ISUNMAPWAIT(seg->s_as)) { - AS_CLRUNMAPWAIT(seg->s_as); - cv_broadcast(&seg->s_as->a_cv); - } - mutex_exit(&seg->s_as->a_contents); - } - } - out: SEGVN_LOCK_EXIT(seg->s_as, &svd->lock); *ppp = NULL; @@ -7638,6 +8051,7 @@ segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, struct segvn_data *svd = (struct segvn_data *)seg->s_data; pgcnt_t np, npages; struct page **pl; + pgcnt_t szc0_npages = 0; #ifdef lint addr = addr; @@ -7654,12 +8068,19 @@ segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, } } + ASSERT(svd->vp == NULL && svd->amp != NULL); + while (np > (uint_t)0) { if (rw == S_WRITE) { hat_setrefmod(*pplist); } else { hat_setref(*pplist); } + if (seg->s_szc != 0 || (*pplist)->p_szc != 0) { + segvn_pp_unlock_anonpages(*pplist, pplist == pl); + } else { + szc0_npages++; + } 
page_unlock(*pplist); np--; pplist++; @@ -7667,9 +8088,11 @@ segvn_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist, kmem_free(pl, sizeof (page_t *) * npages); mutex_enter(&freemem_lock); - availrmem += npages; segvn_pages_locked -= npages; svd->softlockcnt -= npages; + if (szc0_npages != 0) { + availrmem += szc0_npages; + } mutex_exit(&freemem_lock); if (svd->softlockcnt <= 0) { if (AS_ISUNMAPWAIT(seg->s_as)) { diff --git a/usr/src/uts/common/vm/seg_vn.h b/usr/src/uts/common/vm/seg_vn.h index 4f66d495dd..10cd0f1835 100644 --- a/usr/src/uts/common/vm/seg_vn.h +++ b/usr/src/uts/common/vm/seg_vn.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -145,6 +144,10 @@ typedef struct segvn_data { ((struct segvn_crargs *)(argsp))->vp != NULL && \ ((struct segvn_crargs *)(argsp))->amp == NULL) +#define AS_MAP_SHAMP(crfp, argsp) \ + ((crfp) == (int (*)())segvn_create && \ + ((struct segvn_crargs *)(argsp))->type == MAP_SHARED && \ + ((struct segvn_crargs *)(argsp))->vp == NULL) extern void segvn_init(void); extern int segvn_create(struct seg *, void *); diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c index 415f96a0d1..e503432b02 100644 --- a/usr/src/uts/common/vm/vm_anon.c +++ b/usr/src/uts/common/vm/vm_anon.c @@ -1024,6 +1024,15 @@ anon_decref_pages( ASSERT(szc != 0); ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); + ASSERT(an_idx < ahp->size); + + if (ahp->size - an_idx < pgcnt) { + /* + * In case of shared mappings total anon map size may not be + * the largest page size aligned. 
+ */ + pgcnt = ahp->size - an_idx; + } VM_STAT_ADD(anonvmstats.decrefpages[0]); @@ -1474,6 +1483,7 @@ anon_free_pages( npages = btopr(size); ASSERT(IS_P2ALIGNED(npages, pgcnt)); ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); + ASSERT(an_idx < ahp->size); VM_STAT_ADD(anonvmstats.freepages[0]); @@ -1621,7 +1631,7 @@ anon_disclaim(struct anon_map *amp, ulong_t index, size_t size, int flags) } pgcnt = page_get_pagecnt(pp->p_szc); - if (!IS_P2ALIGNED(index, pgcnt)) { + if (!IS_P2ALIGNED(index, pgcnt) || npages < pgcnt) { if (!page_try_demote_pages(pp)) { mutex_exit(ahm); page_unlock(pp); @@ -1802,6 +1812,7 @@ anon_map_getpages( int prealloc = 1; int err, slotcreate; uint_t vpprot; + int upsize = (szc < seg->s_szc); #if !defined(__i386) && !defined(__amd64) ASSERT(seg->s_szc != 0); @@ -1824,9 +1835,10 @@ anon_map_getpages( ppa[0] = pl[0]; if (brkcow == 0 || (*protp & PROT_WRITE)) { VM_STAT_ADD(anonvmstats.getpages[2]); - if (ppa[0]->p_szc != 0) { + if (ppa[0]->p_szc != 0 && upsize) { VM_STAT_ADD(anonvmstats.getpages[3]); - *ppa_szc = ppa[0]->p_szc; + *ppa_szc = MIN(ppa[0]->p_szc, + seg->s_szc); page_unlock(ppa[0]); return (-2); } @@ -1859,11 +1871,11 @@ anon_map_getpages( uint_t pszc; swap_xlate(ap, &vp, &off); if (page_exists_forreal(vp, (u_offset_t)off, &pszc)) { - if (pszc > szc) { - *ppa_szc = pszc; + if (pszc > szc && upsize) { + *ppa_szc = MIN(pszc, seg->s_szc); return (-2); } - if (pszc == szc) { + if (pszc >= szc) { prealloc = 0; } } @@ -1980,10 +1992,11 @@ top: * Similar to the anon_zero case. */ err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, - NULL, pl, PAGESIZE, conpp, &nreloc, seg, vaddr, + NULL, pl, PAGESIZE, conpp, ppa_szc, &nreloc, seg, vaddr, slotcreate == 1 ? S_CREATE : rw, cred); if (err) { + ASSERT(err != -2 || upsize); VM_STAT_ADD(anonvmstats.getpages[12]); ASSERT(slotcreate == 0); goto io_err; @@ -1991,12 +2004,14 @@ top: pp = pl[0]; - if (pp->p_szc != szc) { + if (pp->p_szc < szc || (pp->p_szc > szc && upsize)) { VM_STAT_ADD(anonvmstats.getpages[13]); ASSERT(slotcreate == 0); ASSERT(prealloc == 0); ASSERT(pg_idx == 0); if (pp->p_szc > szc) { + ASSERT(upsize); + *ppa_szc = MIN(pp->p_szc, seg->s_szc); page_unlock(pp); VM_STAT_ADD(anonvmstats.getpages[14]); return (-2); @@ -2063,8 +2078,11 @@ top: if (pg_idx > 0 && ((page_pptonum(pp) != page_pptonum(ppa[pg_idx - 1]) + 1) || - (pp->p_szc != ppa[pg_idx - 1]->p_szc))) + (pp->p_szc != ppa[pg_idx - 1]->p_szc))) { panic("anon_map_getpages: unexpected page"); + } else if (pg_idx == 0 && (page_pptonum(pp) & (pgcnt - 1))) { + panic("anon_map_getpages: unaligned page"); + } if (prealloc == 0) { ppa[pg_idx] = pp; @@ -2122,7 +2140,7 @@ io_err: * unlocked. */ - ASSERT(err != -2 || pg_idx == 0); + ASSERT(err != -2 || ((pg_idx == 0) && upsize)); VM_STAT_COND_ADD(err > 0, anonvmstats.getpages[22]); VM_STAT_COND_ADD(err == -1, anonvmstats.getpages[23]); @@ -2490,7 +2508,8 @@ anon_map_privatepages( } err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, NULL, pl, - PAGESIZE, conpp, &nreloc, seg, vaddr, S_CREATE, cred); + PAGESIZE, conpp, NULL, &nreloc, seg, vaddr, + S_CREATE, cred); /* * Impossible to fail this is S_CREATE. 
@@ -2788,8 +2807,8 @@ anon_map_createpages(
 		conpp = pp;
 		err = swap_getconpage(ap_vp, ap_off, PAGESIZE,
-		    (uint_t *)NULL, anon_pl, PAGESIZE, conpp, &nreloc,
-		    seg, addr, S_CREATE, cred);
+		    (uint_t *)NULL, anon_pl, PAGESIZE, conpp, NULL,
+		    &nreloc, seg, addr, S_CREATE, cred);
 
 		if (err) {
 			ANON_LOCK_EXIT(&amp->a_rwlock);
@@ -2822,6 +2841,124 @@ anon_map_createpages(
 	return (0);
 }
 
+static int
+anon_try_demote_pages(
+	struct anon_hdr *ahp,
+	ulong_t sidx,
+	uint_t szc,
+	page_t **ppa,
+	int private)
+{
+	struct anon	*ap;
+	pgcnt_t		pgcnt = page_get_pagecnt(szc);
+	page_t		*pp;
+	pgcnt_t		i;
+	kmutex_t	*ahmpages = NULL;
+	int		root = 0;
+	pgcnt_t		npgs;
+	pgcnt_t		curnpgs = 0;
+	size_t		ppasize = 0;
+
+	ASSERT(szc != 0);
+	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
+	ASSERT(IS_P2ALIGNED(sidx, pgcnt));
+	ASSERT(sidx < ahp->size);
+
+	if (ppa == NULL) {
+		ppasize = pgcnt * sizeof (page_t *);
+		ppa = kmem_alloc(ppasize, KM_SLEEP);
+	}
+
+	ap = anon_get_ptr(ahp, sidx);
+	if (ap != NULL && private) {
+		VM_STAT_ADD(anonvmstats.demotepages[1]);
+		ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
+		mutex_enter(ahmpages);
+	}
+
+	if (ap != NULL && ap->an_refcnt > 1) {
+		if (ahmpages != NULL) {
+			VM_STAT_ADD(anonvmstats.demotepages[2]);
+			mutex_exit(ahmpages);
+		}
+		if (ppasize != 0) {
+			kmem_free(ppa, ppasize);
+		}
+		return (0);
+	}
+	if (ahmpages != NULL) {
+		mutex_exit(ahmpages);
+	}
+	if (ahp->size - sidx < pgcnt) {
+		ASSERT(private == 0);
+		pgcnt = ahp->size - sidx;
+	}
+	for (i = 0; i < pgcnt; i++, sidx++) {
+		ap = anon_get_ptr(ahp, sidx);
+		if (ap != NULL) {
+			if (ap->an_refcnt != 1) {
+				panic("anon_try_demote_pages: an_refcnt != 1");
+			}
+			pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off,
+			    SE_EXCL);
+			if (pp != NULL) {
+				(void) hat_pageunload(pp,
+				    HAT_FORCE_PGUNLOAD);
+			}
+		} else {
+			ppa[i] = NULL;
+		}
+	}
+	for (i = 0; i < pgcnt; i++) {
+		if ((pp = ppa[i]) != NULL && pp->p_szc != 0) {
+			ASSERT(pp->p_szc <= szc);
+			if (!root) {
+				VM_STAT_ADD(anonvmstats.demotepages[3]);
+				if (curnpgs != 0)
+					panic("anon_try_demote_pages: "
+					    "bad large page");
+
+				root = 1;
+				curnpgs = npgs =
+				    page_get_pagecnt(pp->p_szc);
+
+				ASSERT(npgs <= pgcnt);
+				ASSERT(IS_P2ALIGNED(npgs, npgs));
+				ASSERT(!(page_pptonum(pp) &
+				    (npgs - 1)));
+			} else {
+				ASSERT(i > 0);
+				ASSERT(page_pptonum(pp) - 1 ==
+				    page_pptonum(ppa[i - 1]));
+				if ((page_pptonum(pp) & (npgs - 1)) ==
+				    npgs - 1)
+					root = 0;
+			}
+			ASSERT(PAGE_EXCL(pp));
+			pp->p_szc = 0;
+			ASSERT(curnpgs > 0);
+			curnpgs--;
+		}
+	}
+	if (root != 0 || curnpgs != 0)
+		panic("anon_try_demote_pages: bad large page");
+
+	for (i = 0; i < pgcnt; i++) {
+		if ((pp = ppa[i]) != NULL) {
+			ASSERT(!hat_page_is_mapped(pp));
+			ASSERT(pp->p_szc == 0);
+			page_unlock(pp);
+		}
+	}
+	if (ppasize != 0) {
+		kmem_free(ppa, ppasize);
+	}
+	return (1);
+}
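
Editor's note: anon_try_demote_pages(), added above, demotes a large page by exclusively locking each constituent page, unloading its HAT mappings, and clearing p_szc, while verifying that the constituents form contiguous, size-aligned runs. Below is a simplified user-level model of that root-tracking walk; the toy types and the szc-to-pagecount mapping are invented for illustration and this is not illumos code.

#include <assert.h>
#include <stdio.h>

struct toy_page {
	unsigned long	pfn;	/* physical frame number */
	unsigned	p_szc;	/* nonzero while part of a large page */
};

/* pages per size code: szc 0 -> 1 page, szc n -> 2^(3n) pages */
static unsigned long
toy_pagecnt(unsigned szc)
{
	return (1UL << (3 * szc));
}

static void
toy_demote(struct toy_page **ppa, unsigned long pgcnt)
{
	int root = 0;
	unsigned long npgs = 0, curnpgs = 0, i;

	for (i = 0; i < pgcnt; i++) {
		struct toy_page *pp = ppa[i];
		if (pp == NULL || pp->p_szc == 0)
			continue;
		if (!root) {
			assert(curnpgs == 0);	/* previous run complete */
			root = 1;
			curnpgs = npgs = toy_pagecnt(pp->p_szc);
			/* a run must start at a size-aligned root pfn */
			assert((pp->pfn & (npgs - 1)) == 0);
		} else {
			/* constituents must be physically contiguous */
			assert(pp->pfn == ppa[i - 1]->pfn + 1);
			if ((pp->pfn & (npgs - 1)) == npgs - 1)
				root = 0;	/* run ends here */
		}
		pp->p_szc = 0;			/* the actual demotion */
		curnpgs--;
	}
	assert(root == 0 && curnpgs == 0);	/* no partial run left over */
}

int
main(void)
{
	struct toy_page pg[8];
	struct toy_page *ppa[8];
	unsigned long i;

	for (i = 0; i < 8; i++) {	/* one 8-constituent large page */
		pg[i].pfn = 0x1000 + i;
		pg[i].p_szc = 1;
		ppa[i] = &pg[i];
	}
	toy_demote(ppa, 8);
	for (i = 0; i < 8; i++)
		assert(pg[i].p_szc == 0);
	printf("demoted to 8 small pages\n");
	return (0);
}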
+
+/*
+ * anon_map_demotepages() can only be called for MAP_PRIVATE segments.
+ */
 int
 anon_map_demotepages(
 	struct anon_map *amp,
@@ -2842,7 +2979,6 @@ anon_map_demotepages(
 	pgcnt_t i, pg_idx;
 	ulong_t an_idx;
 	caddr_t vaddr;
-	kmutex_t *ahmpages = NULL;
 	int err;
 	int retry = 0;
 	uint_t vpprot;
@@ -2851,87 +2987,15 @@
 	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
 	ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
 	ASSERT(ppa != NULL);
+	ASSERT(szc != 0);
+	ASSERT(szc == amp->a_szc);
 
 	VM_STAT_ADD(anonvmstats.demotepages[0]);
 
-	ap = anon_get_ptr(amp->ahp, start_idx);
-	if (ap != NULL) {
-		VM_STAT_ADD(anonvmstats.demotepages[1]);
-		ahmpages = &anonpages_hash_lock[AH_LOCK(ap->an_vp, ap->an_off)];
-		mutex_enter(ahmpages);
-	}
 top:
-	if (ap == NULL || ap->an_refcnt <= 1) {
-		int root = 0;
-		pgcnt_t npgs, curnpgs = 0;
-
-		VM_STAT_ADD(anonvmstats.demotepages[2]);
-
-		ASSERT(retry == 0 || ap != NULL);
-
-		if (ahmpages != NULL)
-			mutex_exit(ahmpages);
-		an_idx = start_idx;
-		for (i = 0; i < pgcnt; i++, an_idx++) {
-			ap = anon_get_ptr(amp->ahp, an_idx);
-			if (ap != NULL) {
-				ASSERT(ap->an_refcnt == 1);
-				pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off,
-				    SE_EXCL);
-				if (pp != NULL) {
-					(void) hat_pageunload(pp,
-					    HAT_FORCE_PGUNLOAD);
-				}
-			} else {
-				ppa[i] = NULL;
-			}
-		}
-		for (i = 0; i < pgcnt; i++) {
-			if ((pp = ppa[i]) != NULL && pp->p_szc != 0) {
-				ASSERT(pp->p_szc <= szc);
-				if (!root) {
-					VM_STAT_ADD(anonvmstats.demotepages[3]);
-					if (curnpgs != 0)
-						panic("anon_map_demotepages: "
-						    "bad large page");
-
-					root = 1;
-					curnpgs = npgs =
-					    page_get_pagecnt(pp->p_szc);
-
-					ASSERT(npgs <= pgcnt);
-					ASSERT(IS_P2ALIGNED(npgs, npgs));
-					ASSERT(!(page_pptonum(pp) &
-					    (npgs - 1)));
-				} else {
-					ASSERT(i > 0);
-					ASSERT(page_pptonum(pp) - 1 ==
-					    page_pptonum(ppa[i - 1]));
-					if ((page_pptonum(pp) & (npgs - 1)) ==
-					    npgs - 1)
-						root = 0;
-				}
-				ASSERT(PAGE_EXCL(pp));
-				pp->p_szc = 0;
-				curnpgs--;
-			}
-		}
-		if (root != 0 || curnpgs != 0)
-			panic("anon_map_demotepages: bad large page");
-
-		for (i = 0; i < pgcnt; i++) {
-			if ((pp = ppa[i]) != NULL) {
-				ASSERT(!hat_page_is_mapped(pp));
-				ASSERT(pp->p_szc == 0);
-				page_unlock(pp);
-			}
-		}
-		kmem_free(ppa, ppasize);
+	if (anon_try_demote_pages(amp->ahp, start_idx, szc, ppa, 1)) {
 		return (0);
 	}
-	ASSERT(ahmpages != NULL);
-	mutex_exit(ahmpages);
-	ahmpages = NULL;
 
 	VM_STAT_ADD(anonvmstats.demotepages[4]);
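
Editor's note: the next hunk adds anon_shmap_free_pages(), which frees a sub-range of a shared anon map in three phases: demote a partially covered large page at the head, free whole large pages across the aligned middle, and demote the tail only when live anon slots remain inside its large page. Below is a standalone sketch of just the range arithmetic, with made-up index values; unlike the real function, it always demotes a partial tail rather than checking whether the whole trailing large page can be freed. Not illumos code.

#include <stdio.h>

#define	P2ALIGN(x, a)	((x) & -(a))

int
main(void)
{
	unsigned long sidx = 5, eidx = 27;	/* slot range to free */
	unsigned long pages = 8;		/* slots per large page */
	unsigned long s = sidx, e = eidx;
	unsigned long sa = P2ALIGN(s, pages);
	unsigned long ea = P2ALIGN(e, pages);

	if (sa != s) {				/* unaligned head */
		printf("demote lp [%lu,%lu), free slots [%lu,%lu)\n",
		    sa, sa + pages, s, sa + pages < e ? sa + pages : e);
		s = sa + pages;
	}
	if (s < ea) {				/* aligned middle */
		printf("free whole large pages [%lu,%lu)\n", s, ea);
		s = ea;
	}
	if (e > ea && e > s) {			/* unaligned tail */
		printf("demote lp [%lu,%lu), free slots [%lu,%lu)\n",
		    ea, ea + pages, s, e);
	}
	return (0);
}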
@@ -2988,6 +3052,75 @@ top:
 }
 
 /*
+ * Free pages of a shared anon map. It's assumed that anon maps don't
+ * share anon structures with private anon maps. Therefore all anon
+ * structures should have at most one reference at this point. This means
+ * underlying pages can be exclusively locked and demoted or freed. If we
+ * are not freeing entire large pages, demote the ends of the region being
+ * freed so that the subpages can be freed. Page roots correspond to
+ * aligned index positions in the anon map.
+ */
+void
+anon_shmap_free_pages(struct anon_map *amp, ulong_t sidx, size_t len)
+{
+	ulong_t eidx = sidx + btopr(len);
+	pgcnt_t pages = page_get_pagecnt(amp->a_szc);
+	struct anon_hdr *ahp = amp->ahp;
+	ulong_t tidx;
+	size_t size;
+	ulong_t sidx_aligned;
+	ulong_t eidx_aligned;
+
+	ASSERT(RW_WRITE_HELD(&amp->a_rwlock));
+	ASSERT(amp->refcnt <= 1);
+	ASSERT(amp->a_szc > 0);
+	ASSERT(eidx <= ahp->size);
+	ASSERT(!anon_share(ahp, sidx, btopr(len)));
+
+	if (len == 0) {	/* XXX */
+		return;
+	}
+
+	sidx_aligned = P2ALIGN(sidx, pages);
+	if (sidx_aligned != sidx ||
+	    (eidx < sidx_aligned + pages && eidx < ahp->size)) {
+		if (!anon_try_demote_pages(ahp, sidx_aligned,
+		    amp->a_szc, NULL, 0)) {
+			panic("anon_shmap_free_pages: demote failed");
+		}
+		size = (eidx <= sidx_aligned + pages) ? (eidx - sidx) :
+		    P2NPHASE(sidx, pages);
+		size <<= PAGESHIFT;
+		anon_free(ahp, sidx, size);
+		sidx = sidx_aligned + pages;
+		if (eidx <= sidx) {
+			return;
+		}
+	}
+	eidx_aligned = P2ALIGN(eidx, pages);
+	if (sidx < eidx_aligned) {
+		anon_free_pages(ahp, sidx,
+		    (eidx_aligned - sidx) << PAGESHIFT,
+		    amp->a_szc);
+		sidx = eidx_aligned;
+	}
+	ASSERT(sidx == eidx_aligned);
+	if (eidx == eidx_aligned) {
+		return;
+	}
+	tidx = eidx;
+	if (eidx != ahp->size && anon_get_next_ptr(ahp, &tidx) != NULL &&
+	    tidx - sidx < pages) {
+		if (!anon_try_demote_pages(ahp, sidx, amp->a_szc, NULL, 0)) {
+			panic("anon_shmap_free_pages: demote failed");
+		}
+		size = (eidx - sidx) << PAGESHIFT;
+		anon_free(ahp, sidx, size);
+	} else {
+		anon_free_pages(ahp, sidx, pages << PAGESHIFT, amp->a_szc);
+	}
+}
+
+/*
  * Allocate and initialize an anon_map structure for seg
  * associating the given swap reservation with the new anon_map.
  */
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index f7533f56a6..4e807fd670 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -1463,11 +1463,9 @@ top:
 }
 
 static int
-as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
+as_map_segvn_segs(struct as *as, caddr_t addr, size_t size, uint_t szcvec,
     int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
 {
-	int text = vn_a->flags & MAP_TEXT;
-	uint_t szcvec = map_execseg_pgszcvec(text, addr, size);
 	uint_t szc;
 	uint_t nszc;
 	int error;
@@ -1475,19 +1473,18 @@ as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
 	caddr_t eaddr;
 	size_t segsize;
 	struct seg *seg;
-	uint_t save_szcvec;
 	size_t pgsz;
-	struct vattr va;
-	u_offset_t eoff;
-	size_t save_size = 0;
+	int do_off = (vn_a->vp != NULL || vn_a->amp != NULL);
+	uint_t save_szcvec;
 
 	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
 	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
 	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
-	ASSERT(vn_a->vp != NULL);
-	ASSERT(vn_a->amp == NULL);
+	ASSERT(vn_a->vp == NULL || vn_a->amp == NULL);
+	if (!do_off) {
+		vn_a->offset = 0;
+	}
 
-again:
 	if (szcvec <= 1) {
 		seg = seg_alloc(as, addr, size);
 		if (seg == NULL) {
@@ -1501,28 +1498,6 @@ again:
 		return (error);
 	}
 
-	va.va_mask = AT_SIZE;
-	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred) != 0) {
-		szcvec = 0;
-		goto again;
-	}
-	eoff = vn_a->offset & PAGEMASK;
-	if (eoff >= va.va_size) {
-		szcvec = 0;
-		goto again;
-	}
-	eoff += size;
-	if (btopr(va.va_size) < btopr(eoff)) {
-		save_size = size;
-		size = va.va_size - (vn_a->offset & PAGEMASK);
-		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
-		szcvec = map_execseg_pgszcvec(text, addr, size);
-		if (szcvec <= 1) {
-			size = save_size;
-			goto again;
-		}
-	}
-
 	eaddr = addr + size;
 	save_szcvec = szcvec;
 	szcvec >>= 1;
@@ -1551,7 +1526,9 @@ again:
 			return
(error); } *segcreated = 1; - vn_a->offset += segsize; + if (do_off) { + vn_a->offset += segsize; + } addr = a; } szc = nszc; @@ -1576,7 +1553,9 @@ again: return (error); } *segcreated = 1; - vn_a->offset += segsize; + if (do_off) { + vn_a->offset += segsize; + } addr = a; } szcvec &= ~(1 << szc); @@ -1587,14 +1566,94 @@ again: } ASSERT(addr == eaddr); + return (0); +} + +static int +as_map_vnsegs(struct as *as, caddr_t addr, size_t size, + int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) +{ + int text = vn_a->flags & MAP_TEXT; + uint_t szcvec = map_execseg_pgszcvec(text, addr, size); + int error; + struct seg *seg; + struct vattr va; + u_offset_t eoff; + size_t save_size = 0; + + ASSERT(AS_WRITE_HELD(as, &as->a_lock)); + ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); + ASSERT(IS_P2ALIGNED(size, PAGESIZE)); + ASSERT(vn_a->vp != NULL); + ASSERT(vn_a->amp == NULL); + +again: + if (szcvec <= 1) { + seg = seg_alloc(as, addr, size); + if (seg == NULL) { + return (ENOMEM); + } + vn_a->szc = 0; + error = (*crfp)(seg, vn_a); + if (error != 0) { + seg_free(seg); + } + return (error); + } + + va.va_mask = AT_SIZE; + if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred) != 0) { + szcvec = 0; + goto again; + } + eoff = vn_a->offset & PAGEMASK; + if (eoff >= va.va_size) { + szcvec = 0; + goto again; + } + eoff += size; + if (btopr(va.va_size) < btopr(eoff)) { + save_size = size; + size = va.va_size - (vn_a->offset & PAGEMASK); + size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t); + szcvec = map_execseg_pgszcvec(text, addr, size); + if (szcvec <= 1) { + size = save_size; + goto again; + } + } + + error = as_map_segvn_segs(as, addr, size, szcvec, crfp, vn_a, + segcreated); + if (error != 0) { + return (error); + } if (save_size) { + addr += size; size = save_size - size; + szcvec = 0; goto again; } - return (0); } +static int +as_map_sham(struct as *as, caddr_t addr, size_t size, + int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated) +{ + uint_t szcvec = map_shm_pgszcvec(addr, size, + vn_a->amp == NULL ? 
(uintptr_t)addr : + (uintptr_t)P2ROUNDUP(vn_a->offset, PAGESIZE)); + + ASSERT(AS_WRITE_HELD(as, &as->a_lock)); + ASSERT(IS_P2ALIGNED(addr, PAGESIZE)); + ASSERT(IS_P2ALIGNED(size, PAGESIZE)); + ASSERT(vn_a->vp == NULL); + + return (as_map_segvn_segs(as, addr, size, szcvec, + crfp, vn_a, segcreated)); +} + int as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp) { @@ -1636,10 +1695,15 @@ as_map_locked(struct as *as, caddr_t addr, size_t size, int (*crfp)(), return (ENOMEM); } - if (AS_MAP_VNSEGS_USELPGS(crfp, argsp)) { + if (AS_MAP_VNSEGS_USELPGS(crfp, argsp) || AS_MAP_SHAMP(crfp, argsp)) { int unmap = 0; - error = as_map_vnsegs(as, raddr, rsize, crfp, - (struct segvn_crargs *)argsp, &unmap); + if (AS_MAP_SHAMP(crfp, argsp)) { + error = as_map_sham(as, raddr, rsize, crfp, + (struct segvn_crargs *)argsp, &unmap); + } else { + error = as_map_vnsegs(as, raddr, rsize, crfp, + (struct segvn_crargs *)argsp, &unmap); + } if (error != 0) { AS_LOCK_EXIT(as, &as->a_lock); if (unmap) { diff --git a/usr/src/uts/common/vm/vm_page.c b/usr/src/uts/common/vm/vm_page.c index 9fa821131b..05bfe662be 100644 --- a/usr/src/uts/common/vm/vm_page.c +++ b/usr/src/uts/common/vm/vm_page.c @@ -200,9 +200,6 @@ kmutex_t pcgs_cagelock; /* serializes NOSLEEP cage allocs */ kmutex_t pcgs_wait_lock; /* used for delay in pcgs */ static kcondvar_t pcgs_cv; /* cv for delay in pcgs */ -#define PAGE_LOCK_MAXIMUM \ - ((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1) - #ifdef VM_STATS /* @@ -552,6 +549,10 @@ add_physmem( */ add_physmem_cb(pp, pnum); + pp->p_lckcnt = 0; + pp->p_cowcnt = 0; + pp->p_slckcnt = 0; + /* * Initialize the page lock as unlocked, since nobody * can see or access this page yet. @@ -2711,9 +2712,11 @@ page_free(page_t *pp, int dontneed) * The page_struct_lock need not be acquired to examine these * fields since the page has an "exclusive" lock. 
*/ - if (hat_page_is_mapped(pp) || pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { - panic("page_free pp=%p, pfn=%lx, lckcnt=%d, cowcnt=%d", - pp, page_pptonum(pp), pp->p_lckcnt, pp->p_cowcnt); + if (hat_page_is_mapped(pp) || pp->p_lckcnt != 0 || pp->p_cowcnt != 0 || + pp->p_slckcnt != 0) { + panic("page_free pp=%p, pfn=%lx, lckcnt=%d, cowcnt=%d " + "slckcnt = %d", pp, page_pptonum(pp), pp->p_lckcnt, + pp->p_cowcnt, pp->p_slckcnt); /*NOTREACHED*/ } @@ -2853,7 +2856,7 @@ page_free_pages(page_t *pp) /*NOTREACHED*/ } if (hat_page_is_mapped(tpp) || tpp->p_lckcnt != 0 || - tpp->p_cowcnt != 0) { + tpp->p_cowcnt != 0 || tpp->p_slckcnt != 0) { panic("page_free_pages %p", (void *)tpp); /*NOTREACHED*/ } @@ -3142,6 +3145,7 @@ page_destroy(page_t *pp, int dontfree) { ASSERT((PAGE_EXCL(pp) && !page_iolock_assert(pp)) || panicstr); + ASSERT(pp->p_slckcnt == 0 || panicstr); if (pp->p_szc != 0) { if (pp->p_vnode == NULL || IS_SWAPFSVP(pp->p_vnode) || @@ -3210,6 +3214,7 @@ page_destroy_pages(page_t *pp) for (i = 0, tpp = pp; i < pgcnt; i++, tpp++) { ASSERT((PAGE_EXCL(tpp) && !page_iolock_assert(tpp)) || panicstr); + ASSERT(tpp->p_slckcnt == 0 || panicstr); (void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD); page_hashout(tpp, NULL); ASSERT(tpp->p_offset == (u_offset_t)-1); @@ -4921,6 +4926,8 @@ do_page_relocate( for (i = 0; i < npgs; i++) { ASSERT(PAGE_EXCL(targ)); + ASSERT(targ->p_slckcnt == 0); + ASSERT(repl->p_slckcnt == 0); (void) hat_pageunload(targ, HAT_FORCE_PGUNLOAD); @@ -5269,6 +5276,7 @@ page_try_demote_pages(page_t *pp) for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) { ASSERT(PAGE_EXCL(tpp)); + ASSERT(tpp->p_slckcnt == 0); (void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD); tpp->p_szc = 0; } diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c index ab0ab9181f..8512bdc99f 100644 --- a/usr/src/uts/i86pc/vm/vm_machdep.c +++ b/usr/src/uts/i86pc/vm/vm_machdep.c @@ -172,6 +172,7 @@ map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) * to be used for mapping application and libraries text segments. */ int use_text_largepages = 0; +int use_shm_largepages = 0; /* * Return a bit vector of large page size codes that @@ -201,6 +202,29 @@ map_execseg_pgszcvec(int text, caddr_t addr, size_t len) return (1 << 1); } +uint_t +map_shm_pgszcvec(caddr_t addr, size_t len, uintptr_t off) +{ + size_t pgsz; + caddr_t a; + + if (!use_shm_largepages || mmu.max_page_level == 0) { + return (0); + } + + pgsz = LEVEL_SIZE(1); + a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); + if (a < addr || a >= addr + len || + P2PHASE((uintptr_t)addr ^ off, pgsz)) { + return (0); + } + len -= (a - addr); + if (len < pgsz) { + return (0); + } + return (1 << 1); +} + /* * Handle a pagefault. 
 */
diff --git a/usr/src/uts/sfmmu/vm/hat_sfmmu.c b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
index c8cbc1183f..c9474ee8fa 100644
--- a/usr/src/uts/sfmmu/vm/hat_sfmmu.c
+++ b/usr/src/uts/sfmmu/vm/hat_sfmmu.c
@@ -146,6 +146,7 @@ int sfmmu_allow_nc_trans = 0;
 int disable_ism_large_pages = (1 << TTE512K);
 int disable_large_pages = 0;
 int disable_auto_large_pages = 0;
+int disable_shm_large_pages = 0;
 
 /*
 * Private sfmmu data structures for hat management
@@ -918,6 +919,7 @@ hat_init_pagesizes()
 		mmu_large_pages_disabled(HAT_LOAD_AUTOLPG);
 	}
 
+	disable_shm_large_pages = disable_auto_large_pages;
 }
 
 /*
diff --git a/usr/src/uts/sun4/vm/vm_dep.c b/usr/src/uts/sun4/vm/vm_dep.c
index 6109ed7054..9f2eebc551 100644
--- a/usr/src/uts/sun4/vm/vm_dep.c
+++ b/usr/src/uts/sun4/vm/vm_dep.c
@@ -657,6 +657,10 @@ extern size_t initdata_pgsz64k_minsize;
 */
 pgcnt_t execseg_lpg_min_physmem = 131072;	/* 1GB */
 
+extern int disable_shm_large_pages;
+pgcnt_t shm_lpg_min_physmem = 131072;	/* 1GB */
+extern size_t max_shm_lpsize;
+
 
 /* assumes TTE8K...TTE4M == szc */
@@ -767,6 +771,48 @@ map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
 	return (ret);
 }
 
+uint_t
+map_shm_pgszcvec(caddr_t addr, size_t size, uintptr_t off)
+{
+	caddr_t eaddr = addr + size;
+	uint_t szcvec = 0;
+	int i;
+	caddr_t raddr;
+	caddr_t readdr;
+	size_t pgsz;
+
+	if (physmem < shm_lpg_min_physmem || mmu_page_sizes <= 1 ||
+	    max_shm_lpsize <= MMU_PAGESIZE) {
+		return (0);
+	}
+
+	for (i = mmu_page_sizes - 1; i > 0; i--) {
+		if (disable_shm_large_pages & (1 << i)) {
+			continue;
+		}
+		pgsz = page_get_pagesize(i);
+		if (pgsz > max_shm_lpsize) {
+			continue;
+		}
+		raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
+		readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
+		if (raddr < addr || raddr >= readdr) {
+			continue;
+		}
+		if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
+			continue;
+		}
+		szcvec |= (1 << i);
+		/*
+		 * Also OR in the remaining enabled page sizes.
+		 */
+		szcvec |= P2PHASE(~disable_shm_large_pages, (1 << i));
+		szcvec &= ~1;	/* no need to return 8K pagesize */
+		break;
+	}
+	return (szcvec);
+}
+
 #define	PNUM_SIZE(size_code) \
	(hw_page_array[size_code].hp_size >> hw_page_array[0].hp_shift)
diff --git a/usr/src/uts/sun4u/cpu/us3_cheetah.c b/usr/src/uts/sun4u/cpu/us3_cheetah.c
index b530b6754d..9a74d72be1 100644
--- a/usr/src/uts/sun4u/cpu/us3_cheetah.c
+++ b/usr/src/uts/sun4u/cpu/us3_cheetah.c
@@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
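
Editor's note: map_shm_pgszcvec() above returns a bit vector of allowed page size codes — bit i set means size code i may be used for the mapping. The sun4 version scans downward for the largest enabled size that fits and is alignment-compatible with both the address and the SysV offset, then ORs in every smaller enabled size as fallback and clears bit 0 (the 8K base size). A worked example of that bit arithmetic follows, with illustrative values; not illumos code.

#include <stdio.h>

#define	P2PHASE(x, a)	((x) & ((a) - 1))

int
main(void)
{
	unsigned disable = 0x2;	/* pretend size code 1 (64K) is disabled */
	unsigned best = 3;	/* largest usable szc found by the scan */
	unsigned szcvec;

	szcvec = 1U << best;			/* the chosen size ... */
	szcvec |= P2PHASE(~disable, 1U << best); /* ... plus smaller enabled */
	szcvec &= ~1U;				/* never report the base size */
	printf("szcvec = 0x%x\n", szcvec);	/* prints szcvec = 0xc */
	return (0);
}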
@@ -124,6 +123,7 @@ cpu_fiximp(pnode_t dnode)
 	};
 
 	extern int exec_lpg_disable, use_brk_lpg, use_stk_lpg, use_zmap_lpg;
+	extern size_t max_shm_lpsize;
 
 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
@@ -148,6 +148,7 @@ cpu_fiximp(pnode_t dnode)
 		use_brk_lpg = 0;
 		use_stk_lpg = 0;
 		use_zmap_lpg = 0;
+		max_shm_lpsize = MMU_PAGESIZE;
 	}
 
 void
diff --git a/usr/src/uts/sun4u/vm/mach_vm_dep.c b/usr/src/uts/sun4u/vm/mach_vm_dep.c
index d31d8321a9..d9907b3616 100644
--- a/usr/src/uts/sun4u/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4u/vm/mach_vm_dep.c
@@ -2,9 +2,8 @@
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
 * CDDL HEADER END
 */
 /*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
 
@@ -123,6 +122,8 @@ size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
 size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
 size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;
 
+size_t max_shm_lpsize = ULONG_MAX;
+
 /*
 * map_addr_proc() is the routine called when the system is to
 * choose an address for the user. We will pick an address
diff --git a/usr/src/uts/sun4v/vm/mach_vm_dep.c b/usr/src/uts/sun4v/vm/mach_vm_dep.c
index d214849fac..5b9e380a30 100644
--- a/usr/src/uts/sun4v/vm/mach_vm_dep.c
+++ b/usr/src/uts/sun4v/vm/mach_vm_dep.c
@@ -130,6 +130,8 @@ size_t text_pgsz64k_minsize = MMU_PAGESIZE64K;
 size_t text_pgsz4m_minsize = MMU_PAGESIZE4M;
 size_t initdata_pgsz64k_minsize = MMU_PAGESIZE64K;
 
+size_t max_shm_lpsize = MMU_PAGESIZE4M;
+
 /*
 * map_addr_proc() is the routine called when the system is to
 * choose an address for the user. We will pick an address
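
Editor's note, for reference: the range arithmetic throughout this patch leans on the power-of-two helpers from sys/sysmacros.h. Minimal user-level definitions matching the standard ones, with a quick self-check; not illumos code.

#include <assert.h>

#define	P2ALIGN(x, align)	((x) & -(align))	/* round down */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))	/* round up */
#define	P2PHASE(x, align)	((x) & ((align) - 1))	/* offset within */
#define	P2NPHASE(x, align)	(-(x) & ((align) - 1))	/* bytes to next */
#define	IS_P2ALIGNED(v, a)	((((unsigned long)(v)) & ((a) - 1)) == 0)

int
main(void)
{
	assert(P2ALIGN(13UL, 8UL) == 8);
	assert(P2ROUNDUP(13UL, 8UL) == 16);
	assert(P2PHASE(13UL, 8UL) == 5);
	assert(P2NPHASE(13UL, 8UL) == 3);
	assert(IS_P2ALIGNED(16UL, 8UL));
	return (0);
}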